Initial commit - WhyRating Engine (Google Reviews Scraper)

2026-02-02 18:19:00 +00:00
parent 0543a08242
commit 2206ddeff2
136 changed files with 51138 additions and 855 deletions
--- a/.env.nuc
+++ b/.env.nuc
@@ -0,0 +1,21 @@
+# NUC Production Environment Variables
+# Use this to connect to NUC-hosted database
+# Copy to .env: cp .env.nuc .env
+
+# Database (NUC PostgreSQL on port 5437)
+DB_PASSWORD=scraper_nuc_2026
+DATABASE_URL=postgresql://scraper:scraper_nuc_2026@192.168.1.3:5437/scraper
+
+# API Configuration
+API_BASE_URL=http://localhost:8001
+PORT=8001
+
+# Job Concurrency
+MAX_CONCURRENT_JOBS=5
+
+# Canary Test Configuration
+CANARY_TEST_URL=https://www.google.com/maps/place/Soho+Factory/@54.6738155,25.2595844,17z/
+
+# LLM API keys (for ReviewIQ pipeline) - set real values only in your untracked .env; revoke and rotate any key that was ever committed
+OPENAI_API_KEY=
+ANTHROPIC_API_KEY=
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1,142 @@
+# Google Reviews Scraper Pro - Claude Code Instructions
+
+## Quick Start
+
+### Run with NUC Database (Recommended)
+The PostgreSQL database is hosted on the NUC server. Only the API runs locally.
+
+```bash
+# Use NUC database config
+cp .env.nuc .env
+
+# Start API only (connects to NUC database)
+docker compose -f docker-compose.production.yml -f docker-compose.nuc.yml up -d
+
+# View logs
+docker compose -f docker-compose.production.yml logs -f api
+```
+
+### Run Fully Local (Legacy)
+Runs both PostgreSQL and API locally.
+
+```bash
+# Use local database config
+cp .env.example .env
+# Edit .env with your settings
+
+# Start all services
+docker compose -f docker-compose.production.yml up -d
+```
+
+## NUC Database Connection
+
+| Property | Value |
+|----------|-------|
+| Host | 192.168.1.3 |
+| Port | 5437 |
+| Database | scraper |
+| User | scraper |
+| Password | scraper_nuc_2026 |
+| Coolify UUID | g4s8w4csk8s8ocswg48kkogo |
+
+```bash
+# Direct connection
+psql postgresql://scraper:scraper_nuc_2026@192.168.1.3:5437/scraper
+
+# Via SSH tunnel (if needed)
+ssh -L 5437:localhost:5437 nuc
+```
+
+## Service URLs
+
+| Service | URL |
+|---------|-----|
+| API | http://localhost:8001 |
+| API Docs | http://localhost:8001/docs |
+| VNC (browser debugging) | http://localhost:6080 |
+| VNC (client) | vnc://localhost:5900 |
+
+## Common Commands
+
+```bash
+# Start services
+docker compose -f docker-compose.production.yml -f docker-compose.nuc.yml up -d
+
+# Stop services
+docker compose -f docker-compose.production.yml -f docker-compose.nuc.yml down
+
+# View API logs
+docker logs -f scraper-api
+
+# Rebuild API after code changes
+docker compose -f docker-compose.production.yml -f docker-compose.nuc.yml up -d --build api
+
+# Run a scrape job (example)
+curl -X POST http://localhost:8001/api/jobs \
+  -H "Content-Type: application/json" \
+  -d '{"url": "https://www.google.com/maps/place/..."}'
+
+# Check job status
+curl http://localhost:8001/api/jobs/{job_id}
+```
+
+## Database Management
+
+```bash
+# Connect to NUC database
+docker run --rm -it postgres:15-alpine psql postgresql://scraper:scraper_nuc_2026@192.168.1.3:5437/scraper
+
+# Backup database
+ssh nuc "docker exec postgres-g4s8w4csk8s8ocswg48kkogo pg_dump -U scraper scraper" > backup.sql
+
+# Restore database
+cat backup.sql | ssh nuc "docker exec -i postgres-g4s8w4csk8s8ocswg48kkogo psql -U scraper scraper"
+```
+
+## Project Structure
+
+```
+├── api/                 # FastAPI backend
+├── packages/
+│   ├── pipeline-core/   # Shared pipeline utilities
+│   └── reviewiq-pipeline/ # Review analysis pipeline
+├── web/                 # Next.js frontend (optional)
+├── db/init/             # Database initialization scripts
+├── docker-compose.production.yml  # Main compose file
+├── docker-compose.nuc.yml         # NUC database override
+├── .env.nuc             # NUC environment config
+└── Dockerfile           # API container build
+```
+
+## Troubleshooting
+
+### API can't connect to NUC database
+```bash
+# Check NUC is reachable
+nc -zv 192.168.1.3 5437
+
+# Check database is running
+ssh nuc "docker ps | grep postgres-g4s8w4csk8s8ocswg48kkogo"
+
+# Restart database on NUC
+ssh nuc "docker restart postgres-g4s8w4csk8s8ocswg48kkogo"
+```
+
+### Chrome/Scraping issues
+```bash
+# Check VNC for visual debugging
+open http://localhost:6080
+
+# If Chrome crashes, increase the container's shared memory
+# by setting `shm_size: 4gb` in the docker-compose file
+```
+
+## Environment Variables
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| DATABASE_URL | PostgreSQL connection string | (required) |
+| API_BASE_URL | Public API URL | http://localhost:8001 |
+| MAX_CONCURRENT_JOBS | Parallel scrape jobs | 5 |
+| OPENAI_API_KEY | For ReviewIQ analysis | (optional) |
+| ANTHROPIC_API_KEY | For ReviewIQ analysis | (optional) |
--- a/4
+++ b/4
@@ -64,6 +64,10 @@ COPY workers/ ./workers/
 COPY api_server_production.py .
 COPY config.yaml .

+# Copy and install pipeline packages
+COPY packages/ ./packages/
+RUN pip install --no-cache-dir -e ./packages/pipeline-core -e ./packages/reviewiq-pipeline
+
 # Create startup script for Xvfb + VNC + API server
 RUN echo '#!/bin/bash\n\
 # Start Xvfb (virtual display) in background\n\
--- a/api/routes/init.py
+++ b/api/routes/init.py
@@ -7,6 +7,7 @@ from api.routes.batches import router as batches_router, set_database as set_bat
 from api.routes.dashboard import router as dashboard_router, set_database as set_dashboard_db
 from api.routes.admin import router as admin_router, set_database as set_admin_db
 from api.routes.pipelines import router as pipelines_router, set_database as set_pipelines_db
+from api.routes.reviewiq_analytics import router as reviewiq_analytics_router, set_database as set_reviewiq_analytics_db

 __all__ = [
    'batches_router',
@@ -17,4 +18,6 @@ __all__ = [
    'set_admin_db',
    'pipelines_router',
    'set_pipelines_db',
+    'reviewiq_analytics_router',
+    'set_reviewiq_analytics_db',
 ]
--- a/api/routes/pipelines.py
+++ b/api/routes/pipelines.py
@@ -277,15 +277,17 @@ async def execute_pipeline(

    pipeline = await _get_pipeline_instance(pipeline_id)

+    # Create execution record
+    execution_id = str(uuid.uuid4())
+
    # Prepare input data
    input_data = request.input_data or {}
    if request.job_id:
        input_data["job_id"] = request.job_id
    if request.business_id:
        input_data["business_id"] = request.business_id
-
-    # Create execution record
-    execution_id = str(uuid.uuid4())
+    # Pass execution_id so Stage 5 synthesis can store results
+    input_data["execution_id"] = execution_id
    stages = request.stages or pipeline.get_stage_names()

    # Prepare input summary for storage
@@ -604,6 +606,7 @@ async def get_widget_data(
    pipeline_id: str,
    widget_id: str,
    business_id: str | None = Query(None, description="Filter by business"),
+    job_id: str | None = Query(None, description="Filter by job ID"),
    time_range: str = Query("30d", description="Time range (e.g., 7d, 30d, 90d)"),
    page: int = Query(1, ge=1, description="Page number for paginated widgets"),
    page_size: int = Query(10, ge=1, le=100, description="Items per page"),
@@ -621,6 +624,7 @@ async def get_widget_data(
    try:
        params = {
            "business_id": business_id,
+            "job_id": job_id,
            "time_range": time_range,
            "page": page,
            "page_size": page_size,
--- a/api/routes/sessions.py
+++ b/api/routes/sessions.py
@@ -0,0 +1,300 @@
+"""
+Session Routes for Google Reviews Scraper API
+
+Provides session handoff endpoints for efficient validation → scraping workflow.
+Uses scraper v1.2.0 with session support.
+
+Endpoints:
+  POST /sessions/validate - Validate URL, keep browser alive, return session_id
+  POST /sessions/scrape   - Scrape using existing session (skips navigation)
+  GET  /sessions          - List active sessions
+  GET  /sessions/{id}     - Get session status
+  DELETE /sessions/{id}   - Release session manually
+
+Usage:
+  1. POST /sessions/validate with URL → returns session_id
+  2. Frontend shows business info to user for confirmation
+  3. POST /sessions/scrape with session_id → scrapes using existing browser
+"""
+
+import asyncio
+import logging
+from typing import Optional, Dict, Any
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel, HttpUrl, Field
+
+# Import v1.2.0 scraper with session support
+from scrapers.google_reviews.v1_2_0 import (
+    validate_with_session,
+    scrape_with_session,
+    LogCapture
+)
+from scrapers.google_reviews.session_manager import get_session_manager
+
+log = logging.getLogger("api_sessions")
+
+# Create router
+router = APIRouter(prefix="/sessions", tags=["sessions"])
+
+
+# ============================================================================
+# Request/Response Models
+# ============================================================================
+
+class GeoLocation(BaseModel):
+    """Latitude/longitude pair; forwarded to the scraper under the fingerprint's "geolocation" key."""
+    lat: float  # latitude (presumably decimal degrees - confirm against the scraper)
+    lng: float  # longitude (presumably decimal degrees - confirm against the scraper)
+
+class Viewport(BaseModel):
+    """Browser viewport size; forwarded to the scraper under the fingerprint's "viewport" key."""
+    width: int  # presumably CSS pixels - confirm against the scraper
+    height: int  # presumably CSS pixels - confirm against the scraper
+
+class BrowserFingerprint(BaseModel):
+    """
+    Optional browser characteristics supplied by the client.
+
+    Field names are camelCase because they are copied under the same keys into
+    the fingerprint dict handed to the scraper. Every field may be omitted.
+    """
+    userAgent: Optional[str] = None
+    timezone: Optional[str] = None
+    language: Optional[str] = None
+    platform: Optional[str] = None
+    # Nested models below are flattened to plain dicts by the /validate handler.
+    viewport: Optional[Viewport] = None
+    geolocation: Optional[GeoLocation] = None
+
+
+class ValidateRequest(BaseModel):
+    """Request body for session validation."""
+    url: HttpUrl = Field(..., description="Google Maps URL to validate")
+    # Full client fingerprint; may carry its own geolocation.
+    browser_fingerprint: Optional[BrowserFingerprint] = None
+    # Top-level geolocation, for clients that do not send a full fingerprint.
+    geolocation: Optional[GeoLocation] = None
+    # Bounded to 60-900 seconds by the ge/le constraints.
+    session_ttl: int = Field(300, description="Session TTL in seconds (default: 5 min)", ge=60, le=900)
+
+
+class ValidateResponse(BaseModel):
+    """
+    Response from session validation.
+
+    The /validate handler also returns this model (with success=False and
+    error set) when validation raises, rather than an HTTP error status.
+    """
+    session_id: Optional[str] = Field(None, description="Session ID for scraping (None if validation failed)")
+    # Free-form dict taken from the scraper result's "business_info" entry.
+    business_info: Dict[str, Any] = Field(default_factory=dict)
+    total_reviews: Optional[int] = None
+    success: bool
+    error: Optional[str] = None
+    expires_in: Optional[int] = Field(None, description="Seconds until session expires")
+
+
+class ScrapeWithSessionRequest(BaseModel):
+    """Request body for scraping with an existing session."""
+    session_id: str = Field(..., description="Session ID from validation")
+    # Bounded to 1-50000 by the ge/le constraints when provided.
+    max_reviews: Optional[int] = Field(None, description="Max reviews to collect (None = unlimited)", ge=1, le=50000)
+    # Plain str: the values listed in the description are not enforced by this model.
+    sort_strategy: str = Field("auto", description="Sort strategy: auto, multi, newest, lowest, highest, relevant")
+    initial_sort: Optional[str] = Field(None, description="Initial sort order for first pass")
+
+
+class ScrapeWithSessionResponse(BaseModel):
+    """
+    Response from session-based scraping.
+
+    Only `success` is required; the /scrape handler fills the other fields from
+    the scraper's result dict and falls back to the defaults declared here.
+    """
+    reviews: list = Field(default_factory=list)  # untyped; items are passed through as returned by the scraper
+    count: int = 0
+    total_reviews: int = 0
+    success: bool
+    error: Optional[str] = None
+    time: float = 0  # presumably elapsed seconds - confirm against the scraper
+    session_reused: bool = Field(True, description="Indicates session was reused from validation")
+    business_info: Dict[str, Any] = Field(default_factory=dict)
+
+
+class SessionInfo(BaseModel):
+    """
+    Information about an active session.
+
+    NOTE(review): no endpoint in this module references this model; the
+    GET /sessions/{id} handler returns a plain dict with these keys plus "url".
+    """
+    session_id: str
+    business: str
+    state: str
+    total_reviews: int
+    age_seconds: int
+    ttl_remaining: int
+
+
+class SessionListResponse(BaseModel):
+    """Response listing all active sessions."""
+    total_sessions: int
+    # Untyped list; filled from the "sessions" entry of the session manager's stats.
+    sessions: list
+
+
+# ============================================================================
+# Endpoints
+# ============================================================================
+
+@router.post("/validate", response_model=ValidateResponse, summary="Validate URL and Create Session")
+async def validate_and_create_session(request: ValidateRequest):
+    """
+    Validate a Google Maps URL and keep the browser session alive for scraping.
+
+    This endpoint:
+    1. Creates a Chrome browser
+    2. Navigates to the Google Maps URL
+    3. Extracts business information
+    4. Keeps the browser ALIVE and returns a session_id
+
+    The session can then be used with /sessions/scrape to continue scraping
+    without re-navigating (saves 4-16 seconds per job).
+
+    Session expires after TTL (default: 5 minutes).
+
+    Geolocation precedence: `browser_fingerprint.geolocation` wins; otherwise the
+    top-level `geolocation` is used, whether or not a fingerprint was supplied.
+
+    Failures are reported in the response body (success=False, error=<message>)
+    rather than as an HTTP error status.
+    """
+    try:
+        url = str(request.url)
+        log.info(f"Validating URL with session: {url[:80]}...")
+
+        # Build fingerprint dict
+        fingerprint = None
+        fp = request.browser_fingerprint
+        if fp:
+            fingerprint = {
+                "userAgent": fp.userAgent,
+                "timezone": fp.timezone,
+                "language": fp.language,
+                "platform": fp.platform,
+            }
+            if fp.viewport:
+                fingerprint["viewport"] = {"width": fp.viewport.width, "height": fp.viewport.height}
+
+        # Previously the top-level geolocation was dropped whenever a fingerprint
+        # without its own geolocation was supplied; fall back to it instead.
+        geo = (fp.geolocation if fp else None) or request.geolocation
+        if geo:
+            if fingerprint is None:
+                fingerprint = {}
+            fingerprint["geolocation"] = {"lat": geo.lat, "lng": geo.lng}
+
+        # Run validation in thread (blocks Chrome operations)
+        result = await asyncio.to_thread(
+            validate_with_session,
+            url=url,
+            headless=False,  # Headed Chrome with Xvfb
+            browser_fingerprint=fingerprint,
+            session_ttl=request.session_ttl
+        )
+
+        return ValidateResponse(
+            session_id=result.get("session_id"),
+            business_info=result.get("business_info", {}),
+            total_reviews=result.get("total_reviews"),
+            success=result.get("success", False),
+            error=result.get("error"),
+            expires_in=result.get("expires_in")
+        )
+
+    except Exception as e:
+        # Deliberate best-effort: surface the failure in the response body.
+        log.error(f"Session validation error: {e}")
+        return ValidateResponse(
+            session_id=None,
+            success=False,
+            error=str(e)
+        )
+
+
+@router.post("/scrape", response_model=ScrapeWithSessionResponse, summary="Scrape Using Existing Session")
+async def scrape_using_session(request: ScrapeWithSessionRequest):
+    """
+    Collect reviews through a browser session created by /sessions/validate.
+
+    The blocking scraper call runs in a worker thread. Its result dict is mapped
+    onto the response model, with missing keys falling back to fixed defaults.
+    Any exception is reported in the body (success=False, error=<message>)
+    instead of as an HTTP error status.
+    """
+    # Response field -> default used when the scraper result lacks that key.
+    fallbacks = {
+        "reviews": [],
+        "count": 0,
+        "total_reviews": 0,
+        "success": False,
+        "error": None,
+        "time": 0,
+        "session_reused": True,
+        "business_info": {},
+    }
+
+    try:
+        log.info(f"Scraping with session {request.session_id}...")
+
+        scraper_args = dict(
+            session_id=request.session_id,
+            max_reviews=request.max_reviews,
+            sort_strategy=request.sort_strategy,
+            initial_sort=request.initial_sort,
+        )
+        # Run scraping in thread
+        outcome = await asyncio.to_thread(scrape_with_session, **scraper_args)
+
+        fields = {key: outcome.get(key, default) for key, default in fallbacks.items()}
+        return ScrapeWithSessionResponse(**fields)
+
+    except Exception as e:
+        log.error(f"Session scraping error: {e}")
+        return ScrapeWithSessionResponse(success=False, error=str(e))
+
+
+@router.get("", response_model=SessionListResponse, summary="List Active Sessions")
+async def list_sessions():
+    """
+    Report every browser session the session manager currently tracks.
+
+    The payload is taken from the manager's stats: the "total_sessions" count
+    (0 if absent) and the "sessions" list (empty if absent).
+    """
+    manager_stats = get_session_manager().get_stats()
+    session_count = manager_stats.get("total_sessions", 0)
+    session_rows = manager_stats.get("sessions", [])
+
+    return SessionListResponse(total_sessions=session_count, sessions=session_rows)
+
+
+@router.get("/{session_id}", summary="Get Session Status")
+async def get_session_status(session_id: str):
+    """
+    Report state and timing details for a single session.
+
+    Raises a 404 HTTPException when the session manager returns no session
+    for the given ID.
+    """
+    import time
+
+    found = get_session_manager().get_session(session_id)
+    if not found:
+        raise HTTPException(status_code=404, detail=f"Session {session_id} not found or expired")
+
+    # One timestamp so age and remaining TTL are computed against the same instant.
+    current = time.time()
+
+    return {
+        "session_id": found.session_id,
+        "business": found.business_info.get("name", "unknown"),
+        "state": found.state,
+        "total_reviews": found.total_reviews,
+        "url": found.url,
+        "age_seconds": int(current - found.created_at),
+        "ttl_remaining": int(found.expires_at - current),
+    }
+
+
+@router.delete("/{session_id}", summary="Release Session")
+async def release_session(session_id: str):
+    """
+    Release a session on request, e.g. when the user cancels before scraping.
+
+    Raises a 404 HTTPException when the session manager returns no session
+    for the given ID; otherwise asks the manager to release it with reason
+    "manual_release".
+    """
+    manager = get_session_manager()
+
+    if not manager.get_session(session_id):
+        raise HTTPException(status_code=404, detail=f"Session {session_id} not found or expired")
+
+    manager.release_session(session_id, reason="manual_release")
+
+    return dict(success=True, message=f"Session {session_id} released")
+
+
+# ============================================================================
+# Helper to register router with main app
+# ============================================================================
+
+def register_session_routes(app):
+    """
+    Register session routes with the FastAPI app.
+
+    Convenience wrapper around `app.include_router(router)`.
+    NOTE(review): api_server_production.py includes this router directly, so
+    calling this helper on the same app as well would add the routes twice.
+    """
+    app.include_router(router)
+    log.info("Session routes registered at /sessions")
--- a/api_server_production.py
+++ b/api_server_production.py
@@ -61,7 +61,9 @@ from api.routes import (
    dashboard_router, set_dashboard_db,
    admin_router, set_admin_db,
    pipelines_router, set_pipelines_db,
+    reviewiq_analytics_router, set_reviewiq_analytics_db,
 )
+from api.routes.sessions import router as sessions_router

 # Configure logging
 logging.basicConfig(
@@ -110,6 +112,7 @@ async def lifespan(app: FastAPI):
    set_dashboard_db(db)
    set_admin_db(db)
    set_pipelines_db(db.pool)  # Pipeline router uses raw asyncpg pool
+    set_reviewiq_analytics_db(db.pool)  # ReviewIQ analytics uses raw asyncpg pool

    # Initialize health check system with canary monitoring
    # DISABLED: Canary tests consume Google Maps requests and trigger rate limiting
@@ -124,12 +127,15 @@ async def lifespan(app: FastAPI):

    # Start Chrome worker pools (1 for validation, 2 for scraping)
    # These pre-warm Chrome instances for instant availability
-    # headless=False because Docker uses Xvfb virtual display for better compatibility
+    # In Docker: headless=False with Xvfb virtual display for better compatibility
+    # Locally: use CHROME_HEADLESS env var to control (default: headed for scraping)
+    is_docker = os.path.exists("/.dockerenv") or os.environ.get("DOCKER_CONTAINER", "false").lower() == "true"
+    chrome_headless = os.environ.get("CHROME_HEADLESS", "false").lower() == "true"
    await asyncio.to_thread(
        start_worker_pools,
        validation_size=1,
        scraping_size=2,
-        headless=False
+        headless=chrome_headless if not is_docker else False
    )
    log.info("Chrome worker pools started (1 validation + 2 scraping)")

@@ -172,6 +178,8 @@ app.include_router(batches_router)
 app.include_router(dashboard_router)
 app.include_router(admin_router)
 app.include_router(pipelines_router)
+app.include_router(reviewiq_analytics_router)
+app.include_router(sessions_router)  # Session handoff for validation → scraping


 # ==================== Request/Response Models ====================
@@ -220,6 +228,10 @@ class ScrapeRequest(BaseModel):
    callback_url: Optional[HttpUrl] = Field(None, description="URL to call when job completes (alternative to webhook)")
    scraper_version: Optional[str] = Field(None, description="Specific scraper version to use")
    scraper_variant: Optional[str] = Field(None, description="Scraper variant (e.g., 'fast', 'thorough', 'stealth')")
+    # Testing options
+    max_reviews: Optional[int] = Field(None, description="Maximum reviews to collect (for testing, default: unlimited)", ge=1, le=10000)
+    # Session handoff (v1.2.0) - reuse browser from validation
+    session_id: Optional[str] = Field(None, description="Session ID from /sessions/validate for browser reuse")


 class GoogleReviewsScrapeRequest(BaseModel):
@@ -236,6 +248,10 @@ class GoogleReviewsScrapeRequest(BaseModel):
    callback_url: Optional[HttpUrl] = Field(None, description="URL to call when job completes (alternative to webhook)")
    scraper_version: Optional[str] = Field(None, description="Specific scraper version to use")
    scraper_variant: Optional[str] = Field(None, description="Scraper variant (e.g., 'fast', 'thorough', 'stealth')")
+    # Testing options
+    max_reviews: Optional[int] = Field(None, description="Maximum reviews to collect (for testing, default: unlimited)", ge=1, le=10000)
+    # Session handoff (v1.2.0) - reuse browser from validation
+    session_id: Optional[str] = Field(None, description="Session ID from /sessions/validate for browser reuse")


 class JobResponse(BaseModel):
@@ -548,16 +564,21 @@ async def get_job(job_id: UUID):
        except:
            review_topics = None

-    # Extract business info from metadata if available
-    metadata = job.get('metadata')
-    if isinstance(metadata, str):
-        try:
-            metadata = json.loads(metadata)
-        except:
-            metadata = None
+    # Read business info from dedicated columns (with fallback to metadata for older jobs)
+    business_name = job.get('business_name')
+    business_category = job.get('business_category')

-    business_name = metadata.get('business_name') if metadata else None
-    business_category = metadata.get('business_category') if metadata else None
+    # Fallback to metadata for jobs created before migration
+    if not business_name or not business_category:
+        metadata = job.get('metadata')
+        if isinstance(metadata, str):
+            try:
+                metadata = json.loads(metadata)
+            except:
+                metadata = None
+        if metadata:
+            business_name = business_name or metadata.get('business_name')
+            # Note: business_category was not previously stored in metadata

    return JobResponse(
        job_id=str(job['job_id']),
@@ -1051,17 +1072,22 @@ async def list_jobs(

    result = []
    for job in jobs:
-        # Extract business info from metadata if available
-        metadata = job.get('metadata')
-        if isinstance(metadata, str):
-            try:
-                metadata = json.loads(metadata)
-            except:
-                metadata = None
+        # Read business info from dedicated columns (with fallback to metadata for older jobs)
+        business_name = job.get('business_name')
+        business_address = job.get('business_address')
+        business_category = job.get('business_category')

-        business_name = metadata.get('business_name') if metadata else None
-        business_address = metadata.get('business_address') if metadata else None
-        business_category = metadata.get('business_category') if metadata else None
+        # Fallback to metadata for jobs created before migration
+        if not business_name:
+            metadata = job.get('metadata')
+            if isinstance(metadata, str):
+                try:
+                    metadata = json.loads(metadata)
+                except:
+                    metadata = None
+            if metadata:
+                business_name = business_name or metadata.get('business_name')
+                business_address = business_address or metadata.get('business_address')

        # Parse review_topics if it's a string
        review_topics = job.get('review_topics')
@@ -1191,6 +1217,193 @@ async def get_stats():
    return StatsResponse(**stats)


+# ==================== GBP Categories Endpoints ====================
+
+@app.get("/categories", summary="Get GBP Categories")
+async def get_categories(
+    search: Optional[str] = Query(None, description="Search term for category name"),
+    parent: Optional[str] = Query(None, description="Parent path (ltree) to filter children"),
+    level: Optional[int] = Query(None, description="Category level (1-4)", ge=1, le=4),
+    limit: int = Query(5000, description="Maximum number of results", ge=1, le=10000),
+    offset: int = Query(0, description="Offset for pagination", ge=0),
+):
+    """
+    Get Google Business Profile categories.
+
+    Supports filtering by:
+    - search: Text search in category name (case-insensitive substring; the term
+      is matched literally, so `%`, `_` and `\\` are not treated as wildcards)
+    - parent: Get children of a specific path (all descendants, excluding the path itself)
+    - level: Filter by hierarchy level (1=Sector, 2=Business Type, 3=Sub-category, 4=Category)
+
+    Returns a dict with the matching page of `categories`, the `total` number of
+    matches ignoring pagination, and the `limit`/`offset` that were applied.
+
+    Raises HTTPException(500) when the database pool is not initialized.
+    """
+    if not db or not db.pool:
+        raise HTTPException(status_code=500, detail="Database not initialized")
+
+    async with db.pool.acquire() as conn:
+        # Build query dynamically based on filters. Only placeholder numbers are
+        # interpolated into the SQL text; all user values travel as bind parameters.
+        conditions = []
+        params = []
+        param_idx = 1
+
+        if search:
+            # Escape LIKE metacharacters (backslash is the default escape character)
+            # so a term such as "50%" or "a_b" is matched literally.
+            escaped = search.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+            conditions.append(f"name ILIKE ${param_idx}")
+            params.append(f"%{escaped}%")
+            param_idx += 1
+
+        if parent:
+            conditions.append(f"path <@ ${param_idx}::ltree AND path != ${param_idx}::ltree")
+            params.append(parent)
+            param_idx += 1
+
+        if level:
+            conditions.append(f"level = ${param_idx}")
+            params.append(level)
+            param_idx += 1
+
+        where_clause = " AND ".join(conditions) if conditions else "TRUE"
+
+        # Get total count
+        count_query = f"SELECT COUNT(*) FROM gbp_categories WHERE {where_clause}"
+        total = await conn.fetchval(count_query, *params)
+
+        # Get categories
+        query = f"""
+            SELECT id, name, slug, path::text as path, level, parent_id, category_count
+            FROM gbp_categories
+            WHERE {where_clause}
+            ORDER BY path
+            LIMIT ${param_idx} OFFSET ${param_idx + 1}
+        """
+        params.extend([limit, offset])
+
+        rows = await conn.fetch(query, *params)
+        categories = [dict(row) for row in rows]
+
+        return {
+            "categories": categories,
+            "total": total,
+            "limit": limit,
+            "offset": offset,
+        }
+
+
+@app.get("/categories/tree", summary="Get GBP Categories Tree")
+async def get_categories_tree(
+    root: Optional[str] = Query(None, description="Root path to start the tree from"),
+    max_depth: int = Query(4, description="Maximum depth of the tree", ge=1, le=4),
+):
+    """
+    Get categories as a hierarchical tree structure.
+
+    Returns nested categories starting from root (or all roots if not specified).
+    When `root` is given, the root category itself is the single top-level node,
+    at any nesting level of the hierarchy. `max_depth` counts levels from the
+    top-level node(s), which are depth 1.
+
+    Each node is the category row plus a `children` key holding a list of child
+    nodes, or None when it has no children within `max_depth`. `total` is the
+    number of rows fetched, not the number of nodes placed in the tree.
+
+    Raises HTTPException(500) when the database pool is not initialized.
+    """
+    if not db or not db.pool:
+        raise HTTPException(status_code=500, detail="Database not initialized")
+
+    async with db.pool.acquire() as conn:
+        if root:
+            # Get subtree starting from root
+            query = """
+                SELECT id, name, slug, path::text as path, level, parent_id, category_count
+                FROM gbp_categories
+                WHERE path <@ $1::ltree
+                ORDER BY path
+            """
+            rows = await conn.fetch(query, root)
+        else:
+            # Get all categories
+            query = """
+                SELECT id, name, slug, path::text as path, level, parent_id, category_count
+                FROM gbp_categories
+                ORDER BY path
+            """
+            rows = await conn.fetch(query)
+
+    # Rows are fully fetched; the tree is built after the connection is released.
+    categories = [dict(row) for row in rows]
+
+    # Index categories by parent path in one pass ('' is the parent of
+    # single-segment paths). Query order (ORDER BY path) is preserved per parent.
+    children_by_parent = {}
+    for cat in categories:
+        parent_path = cat['path'].rpartition('.')[0]
+        children_by_parent.setdefault(parent_path, []).append(cat)
+
+    def build_tree(parent_path, current_depth=1):
+        """Return the nested nodes whose direct parent is `parent_path`."""
+        if current_depth > max_depth:
+            return []
+
+        nodes = []
+        for cat in children_by_parent.get(parent_path, []):
+            children = build_tree(cat['path'], current_depth + 1)
+            nodes.append({
+                **cat,
+                'children': children if children else None
+            })
+        return nodes
+
+    # Start from the parent of `root` so a nested root (e.g. "a.b") becomes the
+    # top-level node. The subtree query returns no siblings of root, so it is
+    # the only entry under that parent. Previously only single-segment paths
+    # were accepted as top-level nodes, giving an empty tree for nested roots.
+    top_parent = root.rpartition('.')[0] if root else ''
+    tree = build_tree(top_parent)
+
+    return {
+        "tree": tree,
+        "total": len(categories),
+    }
+
+
+@app.get("/categories/{path:path}", summary="Get Category by Path")
+async def get_category_by_path(path: str):
+    """
+    Look up one category by its ltree path.
+
+    The response carries the category itself, its ancestors (ordered by path)
+    and its direct children (ordered by name).
+
+    Raises HTTPException(500) when the database pool is not initialized and
+    HTTPException(404) when no category has the given path.
+    """
+    if not db or not db.pool:
+        raise HTTPException(status_code=500, detail="Database not initialized")
+
+    # Exact-path lookup
+    category_sql = """
+            SELECT id, name, slug, path::text as path, level, parent_id, category_count
+            FROM gbp_categories
+            WHERE path = $1::ltree
+        """
+    # Every path containing the requested one, minus the path itself
+    ancestors_sql = """
+            SELECT id, name, slug, path::text as path, level, parent_id, category_count
+            FROM gbp_categories
+            WHERE path @> $1::ltree AND path != $1::ltree
+            ORDER BY path
+        """
+    # lquery '<path>.*{1}': exactly one extra label below the requested path
+    children_sql = """
+            SELECT id, name, slug, path::text as path, level, parent_id, category_count
+            FROM gbp_categories
+            WHERE path ~ ($1 || '.*{1}')::lquery
+            ORDER BY name
+        """
+
+    async with db.pool.acquire() as conn:
+        record = await conn.fetchrow(category_sql, path)
+        if not record:
+            raise HTTPException(status_code=404, detail="Category not found")
+
+        ancestor_rows = await conn.fetch(ancestors_sql, path)
+        child_rows = await conn.fetch(children_sql, path)
+
+        return {
+            "category": dict(record),
+            "ancestors": [dict(row) for row in ancestor_rows],
+            "children": [dict(row) for row in child_rows],
+        }
+
+
@app.get("/pool-stats", summary="Get Worker Pool Statistics")
 async def pool_stats():
    """Get Chrome worker pool statistics"""
@@ -1331,10 +1544,82 @@ async def get_crash_report(job_id: UUID):
    )


+# Available sort orders for retry strategy
+SORT_ORDERS = ["newest", "lowest", "highest", "relevant"]
+
+# Fingerprint rotation for retry - realistic browser profiles to avoid bot detection
+import random
+
+FINGERPRINT_PROFILES = [
+    {
+        "platform": "MacIntel",
+        "timezone": "Europe/Madrid",
+        "language": "es-ES",
+        "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+        "viewport": {"width": 1440, "height": 900}
+    },
+    {
+        "platform": "Win32",
+        "timezone": "Europe/London",
+        "language": "en-GB",
+        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+        "viewport": {"width": 1920, "height": 1080}
+    },
+    {
+        "platform": "MacIntel",
+        "timezone": "America/New_York",
+        "language": "en-US",
+        "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
+        "viewport": {"width": 1680, "height": 1050}
+    },
+    {
+        "platform": "Win32",
+        "timezone": "Europe/Paris",
+        "language": "fr-FR",
+        "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+        "viewport": {"width": 1366, "height": 768}
+    },
+    {
+        "platform": "MacIntel",
+        "timezone": "Europe/Berlin",
+        "language": "de-DE",
+        "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
+        "viewport": {"width": 1512, "height": 982}
+    },
+]
+
+def get_rotated_fingerprint(retry_attempt: int = 0, previous_fingerprints: list = None) -> dict:
+    """
+    Get a fingerprint profile for retry, avoiding previously used platforms.
+
+    Args:
+        retry_attempt: Which retry attempt this is (0-indexed); used as a
+            deterministic index into the list of candidate profiles
+        previous_fingerprints: List of previously used fingerprint platforms
+            (e.g. "MacIntel"); None is treated as an empty list
+
+    Returns:
+        An independent copy of the selected profile. Nested dicts such as
+        "viewport" are copied too, so mutating the result never alters
+        FINGERPRINT_PROFILES.
+    """
+    previous_fingerprints = previous_fingerprints or []
+
+    # Filter out profiles whose platform was already used
+    available = [fp for fp in FINGERPRINT_PROFILES
+                 if fp["platform"] not in previous_fingerprints]
+
+    # If all platforms were used, cycle back to the full list
+    if not available:
+        available = FINGERPRINT_PROFILES
+
+    # Select based on retry attempt (deterministic but varied)
+    selected = available[retry_attempt % len(available)]
+
+    # dict.copy() would be shallow and leave the nested "viewport" dict shared
+    # with the module-level profile, so copy nested dicts as well.
+    return {
+        key: (dict(value) if isinstance(value, dict) else value)
+        for key, value in selected.items()
+    }
+
+
@app.post("/jobs/{job_id}/retry", response_model=RetryJobResponse, summary="Retry Failed Job")
 async def retry_job(
    job_id: UUID,
-    apply_fix: bool = Query(False, description="Apply auto-fix parameters based on crash analysis")
+    apply_fix: bool = Query(False, description="Apply auto-fix parameters based on crash analysis"),
+    next_sort: bool = Query(False, description="Use a different sort order than the original job (for partial jobs)")
 ):
    """
    Retry a failed or partial job, optionally applying auto-fix parameters.
@@ -1344,6 +1629,11 @@ async def retry_job(
    - Applies recommended parameter adjustments (e.g., reduced batch size for memory issues)
    - Creates a new job with the adjusted parameters

+    When next_sort=true:
+    - Uses a different sort order than previously attempted
+    - Helps get different reviews when stuck at ~1000 limit
+    - Tracks sort_orders_attempted for review merging
+
    Returns the new job ID for tracking.
    """
    if not db:
@@ -1418,6 +1708,72 @@ async def retry_job(
            applied_fixes = analysis.auto_fix_params
            log.info(f"Applying auto-fix for pattern '{analysis.pattern}': {applied_fixes}")

+    # Handle next_sort: use a different sort order than previously attempted
+    selected_sort = None
+    if next_sort:
+        # Get previously attempted sort orders
+        sort_orders_attempted = original_metadata.get('sort_orders_attempted', [])
+
+        # If no sort was tracked, assume "newest" was used (default)
+        if not sort_orders_attempted:
+            initial_sort_used = original_metadata.get('initial_sort_used', 'newest')
+            sort_orders_attempted = [initial_sort_used]
+
+        # Find next unused sort order
+        for sort_order in SORT_ORDERS:
+            if sort_order not in sort_orders_attempted:
+                selected_sort = sort_order
+                break
+
+        if selected_sort:
+            # Set the new sort strategy
+            original_metadata['initial_sort'] = selected_sort
+            original_metadata['sort_strategy'] = 'single'  # Don't auto-trigger multi-sort
+
+            # Track all attempted sorts (including this one)
+            original_metadata['sort_orders_attempted'] = sort_orders_attempted + [selected_sort]
+
+            # Track retry chain for review merging
+            if 'retry_chain' not in original_metadata:
+                original_metadata['retry_chain'] = [str(job_id)]
+            else:
+                original_metadata['retry_chain'].append(str(job_id))
+
+            original_metadata['retry_info'] = original_metadata.get('retry_info', {})
+            original_metadata['retry_info']['original_job_id'] = str(job_id)
+            original_metadata['retry_info']['retry_reason'] = 'next_sort'
+            original_metadata['retry_info']['selected_sort'] = selected_sort
+
+            log.info(f"Retry with next_sort: using '{selected_sort}' (previously tried: {sort_orders_attempted})")
+        else:
+            log.warn(f"All sort orders already attempted: {sort_orders_attempted}")
+
+    # Fingerprint rotation: if bot was detected, use a different fingerprint
+    selected_fingerprint = None
+    if next_sort and original_metadata.get('bot_detected', False):
+        # Get previously used fingerprints
+        previous_fingerprints = original_metadata.get('fingerprints_used', [])
+        retry_count = len(original_metadata.get('retry_chain', []))
+
+        # Get a rotated fingerprint
+        selected_fingerprint = get_rotated_fingerprint(retry_count, previous_fingerprints)
+
+        # Store the fingerprint in metadata
+        original_metadata['browser_fingerprint'] = selected_fingerprint
+
+        # Track used fingerprints
+        original_metadata.setdefault('fingerprints_used', []).append(selected_fingerprint['platform'])
+
+        # 'retry_info' is only created by the next_sort branch when an unused
+        # sort order was found. If every sort order has already been tried it
+        # may be missing, so create it here instead of raising KeyError.
+        retry_info = original_metadata.setdefault('retry_info', {})
+        retry_info['fingerprint_rotated'] = True
+        retry_info['new_fingerprint'] = {
+            'platform': selected_fingerprint['platform'],
+            'timezone': selected_fingerprint['timezone']
+        }
+
+        log.info(f"Fingerprint rotated for retry: {selected_fingerprint['platform']}, {selected_fingerprint['timezone']}")
+
+
    # Create new job with same URL and (possibly modified) metadata
    new_job_id = await db.create_job(
        url=original_job['url'],
@@ -1431,11 +1787,28 @@ async def retry_job(

    log.info(f"Created retry job {new_job_id} for original job {job_id}")

+    # Build response message
+    message = f"Retry job created from original job {job_id}"
+    if selected_sort:
+        message += f" (using sort: {selected_sort})"
+    if selected_fingerprint:
+        message += f" (fingerprint: {selected_fingerprint['platform']}/{selected_fingerprint['timezone']})"
+
+    # Build applied_fixes response
+    retry_fixes = {}
+    if selected_sort:
+        retry_fixes["selected_sort"] = selected_sort
+    if selected_fingerprint:
+        retry_fixes["fingerprint"] = {
+            "platform": selected_fingerprint["platform"],
+            "timezone": selected_fingerprint["timezone"]
+        }
+
    return RetryJobResponse(
        job_id=str(new_job_id),
        status="started",
-        message=f"Retry job created from original job {job_id}",
-        applied_fixes=applied_fixes
+        message=message,
+        applied_fixes=applied_fixes if applied_fixes else (retry_fixes if retry_fixes else None)
    )


@@ -1529,8 +1902,9 @@ async def liveness():

    Use this for Kubernetes liveness probe - restart container if fails.
    """
+    # If health system is disabled, just return healthy (server is alive)
    if not health_system:
-        raise HTTPException(status_code=503, detail="Health system not initialized")
+        return {"status": "healthy", "message": "Server is alive (health system disabled)"}

    return await health_system.check_liveness()

@@ -1542,8 +1916,12 @@ async def readiness():

    Use this for Kubernetes readiness probe - remove from load balancer if fails.
    """
+    # If health system is disabled, check if DB is connected
    if not health_system:
-        raise HTTPException(status_code=503, detail="Health system not initialized")
+        if db and db.pool:
+            return {"status": "ready", "message": "Server is ready (health system disabled)"}
+        else:
+            raise HTTPException(status_code=503, detail="Database not connected")

    result = await health_system.check_readiness()

@@ -1728,17 +2106,67 @@ async def run_scraping_job(job_id: UUID):
            scraper_func, actual_version = get_scraper_for_version(requested_version)
            log.info(f"Using scraper version {actual_version} for job {job_id}")

-            # Run scraping with progress callback and shared log capture
-            # headless=False because Docker uses Xvfb virtual display
-            result = await asyncio.to_thread(
-                scraper_func,
-                url=url,
-                headless=False,
-                progress_callback=progress_callback,
-                log_capture=log_capture,
-                flush_callback=flush_callback,
-                browser_fingerprint=browser_fingerprint  # Pass user's browser fingerprint
-            )
+            # Get sort strategy parameters from metadata (for retry with different sort)
+            initial_sort = metadata.get('initial_sort') if metadata else None
+            sort_strategy = metadata.get('sort_strategy', 'auto') if metadata else 'auto'
+            max_reviews = metadata.get('max_reviews') if metadata else None
+            session_id = metadata.get('session_id') if metadata else None
+            if initial_sort:
+                log.info(f"Using initial_sort={initial_sort}, sort_strategy={sort_strategy} for job {job_id}")
+            if max_reviews:
+                log.info(f"Using max_reviews={max_reviews} limit for job {job_id} (testing mode)")
+
+            # Check if we have a session_id for browser reuse (session handoff from validation)
+            if session_id:
+                log.info(f"Using session handoff (session_id={session_id}) for job {job_id} - skipping navigation")
+                from scrapers.google_reviews.v1_2_0 import scrape_with_session
+                result = await asyncio.to_thread(
+                    scrape_with_session,
+                    session_id=session_id,
+                    max_reviews=max_reviews,
+                    progress_callback=progress_callback,
+                    flush_callback=flush_callback,
+                    sort_strategy=sort_strategy,
+                    initial_sort=initial_sort
+                )
+                # Add logs from session scraping
+                if 'logs' in result:
+                    for log_entry in result.get('logs', []):
+                        log_capture.entries.append(log_entry)
+            else:
+                # Run scraping with progress callback and shared log capture
+                # headless=False because Docker uses Xvfb virtual display
+                result = await asyncio.to_thread(
+                    scraper_func,
+                    url=url,
+                    headless=False,
+                    progress_callback=progress_callback,
+                    log_capture=log_capture,
+                    flush_callback=flush_callback,
+                    browser_fingerprint=browser_fingerprint,  # Pass user's browser fingerprint
+                    initial_sort=initial_sort,  # Sort order for retry strategy
+                    sort_strategy=sort_strategy,  # Sort strategy (auto, multi, single)
+                    max_reviews=max_reviews  # Optional limit for testing
+                )
+
+            # Update job metadata with tracking info from scraper result
+            tracking_metadata = {
+                'bot_detected': result.get('bot_detected', False),
+                'initial_sort_used': result.get('initial_sort_used', 'newest'),
+                'multi_sort': result.get('multi_sort', {}),
+            }
+            # Preserve existing sort_orders_attempted and add current sort
+            existing_sorts = metadata.get('sort_orders_attempted', []) if metadata else []
+            current_sort = result.get('initial_sort_used', 'newest')
+            if current_sort not in existing_sorts:
+                tracking_metadata['sort_orders_attempted'] = existing_sorts + [current_sort]
+            else:
+                tracking_metadata['sort_orders_attempted'] = existing_sorts
+
+            # Update metadata in database
+            await db.update_job_metadata(job_id, tracking_metadata)
+            if result.get('bot_detected'):
+                log.warn(f"Bot detection flagged for job {job_id} - sort button was hidden")

            if result['success']:
                # Save session fingerprint if captured
@@ -1746,6 +2174,18 @@ async def run_scraping_job(job_id: UUID):
                    await db.update_session_fingerprint(job_id, result['session_fingerprint'])
                    log.info(f"Saved session fingerprint for job {job_id}")

+                # Save business info to dedicated columns (queryable/indexable)
+                business_info = result.get('business_info', {})
+                if business_info:
+                    await db.update_business_info(
+                        job_id=job_id,
+                        business_name=business_info.get('name'),
+                        business_category=business_info.get('category'),
+                        business_address=business_info.get('address'),
+                        business_rating=business_info.get('rating')
+                    )
+                    log.info(f"Saved business info for job {job_id}: {business_info.get('name')} ({business_info.get('category')})")
+
                # Save results to database (including scraper logs and review topics)
                await db.save_job_result(
                    job_id=job_id,
--- a/core/database.py
+++ b/core/database.py
@@ -354,7 +354,11 @@ class DatabaseManager:
                    callback_status,
                    callback_attempts,
                    scraper_version,
-                    scraper_variant
+                    scraper_variant,
+                    business_name,
+                    business_category,
+                    business_address,
+                    business_rating
                FROM jobs
                WHERE job_id = $1
            """, job_id)
@@ -575,6 +579,69 @@ class DatabaseManager:

            log.debug(f"Updated session fingerprint for job {job_id}")

+    async def update_job_metadata(
+        self,
+        job_id: UUID,
+        metadata_updates: Dict[str, Any]
+    ):
+        """
+        Merge the given keys into a job's metadata column.
+
+        The update uses JSONB concatenation, so top-level keys present in
+        `metadata_updates` replace the stored values and all other stored
+        keys are kept. A NULL metadata column is treated as an empty object.
+
+        Args:
+            job_id: Job UUID
+            metadata_updates: Top-level metadata fields to add or replace.
+                Keys written by callers in this file:
+                - bot_detected: True if sort button was hidden (bot detection)
+                - initial_sort_used: Sort order used for scraping
+                - sort_orders_attempted: List of all sort orders tried
+                - multi_sort: Multi-sort completion info
+        """
+        serialized_updates = json.dumps(metadata_updates)
+        touched_keys = list(metadata_updates.keys())
+
+        async with self.pool.acquire() as conn:
+            # `existing || new` keeps existing keys and lets new ones win
+            await conn.execute("""
+                UPDATE jobs
+                SET
+                    metadata = COALESCE(metadata, '{}'::jsonb) || $2::jsonb,
+                    updated_at = NOW()
+                WHERE job_id = $1
+            """, job_id, serialized_updates)
+
+            log.debug(f"Updated job metadata for job {job_id}: {touched_keys}")
+
+
+    async def update_business_info(
+        self,
+        job_id: UUID,
+        business_name: Optional[str] = None,
+        business_category: Optional[str] = None,
+        business_address: Optional[str] = None,
+        business_rating: Optional[float] = None
+    ):
+        """
+        Write business details into the dedicated columns of a job row.
+
+        Each column is updated through COALESCE, so an argument left as None
+        keeps whatever value the row already holds; `updated_at` is always
+        refreshed.
+
+        Args:
+            job_id: Job UUID
+            business_name: Business name from Google Maps
+            business_category: Business category (e.g., "Restaurant", "Toy store")
+            business_address: Full address from Google Maps
+            business_rating: Aggregate rating at time of scrape (e.g., 4.5)
+        """
+        # Positional order matches placeholders $2..$5 in the statement below
+        column_values = (business_name, business_category, business_address, business_rating)
+
+        async with self.pool.acquire() as conn:
+            await conn.execute("""
+                UPDATE jobs
+                SET
+                    business_name = COALESCE($2, business_name),
+                    business_category = COALESCE($3, business_category),
+                    business_address = COALESCE($4, business_address),
+                    business_rating = COALESCE($5, business_rating),
+                    updated_at = NOW()
+                WHERE job_id = $1
+            """, job_id, *column_values)
+
+            log.debug(f"Updated business info for job {job_id}: name={business_name}, category={business_category}")
+
+
    async def mark_job_partial(
        self,
        job_id: UUID,
@@ -674,7 +741,11 @@ class DatabaseManager:
                    callback_status,
                    callback_attempts,
                    scraper_version,
-                    scraper_variant
+                    scraper_variant,
+                    business_name,
+                    business_category,
+                    business_address,
+                    business_rating
                FROM jobs
                {where_clause}
                ORDER BY created_at DESC
--- a/db/apply_recategorization.py
+++ b/db/apply_recategorization.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""
+Apply the hierarchical recategorization to the database.
+
+This script:
+1. Gets all items currently in Other.Uncategorized
+2. Applies the categorization rules
+3. Updates the database with new paths
+4. Creates new level 2/3 categories as needed
+5. Updates category counts
+"""
+
+import psycopg2
+import re
+from collections import defaultdict
+
+# Import categorization functions from the sibling module that lives next to
+# this script. The directory is resolved from __file__ so the import works from
+# any checkout location rather than one developer's home directory.
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from recategorize_hierarchical import get_sector_for_item, get_business_type_for_item
+
+DB_URL = "postgresql://scraper:scraper123@localhost:5437/scraper"
+
+def slugify(text):
+    """Convert text to an ltree-safe slug.
+
+    Uses the same rules as slugify() in import_categories.py so that paths
+    built here line up with paths created by the importer:
+
+    - every run of characters outside [a-zA-Z0-9] becomes one underscore
+    - leading/trailing underscores are removed
+    - a slug that does not start with a letter is prefixed with 'cat_'
+    - an empty result becomes 'unknown' (an empty label would make the
+      ltree path invalid)
+    """
+    slug = re.sub(r'[^a-zA-Z0-9]+', '_', text).strip('_')
+    if slug and not slug[0].isalpha():
+        slug = 'cat_' + slug
+    return slug or 'unknown'
+
+def _ensure_category(cursor, existing_paths, created_paths, name, slug, path, level, parent_path):
+    """Insert the category at `path` under `parent_path` unless already known; return True if a row was created."""
+    if path in existing_paths or path in created_paths:
+        return False
+
+    # INSERT ... SELECT takes parent_id from the parent row; if the parent is
+    # missing or the path already exists, no row is returned.
+    cursor.execute("""
+        INSERT INTO gbp_categories (name, slug, path, level, parent_id, category_count)
+        SELECT %s, %s, %s::ltree, %s, id, 0
+        FROM gbp_categories WHERE path = %s::ltree
+        ON CONFLICT (path) DO NOTHING
+        RETURNING id
+    """, (name, slug, path, level, parent_path))
+    result = cursor.fetchone()
+    if not result:
+        return False
+
+    existing_paths[path] = result[0]
+    created_paths.add(path)
+    return True
+
+
+def main():
+    """
+    Interactively move level-4 categories out of Other.Uncategorized.
+
+    Steps:
+    1. Load the level-4 rows under Other.Uncategorized and all existing paths.
+    2. Classify each row with get_sector_for_item / get_business_type_for_item
+       and print the planned distribution.
+    3. After the user types "yes", move every classified row to
+       <sector>.<business type>.General.<slug>, creating the level 2 and
+       level 3 rows when they do not exist yet.
+    4. Recompute category_count from parent_id links and commit once.
+
+    Each move runs inside its own SAVEPOINT. Without it, the first SQL error
+    would leave the transaction in the aborted state, every later statement
+    would fail, and the final commit would persist nothing. The connection is
+    always closed, including on unexpected errors (uncommitted work is then
+    discarded).
+    """
+    conn = psycopg2.connect(DB_URL)
+    try:
+        cursor = conn.cursor()
+
+        # Get all items in Other.Uncategorized
+        cursor.execute("""
+            SELECT id, name, slug
+            FROM gbp_categories
+            WHERE path ~ 'Other.Uncategorized.*' AND level = 4
+            ORDER BY name
+        """)
+        other_items = cursor.fetchall()
+        print(f"Found {len(other_items)} items in Other.Uncategorized")
+
+        # Get existing paths
+        cursor.execute("SELECT path::text, id FROM gbp_categories")
+        existing_paths = {row[0]: row[1] for row in cursor.fetchall()}
+        print(f"Found {len(existing_paths)} existing paths")
+
+        # Categorize items
+        moves = []  # (item_id, item_name, item_slug, new_sector, new_btype)
+        stats = defaultdict(int)
+
+        for item_id, name, slug in other_items:
+            sector = get_sector_for_item(name)
+            btype = get_business_type_for_item(name, sector)
+
+            if sector != 'Other':
+                moves.append((item_id, name, slug, sector, btype))
+                stats[sector] += 1
+            else:
+                stats['Still_Other'] += 1
+
+        print(f"\nCategorization results:")
+        for sector, count in sorted(stats.items(), key=lambda x: -x[1]):
+            print(f"  {sector}: {count}")
+
+        print(f"\nTotal to move: {len(moves)}")
+        print(f"Remaining in Other: {stats.get('Still_Other', 0)}")
+
+        # Ask for confirmation
+        response = input("\nProceed with database updates? (yes/no): ")
+        if response.lower() != 'yes':
+            print("Aborted.")
+            return
+
+        # Process moves
+        created_paths = set()
+        updated = 0
+        errors = []
+
+        for item_id, name, slug, sector, btype in moves:
+            # Isolate this item's statements so a failure can be undone
+            # without aborting the whole transaction.
+            cursor.execute("SAVEPOINT item_move")
+            paths_added = []  # paths inserted for this item (forgotten on rollback)
+            try:
+                sector_slug = slugify(sector)
+                btype_slug = slugify(btype)
+
+                # Check if sector exists
+                sector_path = sector_slug
+                if sector_path not in existing_paths:
+                    print(f"  [ERROR] Sector not found: {sector_path} for '{name}'")
+                    errors.append((name, f"Sector not found: {sector_path}"))
+                    continue
+
+                # Check/create business type (level 2)
+                btype_path = f"{sector_path}.{btype_slug}"
+                if _ensure_category(cursor, existing_paths, created_paths,
+                                    btype, btype_slug, btype_path, 2, sector_path):
+                    paths_added.append(btype_path)
+                    print(f"  [NEW] Created business type: {btype_path}")
+
+                # Check/create sub-category (level 3) - use "General" as default
+                subcat = "General"
+                subcat_slug = "General"
+                subcat_path = f"{btype_path}.{subcat_slug}"
+                if _ensure_category(cursor, existing_paths, created_paths,
+                                    subcat, subcat_slug, subcat_path, 3, btype_path):
+                    paths_added.append(subcat_path)
+                    print(f"  [NEW] Created sub-category: {subcat_path}")
+
+                # Update the item's path
+                new_path = f"{subcat_path}.{slug}"
+                cursor.execute("""
+                    UPDATE gbp_categories
+                    SET path = %s::ltree,
+                        parent_id = (SELECT id FROM gbp_categories WHERE path = %s::ltree)
+                    WHERE id = %s
+                """, (new_path, subcat_path, item_id))
+                updated += 1
+
+            except Exception as e:
+                # Undo this item's partial work and forget any paths whose
+                # INSERT was just rolled back, so later items re-create them.
+                cursor.execute("ROLLBACK TO SAVEPOINT item_move")
+                for path in paths_added:
+                    existing_paths.pop(path, None)
+                    created_paths.discard(path)
+                errors.append((name, str(e)))
+                print(f"  [ERROR] {name}: {e}")
+            finally:
+                # The savepoint still exists after ROLLBACK TO; release it on
+                # every path (success, skip, error) so they do not pile up.
+                cursor.execute("RELEASE SAVEPOINT item_move")
+
+        # Update category counts
+        print("\nUpdating category counts...")
+        cursor.execute("""
+            WITH counts AS (
+                SELECT
+                    parent_id,
+                    COUNT(*) as cnt
+                FROM gbp_categories
+                WHERE parent_id IS NOT NULL
+                GROUP BY parent_id
+            )
+            UPDATE gbp_categories g
+            SET category_count = COALESCE(c.cnt, 0)
+            FROM counts c
+            WHERE g.id = c.parent_id
+        """)
+
+        # Also reset counts for categories that no longer have children
+        cursor.execute("""
+            UPDATE gbp_categories
+            SET category_count = 0
+            WHERE id NOT IN (
+                SELECT DISTINCT parent_id FROM gbp_categories WHERE parent_id IS NOT NULL
+            )
+            AND level < 4
+        """)
+
+        conn.commit()
+
+        print(f"\n{'='*60}")
+        print(f"SUMMARY")
+        print(f"{'='*60}")
+        print(f"Items moved: {updated}")
+        print(f"New paths created: {len(created_paths)}")
+        print(f"Errors: {len(errors)}")
+
+        if errors:
+            print("\nErrors:")
+            for name, err in errors[:10]:
+                print(f"  - {name}: {err}")
+            if len(errors) > 10:
+                print(f"  ... and {len(errors) - 10} more")
+
+        # Show final stats
+        cursor.execute("""
+            SELECT
+                SPLIT_PART(path::text, '.', 1) as sector,
+                COUNT(*) as count
+            FROM gbp_categories
+            WHERE level = 4
+            GROUP BY sector
+            ORDER BY count DESC
+        """)
+        print("\nFinal category distribution:")
+        for sector, count in cursor.fetchall():
+            print(f"  {sector}: {count}")
+    finally:
+        conn.close()
+
+
+if __name__ == '__main__':
+    main()
--- a/db/import_categories.py
+++ b/db/import_categories.py
@@ -0,0 +1,977 @@
+#!/usr/bin/env python3
+"""
+Import Google Business Profile categories into PostgreSQL with ltree hierarchy.
+
+Usage:
+    python import_categories.py [--csv-path PATH] [--db-url URL]
+
+Example:
+    python import_categories.py --csv-path ./categories.csv --db-url postgresql://scraper:scraper123@localhost:5437/scraper
+"""
+
+import csv
+import re
+import os
+import argparse
+from typing import Optional
+
+try:
+    import psycopg2  # treated as optional: a failed import is tolerated below
+    from psycopg2.extras import execute_values
+    HAS_PSYCOPG2 = True  # both psycopg2 imports succeeded
+except ImportError:
+    HAS_PSYCOPG2 = False  # psycopg2 (or psycopg2.extras) could not be imported
+
+# Default CSV location and database URL. NOTE(review): the URL embeds a plaintext password - consider sourcing it from the environment.
+DEFAULT_CSV_PATH = os.path.expanduser("~/Downloads/Google Business Profile Categories (2025 List) - Category List (English).csv")
+DEFAULT_DB_URL = "postgresql://scraper:scraper123@localhost:5437/scraper"
+
+
+def slugify(text: str) -> str:
+    """Return an ltree-safe label built from ``text`` ('unknown' if nothing survives)."""
+    # Splitting on runs of non-alphanumerics and dropping the empty edge pieces
+    # equals collapsing each run to '_' and trimming underscores from both ends.
+    label = '_'.join(piece for piece in re.split(r'[^a-zA-Z0-9]+', text) if piece)
+    if not label:
+        return 'unknown'
+    # A label that does not begin with a letter is given a 'cat_' prefix.
+    starts_with_letter = label[0].isalpha()
+    return label if starts_with_letter else 'cat_' + label
+
+
+def categorize_category(cat: str) -> tuple:
+    """
+    Categorize a GBP category into 4-level hierarchy.
+    Returns: (level1, level2, level3, level4)
+    """
+    c = cat.lower()
+
+    # === FOOD & DINING ===
+    if 'restaurant' in c:
+        if any(x in c for x in ['fast food', 'drive-in', 'takeaway', 'takeout', 'quick service']):
+            return ("Food & Dining", "Restaurants", "Fast Food & Quick Service", cat)
+        # Cuisine types
+        return ("Food & Dining", "Restaurants", "By Cuisine", cat)
+
+    if any(x in c for x in ['cafe', 'coffee shop', 'tea house', 'tea room', 'espresso bar']):
+        return ("Food & Dining", "Cafes & Coffee", "Coffee Shops", cat)
+
+    if any(x in c for x in ['bar', 'pub', 'nightclub', 'night club', 'cocktail', 'wine bar', 'beer', 'lounge']):
+        if 'gay' in c or 'lesbian' in c:
+            return ("Food & Dining", "Bars & Nightlife", "LGBTQ+ Venues", cat)
+        if 'karaoke' in c:
+            return ("Food & Dining", "Bars & Nightlife", "Karaoke", cat)
+        return ("Food & Dining", "Bars & Nightlife", "Bars & Pubs", cat)
+
+    if any(x in c for x in ['bakery', 'pastry', 'cake', 'donut', 'dessert', 'ice cream', 'frozen yogurt', 'candy', 'chocolate', 'confection']):
+        return ("Food & Dining", "Bakeries & Desserts", "Sweet Shops", cat)
+
+    if any(x in c for x in ['caterer', 'catering']):
+        return ("Food & Dining", "Food Services", "Catering", cat)
+
+    if any(x in c for x in ['brewery', 'winery', 'distillery', 'vineyard']):
+        return ("Food & Dining", "Beverage Production", "Producers", cat)
+
+    if any(x in c for x in ['food truck', 'food stand', 'food stall', 'food court']):
+        return ("Food & Dining", "Quick Service", "Street Food", cat)
+
+    # === RETAIL & SHOPPING ===
+    if 'store' in c or 'shop' in c:
+        if any(x in c for x in ['clothing', 'fashion', 'shoe', 'dress', 'apparel', 'wear', 'boutique', 'tailor']):
+            return ("Retail & Shopping", "Clothing & Fashion", "Apparel Stores", cat)
+        if any(x in c for x in ['electronic', 'computer', 'phone', 'appliance', 'tv', 'audio', 'video game']):
+            return ("Retail & Shopping", "Electronics", "Electronics Stores", cat)
+        if any(x in c for x in ['furniture', 'home decor', 'kitchen', 'bed', 'mattress', 'carpet', 'curtain', 'lighting']):
+            return ("Retail & Shopping", "Home & Garden", "Home Furnishings", cat)
+        if any(x in c for x in ['grocery', 'supermarket', 'food', 'beverage', 'wine', 'liquor', 'butcher', 'fish', 'fruit', 'vegetable']):
+            return ("Retail & Shopping", "Food & Grocery", "Grocery Stores", cat)
+        if any(x in c for x in ['book', 'stationery', 'office supply', 'paper']):
+            return ("Retail & Shopping", "Books & Office", "Book Stores", cat)
+        if any(x in c for x in ['pet', 'animal']):
+            return ("Retail & Shopping", "Pet Supplies", "Pet Stores", cat)
+        if any(x in c for x in ['toy', 'game', 'hobby']):
+            return ("Retail & Shopping", "Toys & Hobbies", "Toy Stores", cat)
+        if any(x in c for x in ['jewelry', 'watch', 'gold', 'diamond']):
+            return ("Retail & Shopping", "Jewelry & Watches", "Jewelry Stores", cat)
+        if any(x in c for x in ['sport', 'athletic', 'fitness', 'outdoor', 'camping', 'fishing', 'hunting']):
+            return ("Retail & Shopping", "Sports & Outdoors", "Sporting Goods", cat)
+        if any(x in c for x in ['music', 'instrument', 'record', 'vinyl']):
+            return ("Retail & Shopping", "Music & Entertainment", "Music Stores", cat)
+        if any(x in c for x in ['art', 'craft', 'fabric', 'sewing', 'yarn', 'knitting']):
+            return ("Retail & Shopping", "Arts & Crafts", "Art Supply Stores", cat)
+        if any(x in c for x in ['beauty', 'cosmetic', 'perfume', 'makeup']):
+            return ("Retail & Shopping", "Beauty & Cosmetics", "Beauty Stores", cat)
+        if any(x in c for x in ['pharmacy', 'drug', 'medicine', 'health']):
+            return ("Retail & Shopping", "Health & Pharmacy", "Pharmacies", cat)
+        if any(x in c for x in ['garden', 'plant', 'flower', 'nursery', 'landscap']):
+            return ("Retail & Shopping", "Home & Garden", "Garden Centers", cat)
+        if any(x in c for x in ['hardware', 'tool', 'building', 'lumber', 'paint']):
+            return ("Retail & Shopping", "Hardware & Building", "Hardware Stores", cat)
+        if any(x in c for x in ['antique', 'vintage', 'thrift', 'consignment', 'second hand', 'used']):
+            return ("Retail & Shopping", "Secondhand & Vintage", "Thrift Stores", cat)
+        return ("Retail & Shopping", "Specialty Retail", "Other Stores", cat)
+
+    if any(x in c for x in ['supplier', 'wholesaler', 'distributor', 'exporter', 'importer']):
+        if any(x in c for x in ['food', 'beverage', 'meat', 'seafood', 'produce']):
+            return ("Retail & Shopping", "Wholesale & Distribution", "Food Wholesale", cat)
+        if any(x in c for x in ['building', 'construction', 'lumber', 'concrete', 'steel']):
+            return ("Retail & Shopping", "Wholesale & Distribution", "Building Materials", cat)
+        if any(x in c for x in ['industrial', 'machinery', 'equipment']):
+            return ("Retail & Shopping", "Wholesale & Distribution", "Industrial Supplies", cat)
+        return ("Retail & Shopping", "Wholesale & Distribution", "General Wholesale", cat)
+
+    if 'market' in c and 'marketing' not in c:
+        if 'flea' in c or 'antique' in c:
+            return ("Retail & Shopping", "Markets", "Flea Markets", cat)
+        if 'farmer' in c:
+            return ("Retail & Shopping", "Markets", "Farmers Markets", cat)
+        return ("Retail & Shopping", "Markets", "General Markets", cat)
+
+    # === AUTOMOTIVE ===
+    if 'dealer' in c:
+        car_brands = ['abarth', 'acura', 'alfa romeo', 'aston martin', 'audi', 'bentley', 'bmw', 'bugatti',
+                      'buick', 'cadillac', 'chevrolet', 'chrysler', 'citroen', 'cupra', 'dacia', 'daihatsu',
+                      'dodge', 'ferrari', 'fiat', 'ford', 'genesis', 'gmc', 'honda', 'hummer', 'hyundai',
+                      'infiniti', 'isuzu', 'jaguar', 'jeep', 'kia', 'lamborghini', 'lancia', 'land rover',
+                      'lexus', 'lincoln', 'lotus', 'maserati', 'mazda', 'mclaren', 'mercedes', 'mini',
+                      'mitsubishi', 'nissan', 'opel', 'peugeot', 'porsche', 'ram', 'renault', 'rolls-royce',
+                      'saab', 'seat', 'skoda', 'smart', 'subaru', 'suzuki', 'tesla', 'toyota', 'volkswagen',
+                      'volvo', 'yamaha', 'harley', 'ducati', 'kawasaki', 'triumph', 'vespa', 'piaggio']
+        if any(b in c for b in car_brands):
+            if 'motorcycle' in c or any(x in c for x in ['harley', 'ducati', 'kawasaki', 'triumph', 'vespa']):
+                return ("Automotive", "Dealers", "Motorcycle Brands", cat)
+            return ("Automotive", "Dealers", "Car Brands", cat)
+        if any(x in c for x in ['motorcycle', 'scooter', 'moped']):
+            return ("Automotive", "Dealers", "Motorcycle Dealers", cat)
+        if any(x in c for x in ['truck', 'commercial vehicle', 'trailer']):
+            return ("Automotive", "Dealers", "Truck & Commercial", cat)
+        if any(x in c for x in ['boat', 'yacht', 'marine', 'jet ski']):
+            return ("Automotive", "Dealers", "Marine & Boats", cat)
+        if any(x in c for x in ['rv', 'camper', 'motorhome', 'caravan']):
+            return ("Automotive", "Dealers", "RV & Campers", cat)
+        if any(x in c for x in ['atv', 'quad', 'off-road', 'utv']):
+            return ("Automotive", "Dealers", "ATV & Off-Road", cat)
+        if 'used' in c or 'pre-owned' in c:
+            return ("Automotive", "Dealers", "Used Vehicles", cat)
+        return ("Automotive", "Dealers", "Other Dealers", cat)
+
+    if any(x in c for x in ['car wash', 'auto detailing', 'car detailing']):
+        return ("Automotive", "Vehicle Care", "Cleaning & Detailing", cat)
+
+    if any(x in c for x in ['car rental', 'auto rental', 'vehicle rental', 'truck rental']):
+        return ("Automotive", "Rental Services", "Vehicle Rental", cat)
+
+    if any(x in c for x in ['car repair', 'auto repair', 'mechanic', 'garage', 'auto body', 'collision']):
+        return ("Automotive", "Repair & Maintenance", "Auto Repair", cat)
+
+    if any(x in c for x in ['tire', 'tyre', 'wheel']):
+        return ("Automotive", "Parts & Accessories", "Tires & Wheels", cat)
+
+    if any(x in c for x in ['auto part', 'car part', 'auto accessories']):
+        return ("Automotive", "Parts & Accessories", "Auto Parts", cat)
+
+    if any(x in c for x in ['driving school', 'driving instruction']):
+        return ("Automotive", "Training", "Driving Schools", cat)
+
+    if any(x in c for x in ['parking', 'car park', 'garage']):
+        if 'repair' not in c and 'mechanic' not in c:
+            return ("Automotive", "Parking", "Parking Facilities", cat)
+
+    if any(x in c for x in ['gas station', 'petrol', 'fuel', 'charging station', 'ev charging']):
+        return ("Automotive", "Fuel & Charging", "Fuel Stations", cat)
+
+    # === HEALTHCARE ===
+    if any(x in c for x in ['hospital']):
+        if 'animal' in c or 'veterinar' in c:
+            return ("Healthcare", "Veterinary", "Animal Hospitals", cat)
+        if 'children' in c or 'pediatric' in c:
+            return ("Healthcare", "Hospitals", "Pediatric Hospitals", cat)
+        if 'mental' in c or 'psychiatric' in c:
+            return ("Healthcare", "Mental Health", "Psychiatric Hospitals", cat)
+        return ("Healthcare", "Hospitals", "General Hospitals", cat)
+
+    if any(x in c for x in ['clinic']):
+        if 'dental' in c:
+            return ("Healthcare", "Dental", "Dental Clinics", cat)
+        if 'eye' in c or 'vision' in c or 'optical' in c:
+            return ("Healthcare", "Vision Care", "Eye Clinics", cat)
+        if 'fertility' in c or 'ivf' in c:
+            return ("Healthcare", "Specialty Care", "Fertility Clinics", cat)
+        if 'skin' in c or 'dermatol' in c:
+            return ("Healthcare", "Specialty Care", "Dermatology", cat)
+        if 'physical therapy' in c or 'physiotherapy' in c or 'rehab' in c:
+            return ("Healthcare", "Rehabilitation", "Physical Therapy", cat)
+        return ("Healthcare", "Clinics", "Medical Clinics", cat)
+
+    if any(x in c for x in ['doctor', 'physician']):
+        return ("Healthcare", "Medical Practitioners", "Doctors", cat)
+
+    if any(x in c for x in ['dentist', 'dental', 'orthodont', 'endodont', 'periodont']):
+        return ("Healthcare", "Dental", "Dental Services", cat)
+
+    if any(x in c for x in ['surgeon', 'surgery']):
+        if 'plastic' in c or 'cosmetic' in c:
+            return ("Healthcare", "Specialty Care", "Cosmetic Surgery", cat)
+        return ("Healthcare", "Medical Practitioners", "Surgeons", cat)
+
+    if any(x in c for x in ['psycholog', 'psychiatr', 'mental health', 'counselor', 'therapist']):
+        if 'marriage' in c or 'family' in c:
+            return ("Healthcare", "Mental Health", "Family Counseling", cat)
+        if 'addiction' in c or 'substance' in c:
+            return ("Healthcare", "Mental Health", "Addiction Treatment", cat)
+        return ("Healthcare", "Mental Health", "Mental Health Services", cat)
+
+    if any(x in c for x in ['chiropract']):
+        return ("Healthcare", "Alternative Medicine", "Chiropractic", cat)
+
+    if any(x in c for x in ['acupuncture', 'acupuncturist']):
+        return ("Healthcare", "Alternative Medicine", "Acupuncture", cat)
+
+    if any(x in c for x in ['naturopath', 'homeopath', 'ayurved', 'holistic']):
+        return ("Healthcare", "Alternative Medicine", "Natural Medicine", cat)
+
+    if any(x in c for x in ['optometrist', 'optician', 'eye doctor', 'ophthalmol']):
+        return ("Healthcare", "Vision Care", "Eye Care", cat)
+
+    if any(x in c for x in ['pharmacy', 'drugstore', 'apothecary']):
+        return ("Healthcare", "Pharmacies", "Retail Pharmacies", cat)
+
+    if any(x in c for x in ['veterinar', 'vet ', 'animal clinic', 'pet clinic']):
+        return ("Healthcare", "Veterinary", "Veterinary Services", cat)
+
+    if any(x in c for x in ['nursing home', 'assisted living', 'senior care', 'elder care', 'retirement home']):
+        return ("Healthcare", "Senior Care", "Senior Living", cat)
+
+    if any(x in c for x in ['lab', 'laboratory', 'diagnostic', 'imaging', 'x-ray', 'mri', 'radiology']):
+        return ("Healthcare", "Diagnostics", "Medical Labs", cat)
+
+    if any(x in c for x in ['ambulance', 'emergency', 'urgent care']):
+        return ("Healthcare", "Emergency Services", "Emergency Care", cat)
+
+    # === EDUCATION ===
+    if 'school' in c or 'academy' in c:
+        if any(x in c for x in ['preschool', 'kindergarten', 'nursery', 'daycare', 'pre-school']):
+            return ("Education", "Early Childhood", "Preschools", cat)
+        if any(x in c for x in ['elementary', 'primary']):
+            return ("Education", "K-12 Schools", "Elementary Schools", cat)
+        if any(x in c for x in ['middle', 'junior high']):
+            return ("Education", "K-12 Schools", "Middle Schools", cat)
+        if any(x in c for x in ['high school', 'secondary']):
+            return ("Education", "K-12 Schools", "High Schools", cat)
+        if any(x in c for x in ['boarding']):
+            return ("Education", "K-12 Schools", "Boarding Schools", cat)
+        if any(x in c for x in ['driving']):
+            return ("Automotive", "Training", "Driving Schools", cat)
+        if any(x in c for x in ['language', 'english', 'spanish', 'french', 'german', 'chinese', 'japanese']):
+            return ("Education", "Language Learning", "Language Schools", cat)
+        if any(x in c for x in ['art', 'music', 'dance', 'drama', 'theater', 'acting']):
+            return ("Education", "Arts Education", "Arts Schools", cat)
+        if any(x in c for x in ['martial art', 'karate', 'judo', 'taekwondo', 'kung fu', 'aikido', 'boxing']):
+            return ("Education", "Sports Training", "Martial Arts Schools", cat)
+        if any(x in c for x in ['beauty', 'cosmetology', 'barber']):
+            return ("Education", "Vocational Training", "Beauty Schools", cat)
+        if any(x in c for x in ['cooking', 'culinary', 'chef']):
+            return ("Education", "Vocational Training", "Culinary Schools", cat)
+        if any(x in c for x in ['business', 'mba']):
+            return ("Education", "Higher Education", "Business Schools", cat)
+        if any(x in c for x in ['medical', 'nursing', 'dental']):
+            return ("Education", "Higher Education", "Medical Schools", cat)
+        if any(x in c for x in ['law']):
+            return ("Education", "Higher Education", "Law Schools", cat)
+        if any(x in c for x in ['flight', 'aviation', 'pilot']):
+            return ("Education", "Vocational Training", "Aviation Schools", cat)
+        if any(x in c for x in ['computer', 'it ', 'coding', 'programming', 'software']):
+            return ("Education", "Technology Training", "Computer Schools", cat)
+        if any(x in c for x in ['trade', 'technical', 'vocational']):
+            return ("Education", "Vocational Training", "Trade Schools", cat)
+        return ("Education", "Specialty Schools", "Other Schools", cat)
+
+    if any(x in c for x in ['university', 'college']):
+        if 'community' in c:
+            return ("Education", "Higher Education", "Community Colleges", cat)
+        return ("Education", "Higher Education", "Universities", cat)
+
+    if any(x in c for x in ['tutor', 'tutoring']):
+        return ("Education", "Tutoring", "Private Tutoring", cat)
+
+    if any(x in c for x in ['training center', 'training program', 'training institute']):
+        return ("Education", "Professional Training", "Training Centers", cat)
+
+    if any(x in c for x in ['library']):
+        return ("Education", "Libraries", "Public Libraries", cat)
+
+    # === PROFESSIONAL SERVICES ===
+    if any(x in c for x in ['lawyer', 'attorney', 'law firm', 'legal']):
+        if any(x in c for x in ['immigration']):
+            return ("Professional Services", "Legal", "Immigration Law", cat)
+        if any(x in c for x in ['criminal', 'defense']):
+            return ("Professional Services", "Legal", "Criminal Law", cat)
+        if any(x in c for x in ['family', 'divorce']):
+            return ("Professional Services", "Legal", "Family Law", cat)
+        if any(x in c for x in ['personal injury', 'accident']):
+            return ("Professional Services", "Legal", "Personal Injury", cat)
+        if any(x in c for x in ['real estate', 'property']):
+            return ("Professional Services", "Legal", "Real Estate Law", cat)
+        if any(x in c for x in ['business', 'corporate', 'commercial']):
+            return ("Professional Services", "Legal", "Business Law", cat)
+        return ("Professional Services", "Legal", "General Legal", cat)
+
+    if any(x in c for x in ['accountant', 'accounting', 'bookkeep', 'tax']):
+        return ("Professional Services", "Financial Services", "Accounting", cat)
+
+    if any(x in c for x in ['consultant', 'consulting', 'advisor']):
+        if any(x in c for x in ['business', 'management']):
+            return ("Professional Services", "Consulting", "Business Consulting", cat)
+        if any(x in c for x in ['it ', 'technology', 'computer']):
+            return ("Professional Services", "Consulting", "IT Consulting", cat)
+        if any(x in c for x in ['marketing', 'advertising']):
+            return ("Professional Services", "Consulting", "Marketing Consulting", cat)
+        return ("Professional Services", "Consulting", "General Consulting", cat)
+
+    if any(x in c for x in ['notary', 'notarial']):
+        return ("Professional Services", "Legal", "Notary Services", cat)
+
+    if any(x in c for x in ['architect', 'architecture']):
+        return ("Professional Services", "Design", "Architecture", cat)
+
+    if any(x in c for x in ['engineer', 'engineering']):
+        if 'civil' in c:
+            return ("Professional Services", "Engineering", "Civil Engineering", cat)
+        if 'structural' in c:
+            return ("Professional Services", "Engineering", "Structural Engineering", cat)
+        if 'mechanical' in c:
+            return ("Professional Services", "Engineering", "Mechanical Engineering", cat)
+        if 'electrical' in c:
+            return ("Professional Services", "Engineering", "Electrical Engineering", cat)
+        return ("Professional Services", "Engineering", "General Engineering", cat)
+
+    if any(x in c for x in ['agency']):
+        if any(x in c for x in ['advertising', 'marketing', 'creative', 'digital']):
+            return ("Professional Services", "Marketing & Advertising", "Agencies", cat)
+        if any(x in c for x in ['real estate', 'property']):
+            return ("Real Estate", "Agencies", "Real Estate Agencies", cat)
+        if any(x in c for x in ['insurance']):
+            return ("Finance & Insurance", "Insurance", "Insurance Agencies", cat)
+        if any(x in c for x in ['travel', 'tour']):
+            return ("Hospitality & Travel", "Travel Services", "Travel Agencies", cat)
+        if any(x in c for x in ['employment', 'staffing', 'recruitment', 'temp']):
+            return ("Professional Services", "HR Services", "Staffing Agencies", cat)
+        return ("Professional Services", "Agencies", "Other Agencies", cat)
+
+    if any(x in c for x in ['photographer', 'photography', 'photo studio']):
+        return ("Professional Services", "Creative Services", "Photography", cat)
+
+    if any(x in c for x in ['graphic design', 'web design', 'design studio']):
+        return ("Professional Services", "Creative Services", "Design Services", cat)
+
+    if any(x in c for x in ['translator', 'translation', 'interpreter']):
+        return ("Professional Services", "Language Services", "Translation", cat)
+
+    if any(x in c for x in ['printing', 'print shop', 'copy']):
+        return ("Professional Services", "Business Services", "Printing Services", cat)
+
+    # === HOME SERVICES ===
+    if any(x in c for x in ['plumber', 'plumbing']):
+        return ("Home Services", "Plumbing", "Plumbers", cat)
+
+    if any(x in c for x in ['electrician', 'electrical']):
+        if 'contractor' in c or 'service' in c:
+            return ("Home Services", "Electrical", "Electricians", cat)
+
+    if any(x in c for x in ['hvac', 'air conditioning', 'heating', 'furnace']):
+        return ("Home Services", "HVAC", "Heating & Cooling", cat)
+
+    if any(x in c for x in ['roofing', 'roofer']):
+        return ("Home Services", "Roofing", "Roofing Services", cat)
+
+    if any(x in c for x in ['painter', 'painting']):
+        if 'house' in c or 'residential' in c or 'contractor' in c:
+            return ("Home Services", "Painting", "House Painters", cat)
+
+    if any(x in c for x in ['landscap', 'lawn', 'garden']):
+        if 'service' in c or 'company' in c or 'contractor' in c:
+            return ("Home Services", "Landscaping", "Landscaping Services", cat)
+
+    if any(x in c for x in ['cleaning service', 'maid', 'housekeep', 'janitorial']):
+        return ("Home Services", "Cleaning", "Cleaning Services", cat)
+
+    if any(x in c for x in ['pest control', 'exterminator']):
+        return ("Home Services", "Pest Control", "Exterminators", cat)
+
+    if any(x in c for x in ['locksmith']):
+        return ("Home Services", "Security", "Locksmiths", cat)
+
+    if any(x in c for x in ['moving company', 'mover', 'relocation']):
+        return ("Home Services", "Moving", "Moving Services", cat)
+
+    if any(x in c for x in ['contractor']):
+        if 'general' in c:
+            return ("Home Services", "Construction", "General Contractors", cat)
+        return ("Home Services", "Construction", "Contractors", cat)
+
+    if any(x in c for x in ['carpenter', 'carpentry']):
+        return ("Home Services", "Construction", "Carpenters", cat)
+
+    if any(x in c for x in ['flooring', 'floor']):
+        if 'service' in c or 'contractor' in c or 'installation' in c:
+            return ("Home Services", "Flooring", "Floor Installation", cat)
+
+    if any(x in c for x in ['window', 'glass']):
+        if 'repair' in c or 'installation' in c or 'service' in c:
+            return ("Home Services", "Windows & Doors", "Window Services", cat)
+
+    if any(x in c for x in ['pool', 'spa']):
+        if 'service' in c or 'cleaning' in c or 'maintenance' in c:
+            return ("Home Services", "Pool & Spa", "Pool Services", cat)
+
+    if any(x in c for x in ['appliance repair', 'appliance service']):
+        return ("Home Services", "Appliance Repair", "Appliance Services", cat)
+
+    if any(x in c for x in ['handyman']):
+        return ("Home Services", "General Repair", "Handyman Services", cat)
+
+    if any(x in c for x in ['interior design', 'decorator']):
+        return ("Home Services", "Design", "Interior Design", cat)
+
+    # === PERSONAL SERVICES ===
+    if any(x in c for x in ['salon', 'hair', 'hairdress', 'stylist']):
+        return ("Personal Services", "Hair Care", "Hair Salons", cat)
+
+    if any(x in c for x in ['barber']):
+        if 'shop' in c or not 'school' in c:
+            return ("Personal Services", "Hair Care", "Barber Shops", cat)
+
+    if any(x in c for x in ['nail', 'manicure', 'pedicure']):
+        return ("Personal Services", "Nail Care", "Nail Salons", cat)
+
+    if any(x in c for x in ['spa']):
+        if 'day spa' in c or 'medical spa' in c or ('service' not in c and 'pool' not in c):
+            return ("Personal Services", "Spa & Wellness", "Day Spas", cat)
+
+    if any(x in c for x in ['massage']):
+        return ("Personal Services", "Massage", "Massage Therapy", cat)
+
+    if any(x in c for x in ['beauty']):
+        if 'salon' in c or 'parlor' in c:
+            return ("Personal Services", "Beauty", "Beauty Salons", cat)
+
+    if any(x in c for x in ['tattoo']):
+        return ("Personal Services", "Body Art", "Tattoo Shops", cat)
+
+    if any(x in c for x in ['piercing']):
+        return ("Personal Services", "Body Art", "Piercing Studios", cat)
+
+    if any(x in c for x in ['tanning']):
+        return ("Personal Services", "Tanning", "Tanning Salons", cat)
+
+    if any(x in c for x in ['tailor', 'alteration', 'seamstress']):
+        return ("Personal Services", "Clothing Care", "Tailoring", cat)
+
+    if any(x in c for x in ['dry clean', 'laundry', 'laundromat']):
+        return ("Personal Services", "Laundry", "Laundry Services", cat)
+
+    if any(x in c for x in ['personal trainer', 'fitness trainer']):
+        return ("Personal Services", "Fitness", "Personal Training", cat)
+
+    # === ENTERTAINMENT & RECREATION ===
+    if any(x in c for x in ['movie theater', 'cinema', 'multiplex']):
+        return ("Entertainment", "Movies", "Movie Theaters", cat)
+
+    if any(x in c for x in ['theater', 'theatre']):
+        if 'movie' not in c:
+            return ("Entertainment", "Performing Arts", "Theaters", cat)
+
+    if any(x in c for x in ['museum']):
+        if 'art' in c:
+            return ("Entertainment", "Museums", "Art Museums", cat)
+        if 'history' in c or 'historical' in c:
+            return ("Entertainment", "Museums", "History Museums", cat)
+        if 'science' in c or 'natural' in c:
+            return ("Entertainment", "Museums", "Science Museums", cat)
+        if 'children' in c or 'kid' in c:
+            return ("Entertainment", "Museums", "Children's Museums", cat)
+        return ("Entertainment", "Museums", "General Museums", cat)
+
+    if any(x in c for x in ['art gallery', 'gallery']):
+        return ("Entertainment", "Arts", "Art Galleries", cat)
+
+    if any(x in c for x in ['amusement park', 'theme park', 'water park']):
+        return ("Entertainment", "Amusement", "Theme Parks", cat)
+
+    if any(x in c for x in ['zoo', 'aquarium', 'wildlife']):
+        return ("Entertainment", "Wildlife", "Zoos & Aquariums", cat)
+
+    if any(x in c for x in ['bowling']):
+        return ("Entertainment", "Games & Recreation", "Bowling", cat)
+
+    if any(x in c for x in ['arcade', 'video game']):
+        return ("Entertainment", "Games & Recreation", "Arcades", cat)
+
+    if any(x in c for x in ['escape room']):
+        return ("Entertainment", "Games & Recreation", "Escape Rooms", cat)
+
+    if any(x in c for x in ['casino', 'gambling']):
+        return ("Entertainment", "Gambling", "Casinos", cat)
+
+    if any(x in c for x in ['concert', 'music venue', 'live music']):
+        return ("Entertainment", "Music Venues", "Concert Halls", cat)
+
+    if any(x in c for x in ['gym', 'fitness center', 'health club']):
+        return ("Entertainment", "Fitness", "Gyms", cat)
+
+    if any(x in c for x in ['yoga']):
+        if 'studio' in c or 'center' in c:
+            return ("Entertainment", "Fitness", "Yoga Studios", cat)
+
+    if any(x in c for x in ['pilates']):
+        return ("Entertainment", "Fitness", "Pilates Studios", cat)
+
+    if any(x in c for x in ['swimming pool', 'swim']):
+        return ("Entertainment", "Sports", "Swimming Pools", cat)
+
+    if any(x in c for x in ['golf']):
+        if 'course' in c or 'club' in c:
+            return ("Entertainment", "Sports", "Golf Courses", cat)
+
+    if any(x in c for x in ['tennis']):
+        return ("Entertainment", "Sports", "Tennis Courts", cat)
+
+    if any(x in c for x in ['stadium', 'arena', 'sports complex']):
+        return ("Entertainment", "Venues", "Sports Venues", cat)
+
+    if any(x in c for x in ['park']):
+        if 'amusement' not in c and 'theme' not in c:
+            if 'national' in c or 'state' in c:
+                return ("Entertainment", "Parks", "National Parks", cat)
+            if 'dog' in c:
+                return ("Entertainment", "Parks", "Dog Parks", cat)
+            return ("Entertainment", "Parks", "Public Parks", cat)
+
+    if any(x in c for x in ['recreation center', 'community center']):
+        return ("Entertainment", "Recreation", "Community Centers", cat)
+
+    if any(x in c for x in ['club']):
+        if 'night' in c:
+            return ("Food & Dining", "Bars & Nightlife", "Night Clubs", cat)
+        if 'country' in c:
+            return ("Entertainment", "Sports", "Country Clubs", cat)
+        if 'sport' in c or 'athletic' in c:
+            return ("Entertainment", "Sports", "Sports Clubs", cat)
+        if 'social' in c:
+            return ("Entertainment", "Social", "Social Clubs", cat)
+
+    # === HOSPITALITY & TRAVEL ===
+    if any(x in c for x in ['hotel', 'motel', 'inn']):
+        if 'boutique' in c:
+            return ("Hospitality & Travel", "Lodging", "Boutique Hotels", cat)
+        if 'resort' in c:
+            return ("Hospitality & Travel", "Lodging", "Resorts", cat)
+        if 'budget' in c or 'economy' in c:
+            return ("Hospitality & Travel", "Lodging", "Budget Hotels", cat)
+        return ("Hospitality & Travel", "Lodging", "Hotels", cat)
+
+    if any(x in c for x in ['hostel']):
+        return ("Hospitality & Travel", "Lodging", "Hostels", cat)
+
+    if any(x in c for x in ['bed and breakfast', 'b&b', 'bnb']):
+        return ("Hospitality & Travel", "Lodging", "B&Bs", cat)
+
+    if any(x in c for x in ['resort']):
+        return ("Hospitality & Travel", "Lodging", "Resorts", cat)
+
+    if any(x in c for x in ['vacation rental', 'holiday rental']):
+        return ("Hospitality & Travel", "Lodging", "Vacation Rentals", cat)
+
+    if any(x in c for x in ['campground', 'camping', 'rv park']):
+        return ("Hospitality & Travel", "Lodging", "Campgrounds", cat)
+
+    if any(x in c for x in ['travel agency', 'tour operator', 'travel agent']):
+        return ("Hospitality & Travel", "Travel Services", "Travel Agencies", cat)
+
+    if any(x in c for x in ['airline', 'airport']):
+        return ("Hospitality & Travel", "Transportation", "Airlines & Airports", cat)
+
+    if any(x in c for x in ['cruise']):
+        return ("Hospitality & Travel", "Travel Services", "Cruises", cat)
+
+    if any(x in c for x in ['tourist', 'attraction', 'sightseeing']):
+        return ("Hospitality & Travel", "Attractions", "Tourist Attractions", cat)
+
+    # === FINANCE & INSURANCE ===
+    if any(x in c for x in ['bank', 'banking', 'credit union']):
+        return ("Finance & Insurance", "Banking", "Banks", cat)
+
+    if any(x in c for x in ['atm', 'cash machine']):
+        return ("Finance & Insurance", "Banking", "ATMs", cat)
+
+    if any(x in c for x in ['insurance']):
+        if 'health' in c or 'medical' in c:
+            return ("Finance & Insurance", "Insurance", "Health Insurance", cat)
+        if 'auto' in c or 'car' in c:
+            return ("Finance & Insurance", "Insurance", "Auto Insurance", cat)
+        if 'home' in c or 'property' in c:
+            return ("Finance & Insurance", "Insurance", "Home Insurance", cat)
+        if 'life' in c:
+            return ("Finance & Insurance", "Insurance", "Life Insurance", cat)
+        return ("Finance & Insurance", "Insurance", "Insurance Services", cat)
+
+    if any(x in c for x in ['loan', 'mortgage', 'lending']):
+        return ("Finance & Insurance", "Lending", "Loans", cat)
+
+    if any(x in c for x in ['investment', 'financial advisor', 'wealth management', 'financial planner']):
+        return ("Finance & Insurance", "Investment", "Financial Services", cat)
+
+    if any(x in c for x in ['currency exchange', 'money transfer', 'wire transfer']):
+        return ("Finance & Insurance", "Money Services", "Currency Services", cat)
+
+    if any(x in c for x in ['pawn']):
+        return ("Finance & Insurance", "Money Services", "Pawn Shops", cat)
+
+    # === REAL ESTATE ===
+    if any(x in c for x in ['real estate', 'property', 'realty', 'realtor']):
+        if 'agent' in c or 'agency' in c or 'broker' in c:
+            return ("Real Estate", "Agencies", "Real Estate Agents", cat)
+        if 'developer' in c or 'development' in c:
+            return ("Real Estate", "Development", "Developers", cat)
+        if 'management' in c:
+            return ("Real Estate", "Management", "Property Management", cat)
+        if 'commercial' in c:
+            return ("Real Estate", "Commercial", "Commercial Real Estate", cat)
+        return ("Real Estate", "Services", "Real Estate Services", cat)
+
+    if any(x in c for x in ['apartment', 'condo', 'rental']):
+        if 'complex' in c or 'building' in c:
+            return ("Real Estate", "Residential", "Apartment Complexes", cat)
+
+    if any(x in c for x in ['storage', 'self storage', 'warehouse']):
+        if 'self' in c or 'mini' in c:
+            return ("Real Estate", "Storage", "Self Storage", cat)
+
+    # === RELIGIOUS ===
+    if any(x in c for x in ['church']):
+        if 'catholic' in c:
+            return ("Religious", "Christian", "Catholic Churches", cat)
+        if 'baptist' in c:
+            return ("Religious", "Christian", "Baptist Churches", cat)
+        if 'methodist' in c:
+            return ("Religious", "Christian", "Methodist Churches", cat)
+        if 'lutheran' in c:
+            return ("Religious", "Christian", "Lutheran Churches", cat)
+        if 'orthodox' in c:
+            return ("Religious", "Christian", "Orthodox Churches", cat)
+        if 'pentecostal' in c:
+            return ("Religious", "Christian", "Pentecostal Churches", cat)
+        return ("Religious", "Christian", "Churches", cat)
+
+    if any(x in c for x in ['mosque', 'islamic', 'muslim']):
+        return ("Religious", "Islam", "Mosques", cat)
+
+    if any(x in c for x in ['synagogue', 'jewish', 'temple']):
+        if 'jewish' in c or 'synagogue' in c:
+            return ("Religious", "Judaism", "Synagogues", cat)
+        if 'hindu' in c:
+            return ("Religious", "Hinduism", "Hindu Temples", cat)
+        if 'buddhist' in c:
+            return ("Religious", "Buddhism", "Buddhist Temples", cat)
+        return ("Religious", "Other", "Temples", cat)
+
+    if any(x in c for x in ['abbey', 'monastery', 'convent']):
+        return ("Religious", "Christian", "Monasteries", cat)
+
+    if any(x in c for x in ['gurdwara', 'sikh']):
+        return ("Religious", "Sikhism", "Gurdwaras", cat)
+
+    # === GOVERNMENT & PUBLIC SERVICES ===
+    if any(x in c for x in ['government', 'city hall', 'town hall', 'municipal']):
+        return ("Government", "Local Government", "Government Offices", cat)
+
+    if any(x in c for x in ['court', 'courthouse']):
+        return ("Government", "Legal", "Courts", cat)
+
+    if any(x in c for x in ['police', 'sheriff']):
+        return ("Government", "Public Safety", "Police", cat)
+
+    if any(x in c for x in ['fire station', 'fire department']):
+        return ("Government", "Public Safety", "Fire Departments", cat)
+
+    if any(x in c for x in ['post office', 'postal']):
+        return ("Government", "Postal", "Post Offices", cat)
+
+    if any(x in c for x in ['embassy', 'consulate']):
+        return ("Government", "International", "Embassies", cat)
+
+    if any(x in c for x in ['dmv', 'motor vehicle', 'driver license']):
+        return ("Government", "Transportation", "DMV", cat)
+
+    if any(x in c for x in ['social security', 'welfare', 'social services']):
+        return ("Government", "Social Services", "Social Services", cat)
+
+    # === INDUSTRIAL & MANUFACTURING ===
+    if any(x in c for x in ['manufacturer', 'manufacturing', 'factory', 'plant']):
+        if any(x in c for x in ['food', 'beverage', 'bakery']):
+            return ("Industrial", "Manufacturing", "Food Manufacturing", cat)
+        if any(x in c for x in ['textile', 'clothing', 'garment']):
+            return ("Industrial", "Manufacturing", "Textile Manufacturing", cat)
+        if any(x in c for x in ['electronics', 'computer', 'semiconductor']):
+            return ("Industrial", "Manufacturing", "Electronics Manufacturing", cat)
+        if any(x in c for x in ['auto', 'car', 'vehicle']):
+            return ("Industrial", "Manufacturing", "Auto Manufacturing", cat)
+        if any(x in c for x in ['chemical', 'pharmaceutical']):
+            return ("Industrial", "Manufacturing", "Chemical Manufacturing", cat)
+        if any(x in c for x in ['metal', 'steel', 'iron']):
+            return ("Industrial", "Manufacturing", "Metal Manufacturing", cat)
+        if any(x in c for x in ['plastic', 'rubber']):
+            return ("Industrial", "Manufacturing", "Plastics Manufacturing", cat)
+        if any(x in c for x in ['furniture', 'wood']):
+            return ("Industrial", "Manufacturing", "Furniture Manufacturing", cat)
+        return ("Industrial", "Manufacturing", "General Manufacturing", cat)
+
+    if any(x in c for x in ['mining', 'quarry']):
+        return ("Industrial", "Mining", "Mining Operations", cat)
+
+    if any(x in c for x in ['construction company', 'builder']):
+        return ("Industrial", "Construction", "Construction Companies", cat)
+
+    # === TECHNOLOGY ===
+    if any(x in c for x in ['software', 'app developer', 'web developer']):
+        return ("Technology", "Software", "Software Development", cat)
+
+    if any(x in c for x in ['it service', 'computer service', 'tech support']):
+        return ("Technology", "IT Services", "IT Support", cat)
+
+    if any(x in c for x in ['data center', 'hosting', 'cloud']):
+        return ("Technology", "Infrastructure", "Data Services", cat)
+
+    if any(x in c for x in ['telecommunication', 'telecom', 'internet service']):
+        return ("Technology", "Telecommunications", "Telecom Services", cat)
+
+    # === TRANSPORTATION & LOGISTICS ===
+    if any(x in c for x in ['shipping', 'freight', 'cargo', 'logistics']):
+        return ("Transportation", "Logistics", "Shipping & Freight", cat)
+
+    if any(x in c for x in ['courier', 'delivery', 'express']):
+        return ("Transportation", "Delivery", "Courier Services", cat)
+
+    if any(x in c for x in ['taxi', 'cab', 'ride', 'limo', 'chauffeur']):
+        return ("Transportation", "Passenger", "Taxi & Ride Services", cat)
+
+    if any(x in c for x in ['bus', 'coach', 'shuttle']):
+        if 'station' in c or 'terminal' in c or 'stop' in c:
+            return ("Transportation", "Public Transit", "Bus Stations", cat)
+        return ("Transportation", "Passenger", "Bus Services", cat)
+
+    if any(x in c for x in ['train', 'rail', 'subway', 'metro']):
+        if 'station' in c or 'terminal' in c:
+            return ("Transportation", "Public Transit", "Train Stations", cat)
+        return ("Transportation", "Public Transit", "Rail Services", cat)
+
+    if any(x in c for x in ['towing', 'tow truck']):
+        return ("Transportation", "Vehicle Services", "Towing", cat)
+
+    # === AGRICULTURE ===
+    if any(x in c for x in ['farm', 'ranch', 'orchard', 'vineyard']):
+        return ("Agriculture", "Farming", "Farms", cat)
+
+    if any(x in c for x in ['agricultural', 'agri']):
+        return ("Agriculture", "Services", "Agricultural Services", cat)
+
+    # === PETS & ANIMALS ===
+    if any(x in c for x in ['pet', 'dog', 'cat']):
+        if 'grooming' in c or 'groomer' in c:
+            return ("Pets & Animals", "Pet Services", "Pet Grooming", cat)
+        if 'boarding' in c or 'kennel' in c or 'sitting' in c or 'daycare' in c:
+            return ("Pets & Animals", "Pet Services", "Pet Boarding", cat)
+        if 'training' in c or 'trainer' in c:
+            return ("Pets & Animals", "Pet Services", "Pet Training", cat)
+        if 'adoption' in c or 'shelter' in c or 'rescue' in c:
+            return ("Pets & Animals", "Animal Welfare", "Shelters", cat)
+        if 'store' in c or 'shop' in c:
+            return ("Retail & Shopping", "Pet Supplies", "Pet Stores", cat)
+
+    # === EVENTS & WEDDINGS ===
+    if any(x in c for x in ['wedding', 'bridal']):
+        if 'venue' in c or 'hall' in c:
+            return ("Events & Weddings", "Venues", "Wedding Venues", cat)
+        if 'planner' in c:
+            return ("Events & Weddings", "Planning", "Wedding Planners", cat)
+        if 'dress' in c or 'gown' in c:
+            return ("Events & Weddings", "Attire", "Bridal Shops", cat)
+        return ("Events & Weddings", "Services", "Wedding Services", cat)
+
+    if any(x in c for x in ['event', 'party', 'banquet']):
+        if 'venue' in c or 'hall' in c or 'center' in c:
+            return ("Events & Weddings", "Venues", "Event Venues", cat)
+        if 'planner' in c or 'planning' in c:
+            return ("Events & Weddings", "Planning", "Event Planners", cat)
+        if 'rental' in c or 'supply' in c:
+            return ("Events & Weddings", "Rentals", "Event Rentals", cat)
+        return ("Events & Weddings", "Services", "Event Services", cat)
+
+    if any(x in c for x in ['florist', 'flower']):
+        if 'shop' in c or 'store' not in c:
+            return ("Events & Weddings", "Florists", "Flower Shops", cat)
+
+    if any(x in c for x in ['funeral', 'mortuary', 'cremation', 'cemetery']):
+        return ("Events & Weddings", "Memorial", "Funeral Services", cat)
+
+    # === NON-PROFIT & COMMUNITY ===
+    if any(x in c for x in ['non-profit', 'nonprofit', 'charity', 'foundation']):
+        return ("Non-Profit", "Charities", "Non-Profit Organizations", cat)
+
+    if any(x in c for x in ['community', 'civic', 'volunteer']):
+        if 'center' in c:
+            return ("Non-Profit", "Community", "Community Centers", cat)
+        return ("Non-Profit", "Community", "Community Organizations", cat)
+
+    if any(x in c for x in ['association', 'organization', 'society']):
+        if 'professional' in c or 'trade' in c or 'business' in c:
+            return ("Non-Profit", "Professional", "Professional Associations", cat)
+        return ("Non-Profit", "General", "Organizations", cat)
+
+    # Default fallback
+    return ("Other", "Uncategorized", "General", cat)
+
+
+def main():
+    """Import the GBP category CSV into the ``gbp_categories`` ltree table.
+
+    Steps:
+      1. Read category names from the first column of ``--csv-path``
+         (the first row is treated as a header and skipped).
+      2. Classify each name with ``categorize_category`` and build a 4-level
+         tree keyed by dotted slug path.
+      3. With ``--dry-run``: print a sample of the tree and return.
+      4. Otherwise: run the init SQL (if the file exists), truncate the table,
+         insert all nodes parents-first, refresh ``category_count`` and print
+         statistics.
+
+    The cursor and connection are always closed, even when a statement fails;
+    closing without a commit discards the uncommitted truncate/insert work.
+    """
+    parser = argparse.ArgumentParser(description='Import GBP categories into PostgreSQL with ltree')
+    parser.add_argument('--csv-path', default=DEFAULT_CSV_PATH, help='Path to categories CSV')
+    parser.add_argument('--db-url', default=DEFAULT_DB_URL, help='PostgreSQL connection URL')
+    parser.add_argument('--dry-run', action='store_true', help='Print categories without importing')
+    args = parser.parse_args()
+
+    # Read categories
+    print(f"Reading categories from: {args.csv_path}")
+    categories = []
+    # newline='' is what the csv module expects so quoted embedded newlines parse correctly.
+    with open(args.csv_path, 'r', encoding='utf-8', newline='') as f:
+        reader = csv.reader(f)
+        next(reader, None)  # Skip header; the default avoids StopIteration on an empty file
+        for row in reader:
+            if row and row[0].strip():
+                categories.append(row[0].strip())
+
+    print(f"Found {len(categories)} categories")
+
+    # Build tree structure
+    tree = {}  # path -> (name, level, parent_path)
+
+    for cat in categories:
+        l1, l2, l3, l4 = categorize_category(cat)
+
+        # Build paths
+        l1_slug = slugify(l1)
+        l2_slug = slugify(l2)
+        l3_slug = slugify(l3)
+        l4_slug = slugify(l4)
+
+        l1_path = l1_slug
+        l2_path = f"{l1_slug}.{l2_slug}"
+        l3_path = f"{l1_slug}.{l2_slug}.{l3_slug}"
+        l4_path = f"{l1_slug}.{l2_slug}.{l3_slug}.{l4_slug}"
+
+        # setdefault keeps the first node seen for a path, so shared ancestors
+        # (and categories whose slugs collide) are only recorded once.
+        tree.setdefault(l1_path, (l1, 1, None))      # Level 1 (Sector)
+        tree.setdefault(l2_path, (l2, 2, l1_path))   # Level 2 (Business Type)
+        tree.setdefault(l3_path, (l3, 3, l2_path))   # Level 3 (Sub-category)
+        tree.setdefault(l4_path, (l4, 4, l3_path))   # Level 4 (Specific Category)
+
+    # Print statistics
+    level_counts = {1: 0, 2: 0, 3: 0, 4: 0}
+    for _name, level, _parent in tree.values():
+        level_counts[level] += 1
+
+    print("\nTree structure:")
+    print(f"  Level 1 (Sectors): {level_counts[1]}")
+    print(f"  Level 2 (Business Types): {level_counts[2]}")
+    print(f"  Level 3 (Sub-categories): {level_counts[3]}")
+    print(f"  Level 4 (Categories): {level_counts[4]}")
+    print(f"  Total nodes: {len(tree)}")
+
+    if args.dry_run:
+        print("\n[DRY RUN] Would insert these nodes:")
+        preview = sorted(tree.keys())[:20]
+        for path in preview:
+            name, level, parent = tree[path]
+            print(f"  {'  ' * (level-1)}{name} ({path})")
+        # Only mention the remainder when there is one (avoids "-N more" on small trees).
+        remaining = len(tree) - len(preview)
+        if remaining > 0:
+            print(f"  ... and {remaining} more")
+        return
+
+    # Check for psycopg2
+    if not HAS_PSYCOPG2:
+        print("\nERROR: psycopg2 is required for database import.")
+        print("Install it with: pip install psycopg2-binary")
+        return
+
+    # Connect to database
+    print("\nConnecting to database...")
+    conn = psycopg2.connect(args.db_url)
+    cur = conn.cursor()
+    try:
+        # Run init SQL first
+        init_sql_path = os.path.join(os.path.dirname(__file__), 'init', '01_create_categories.sql')
+        if os.path.exists(init_sql_path):
+            print(f"Running init SQL: {init_sql_path}")
+            with open(init_sql_path, 'r', encoding='utf-8') as f:
+                # No parameters are passed on purpose: the script contains literal
+                # '%' characters that must not be treated as placeholders.
+                cur.execute(f.read())
+            conn.commit()
+
+        # Clear existing data
+        print("Clearing existing categories...")
+        cur.execute("TRUNCATE TABLE gbp_categories RESTART IDENTITY CASCADE")
+
+        # Insert nodes in order (parents first)
+        print("Inserting categories...")
+        path_to_id = {}
+
+        # Sort by level to ensure parents are inserted first
+        sorted_items = sorted(tree.items(), key=lambda x: x[1][1])
+
+        for path, (name, level, parent_path) in sorted_items:
+            parent_id = path_to_id.get(parent_path) if parent_path else None
+            slug = path.split('.')[-1]
+
+            cur.execute("""
+                INSERT INTO gbp_categories (name, slug, path, level, parent_id)
+                VALUES (%s, %s, %s, %s, %s)
+                RETURNING id
+            """, (name, slug, path, level, parent_id))
+
+            path_to_id[path] = cur.fetchone()[0]
+
+        # Update category counts (number of descendants below each node)
+        print("Updating category counts...")
+        cur.execute("""
+            UPDATE gbp_categories p
+            SET category_count = (
+                SELECT COUNT(*) FROM gbp_categories c
+                WHERE c.path <@ p.path AND c.path != p.path
+            )
+        """)
+
+        conn.commit()
+
+        # Verify
+        cur.execute("SELECT COUNT(*) FROM gbp_categories")
+        count = cur.fetchone()[0]
+        print(f"\nSuccess! Inserted {count} nodes into gbp_categories table")
+
+        # Show tree stats
+        cur.execute("SELECT * FROM category_tree_stats")
+        print("\nTree statistics:")
+        for row in cur.fetchall():
+            print(f"  Level {row[0]}: {row[1]} nodes")
+    finally:
+        # Always release the cursor and connection, including on failure.
+        cur.close()
+        conn.close()
+
+    print("\nDone!")
+
+
+# Run the import only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+    main()
--- a/db/init/01_create_categories.sql
+++ b/db/init/01_create_categories.sql
@@ -0,0 +1,120 @@
+-- Enable ltree extension for hierarchical data
+CREATE EXTENSION IF NOT EXISTS ltree;
+
+-- Enable trigram extension for fuzzy search; must precede the gin_trgm_ops index created below
+CREATE EXTENSION IF NOT EXISTS pg_trgm;
+
+-- Categories tree table
+CREATE TABLE IF NOT EXISTS gbp_categories (
+    id SERIAL PRIMARY KEY,
+    name TEXT NOT NULL,
+    slug TEXT NOT NULL,
+    path ltree NOT NULL,
+    level INT NOT NULL DEFAULT 1,
+    parent_id INT REFERENCES gbp_categories(id),
+    category_count INT DEFAULT 0,
+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    UNIQUE(path)
+);
+
+-- Indexes for fast hierarchical queries
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_path ON gbp_categories USING GIST (path);
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_path_btree ON gbp_categories USING BTREE (path);
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_name ON gbp_categories (name);
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_slug ON gbp_categories (slug);
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_level ON gbp_categories (level);
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_parent ON gbp_categories (parent_id);
+
+-- Trigram index on name for ILIKE / similarity() searches (requires pg_trgm)
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_name_trgm ON gbp_categories USING GIN (name gin_trgm_ops);
+
+-- Trigger function: sets updated_at on the row being written to the current
+-- timestamp and returns that row so the write proceeds with the new value.
+CREATE OR REPLACE FUNCTION update_updated_at_column()
+RETURNS TRIGGER AS $$
+BEGIN
+    NEW.updated_at = CURRENT_TIMESTAMP;
+    RETURN NEW;
+END;
+$$ language 'plpgsql';
+
+-- Trigger for auto-updating timestamp on every row UPDATE.
+-- Dropped first so the script can be re-run: CREATE TRIGGER here has no
+-- "IF NOT EXISTS"/"OR REPLACE" and would fail if the trigger already existed.
+DROP TRIGGER IF EXISTS update_gbp_categories_updated_at ON gbp_categories;
+CREATE TRIGGER update_gbp_categories_updated_at
+    BEFORE UPDATE ON gbp_categories
+    FOR EACH ROW
+    EXECUTE FUNCTION update_updated_at_column();
+
+-- Helper function: Get all children of a category.
+-- Despite the name, this returns every descendant at any depth (ltree "<@"
+-- matches the whole subtree), excluding the node at parent_path itself.
+-- Rows are ordered by path.
+CREATE OR REPLACE FUNCTION get_category_children(parent_path ltree)
+RETURNS TABLE (
+    id INT,
+    name TEXT,
+    slug TEXT,
+    path ltree,
+    level INT
+) AS $$
+BEGIN
+    RETURN QUERY
+    SELECT c.id, c.name, c.slug, c.path, c.level
+    FROM gbp_categories c
+    -- Columns are qualified with "c." because the output column names
+    -- (id, name, path, ...) are also visible as variables inside the function.
+    WHERE c.path <@ parent_path AND c.path != parent_path
+    ORDER BY c.path;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Helper function: Get ancestors of a category.
+-- Returns every row whose path is an ancestor of category_path (the node
+-- itself is excluded), ordered by ascending level, i.e. level 1 first.
+CREATE OR REPLACE FUNCTION get_category_ancestors(category_path ltree)
+RETURNS TABLE (
+    id INT,
+    name TEXT,
+    slug TEXT,
+    path ltree,
+    level INT
+) AS $$
+BEGIN
+    RETURN QUERY
+    SELECT c.id, c.name, c.slug, c.path, c.level
+    FROM gbp_categories c
+    -- "category_path <@ c.path": the argument lies inside c's subtree,
+    -- so c is an ancestor of (or equal to) the argument.
+    WHERE category_path <@ c.path AND c.path != category_path
+    ORDER BY c.level;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Helper function: Search categories by name (fuzzy).
+-- A row matches when its name contains search_term case-insensitively, or when
+-- the trigram similarity between name and search_term exceeds 0.3.
+-- Results are ordered by similarity (highest first), then level, then name,
+-- and capped at limit_count rows (default 20).
+-- Relies on similarity() from the pg_trgm extension.
+-- NOTE(review): search_term is concatenated into the ILIKE pattern unescaped,
+-- so any '%' or '_' it contains acts as a wildcard — confirm this is intended.
+CREATE OR REPLACE FUNCTION search_categories(search_term TEXT, limit_count INT DEFAULT 20)
+RETURNS TABLE (
+    id INT,
+    name TEXT,
+    path ltree,
+    level INT,
+    similarity REAL
+) AS $$
+BEGIN
+    RETURN QUERY
+    SELECT c.id, c.name, c.path, c.level,
+           similarity(c.name, search_term) as sim
+    FROM gbp_categories c
+    WHERE c.name ILIKE '%' || search_term || '%'
+       OR similarity(c.name, search_term) > 0.3
+    ORDER BY sim DESC, c.level, c.name
+    LIMIT limit_count;
+END;
+$$ LANGUAGE plpgsql;
+
+-- View for tree statistics: one row per level with the number of nodes at
+-- that level in "count".
+-- Because rows are grouped by level, each of the four FILTER columns equals
+-- "count" on the row for its own level and is 0 on every other row.
+CREATE OR REPLACE VIEW category_tree_stats AS
+SELECT
+    level,
+    COUNT(*) as count,
+    COUNT(*) FILTER (WHERE level = 1) as sectors,
+    COUNT(*) FILTER (WHERE level = 2) as business_types,
+    COUNT(*) FILTER (WHERE level = 3) as sub_categories,
+    COUNT(*) FILTER (WHERE level = 4) as leaf_categories
+FROM gbp_categories
+GROUP BY level
+ORDER BY level;
+
+-- Catalog documentation for the table and its key columns.
+COMMENT ON TABLE gbp_categories IS 'Google Business Profile categories organized in a 4-level hierarchy using ltree';
+COMMENT ON COLUMN gbp_categories.path IS 'Hierarchical path using ltree (e.g., Food_Dining.Restaurants.By_Cuisine.Afghan_restaurant)';
+COMMENT ON COLUMN gbp_categories.level IS '1=Sector, 2=Business Type, 3=Sub-category, 4=Specific Category';
--- a/db/recategorize_hierarchical.py
+++ b/db/recategorize_hierarchical.py
@@ -0,0 +1,293 @@
+#!/usr/bin/env python3
+"""
+Hierarchical categorization of Other items.
+
+APPROACH:
+1. First pass: Assign to Level 1 (Sector) - items that don't match go to sector's "Other" business type
+2. Second pass: Within each sector, refine Level 2 (Business Type)
+3. Third pass: Within each business type, refine Level 3 (Sub-category)
+
+This creates:
+- Sector.Other.Uncategorized for sector-level unknowns
+- Sector.BusinessType.Other for business-type-level unknowns
+
+EXISTING SECTORS (21 + Other):
+Agriculture, Automotive, Education, Entertainment, Events_Weddings, Finance_Insurance,
+Food_Dining, Government, Healthcare, Home_Services, Hospitality_Travel, Industrial,
+Non_Profit, Personal_Services, Pets_Animals, Professional_Services, Real_Estate,
+Religious, Retail_Shopping, Technology, Transportation, Other
+"""
+
+import re
+
+# ==================== LEVEL 1: SECTOR ASSIGNMENT ====================
+# Maps keyword patterns to sectors. Order matters - first match wins.
+# These are broad patterns to catch as much as possible at sector level.
+
+SECTOR_PATTERNS = [
+    # HEALTHCARE - Medical professionals, facilities, services
+    (r'(doctor|clinic|hospital|medical|health\s|dental|dentist|therapy|therapist|psycho|chiropract|optom|optician|pharmacy|pharmacist|nurse|surgeon|physician|cardiolog|dermatol|pediatr|orthoped|neurolog|oncolog|urolog|allergist|anesthesiol|audiolog|blood\sbank|blood\sdonat|blood\stest|dialysis|fertility|hospice|rehab|physiother|acupunct|naturopath|homeopath|osteopath|midwife|birth\scenter|prenatal|maternity|wellness\s(clinic|center)|diagnostic|x-ray|mri|ultrasound|laboratory|patholog|radiolog|pulmonolog|gastroenter|endocrin|rheumatol|immunolog|geriatr|podiatr|ophthalmolog|otolaryng|hematolog|nephrolog|proctolog|physiatrist|diabetolog|toxicolog|epidemiolog|oncology|assisted\sliving|nursing\shome|senior\scare|aged\scare|elder\scare|ambulance|emergency\sroom|urgent\scare|first\said|denture|diabetes\scenter|eye\scare|hiv\stest|perinatal|physical\sexam|pregnancy\scare|surgical\scenter|mammograph|std\stest|drug\stest|lactation|doula|bonesetting|hearing\said|prosthetic|orthotic|oxygen|ostomy|sleep\sclinic|sleep\slab|fertility|ivf|sperm\sbank|stem\scell|general\spractitioner|gynecolog|obstetrician|hepatolog|intensivist|internist|neurophysiol|orthoptist|prosthodontist|sexolog|venereolog|nutritionist|dietitian|endoscopist|kinesiolog|pedorthist|seitai|foot\scare|internal\smedicine|family\smedic|family\sdoctor|gp\s|medical\sward)', 'Healthcare'),
+
+    # EDUCATION - Schools, training, learning
+    (r'(school|university|college|academy|training\scenter|training\sschool|lesson|instructor|tutor|education|library|kindergarten|preschool|pre-?school|daycare|day\scare|learning\scenter|vocational|apprentice|faculty|campus|institute|seminary|boarding\sschool|private\sschool|public\sschool|elementary|middle\sschool|high\sschool|montessori|waldorf|charter\sschool|language\sschool|driving\sschool|flight\sschool|cooking\sclass|art\sclass|music\sclass|dance\sclass|acting\sclass|drama\sclass|conservatory|music\sacademy|ballet\sacademy|film\sschool|design\sschool|fashion\sschool|culinary|bartending|beauty\sschool|cosmetology|esthetician|barber\sschool|massage\sschool|yoga\steacher|yoga\straining|meditation\sclass|self-?defense\sclass|swimming\slesson|tennis\slesson|golf\slesson|ski\sschool|surf\sschool|scuba|sailing\sschool|studying\scenter|test\sprep|sat\sprep|gre\sprep|cram\sschool|juku|hagwon|coaching\scenter|head\sstart|early\shead|childminder|assistante\smaternelle|au\spair|nanny\sagency|student\sdormitor|student\shousing|student\scareer|career\scounseling|english\slanguage\scamp|language\scamp|summer\scamp|science\scamp|coding\scamp|academic\sdepartment)', 'Education'),
+
+    # AUTOMOTIVE - Vehicles, parts, services
+    (r'(auto\s|car\s|vehicle|motor\s|tire\s|tyre\s|mechanic|garage(?!\sdoor)|parking\s(lot|garage|facility)|driving|truck\s|motorcycle|motorbike|scooter\s|atv\s|automotive|car\swash|car\sdetail|car\sdealer|car\srental|car\slease|car\sinspect|car\sauction|smog\scheck|oil\schange|brake\s|transmission|radiator|exhaust|muffler|auto\sbody|collision|windshield|car\sstorage|towing|roadside)', 'Automotive'),
+
+    # TRANSPORTATION - Moving people/goods
+    (r'(airport|airline|aviation(?!\sschool)|aircraft|airplane|airfield|airstrip|heliport|seaplane|ferry|cruise|port\sauthority|port\soperating|harbor|dock\s|pier\s|marina|shipping|freight|cargo|trucking|logistics|warehouse|courier|messenger|delivery\sservice|taxi|cab\sservice|limo|chauffeur|bus\sstation|bus\sterminal|train\sstation|rail|metro|subway|transit|rickshaw|bicycle\srental|boat\srental|bike\sshare|car\sshare)', 'Transportation'),
+
+    # GOVERNMENT - Public administration, military, legal system
+    (r'(government|military|army\s|navy\s|naval\sbase|air\sforce|marine\s|coast\sguard|national\sguard|police|sheriff|law\senforce|fire\sstation|fire\sdepartment|courthouse|court\s|embassy|consulate|city\shall|municipal|county\s|district\soffice|passport|immigration|citizenship|dmv|tax\soffice|social\ssecurity|border|customs|post\soffice|postal|public\srecord|voter|election|legislature|parliament|congress|senate|mayor|governor|council|permit|license\s(office|bureau)|civil\sdefense|emergency\smanagement|public\ssafety|prison|jail|detention|correctional|probation|parole|aadhaar|agenzia\sentrate|anganwadi|asylum\scenter|city\sclerk|environment\soffice|land\sregistry|patent\soffice|pension\soffice|registration\soffice|registry\soffice|unemployment|employment\scenter|citizen\sinformation|consumer\sadvice|state\sarchive|national\sarchive|public\sarchive|guardia\scivil|highway\spatrol|department\sof|ministry\sof|bureau\sof|board\sof\seducation|public\sworks|sanitation|water\sauthority|housing\sauthority|port\sauthority|transit\sauthority)', 'Government'),
+
+    # RELIGIOUS - Places of worship, spiritual
+    (r'(church|temple|mosque|masjid|synagogue|chapel|cathedral|basilica|parish|religious|spiritual|ashram|monastery|convent|abbey|priory|buddhist|hindu|christian|catholic|protestant|orthodox|baptist|methodist|lutheran|presbyterian|pentecostal|evangelical|muslim|islamic|jewish|judai|sikh|gurdwara|gurudwara|baha.?i|shinto|taoist|quaker|mennonite|amish|latter-?day|jehovah|scientolog|meditation\scenter|retreat\scenter|pilgrimage|shrine|pagoda|wat\s|vihara|mission(?!\scontrol)|musalla|place\sof\sworship|rectory|yeshiva|marae|congregation|spiritist|priest|mohel|botanica)', 'Religious'),
+
+    # ENTERTAINMENT - Fun, recreation, sports, arts, culture
+    (r'(sports\s|sport\s|club(?!\shouse)|field$|court\s|gym\s|gymnasium|fitness|athletic|stadium|arena|pool\s|swimming|track\s|golf\s|tennis|soccer|football|basketball|baseball|hockey|volleyball|badminton|squash|racquetball|bowling|billiard|snooker|boxing|martial\sart|karate|judo|taekwondo|aikido|wrestling|fencing|archery|shooting\srange|gun\sclub|yoga\s|pilates|crossfit|cycling|skating|skateboard|skiing|snowboard|surfing|diving|climbing|bouldering|trampoline|gymnastics|dance\s|ballet|museum|theater|theatre|cinema|movie|art\sgallery|art\scenter|art\sstudio|gallery|music\svenue|concert|entertainment|amusement|theme\spark|water\spark|zoo|aquarium|wildlife|safari|botanical|arboretum|casino|gambling|betting|arcade|game\scenter|escape\sroom|laser\stag|paintball|go-?kart|mini\sgolf|comedy\sclub|jazz\sclub|blues\sclub|karaoke|nightclub|disco|rave|circus|carnival|fair\s|rodeo|bullring|race\strack|racecourse|hippodrome|velodrome|skate\spark|bmx|motocross|off-?road|aquatic\scenter|batting\scage|bungee|hang\sglid|paraglid|skydiv|indoor\ssnow|leisure\scenter|recreation\scenter|cultural\scenter|exhibit|festival|philharmon|opera\shouse|opera\scompany|symphony|orchestra|planetarium|observatory|science\scenter|discovery\scenter|children.*amusement|funfair|bouncy\scastle|inflatab|playground|adventure\spark|treetop|zipline|zip\sline|ropes\scourse|obstacle\scourse|ninja\swarrior|canoeing|kayaking|rafting|fishing\spond|fishing\sarea|bird\swatch|nature\sreserve|nature\scenter|hiking\strail|walking\strail|hiking\sarea|beach\spavil|beach\sresort|waterfront|promenade|pier\s(?!fishing)|boardwalk|scenic\spoint|scenic\sspot|lookout|viewpoint|observation|monument|landmark|castle|palace|fortress|historic\ssite|heritage|ruins|amphitheater|bandstand|gazebo|pavilion|curling\shall|scout\shall|scout\shome|village\shall|community\shall|social\shall|civic\scenter|convention\scenter|exhibition\scenter|artist$|band$|choir|musician|entertainer|magician|pyrotechnician|performing\sarts|stage$|sculpture|statuary|painting$|roller\scoaster|haunted\shouse|fairground|ghost\stown|lido|rugby|rugby\sfield|softball\sfield|little\sleague\sfield|water\spolo|cricket\sground|rowing\sarea|weightlifting|off\sroading|prawn\sfishing|raft\strip|mountaineering|summer\stoboggan|pumpkin\spatch|picnic\sground|national\sforest|national\sreserve|national\spark|nature\spreserve|protected\sarea|reenactment|sambodrome|pachinko|mahjong\shouse|children\shall|children.*camp|outdoor\sactivity|outdoor\sbath|onsen|thermal\sbath|day-?use\sonsen|foot\sbath)', 'Entertainment'),
+
+    # FOOD & DINING - Restaurants, bars, food production
+    (r'(restaurant|cafe(?!\steria)|café|coffee\s|espresso|bar\s(?!association)|pub\s|tavern|lounge|brewery|taproom|brewpub|winery|distillery|bakery|patisserie|pastry|dessert|ice\scream|gelato|frozen\syogurt|pizzeria|pizza\s|taco|burrito|sushi|ramen|noodle|dim\ssum|dumpling|steakhouse|steak\shouse|seafood|grill|bbq|barbecue|diner|bistro|brasserie|eatery|canteen|cafeteria|food\scourt|food\struck|food\scart|catering|caterer|buffet|brunch|breakfast|lunch|dinner|takeout|take-?away|delivery\sfood|meal|kitchen(?!\scabinet)|chef\s|cook\s|juice\sbar|smoothie|tea\shouse|traditional\steahouse|bubble\stea|boba|wine\sbar|wine\scellar|cocktail|speakeasy|gastropub|chophouse|crab\shouse|fish\s&\schips|curry|indian\srestaurant|chinese\srestaurant|chinese\stakeaway|italian\srestaurant|mexican\srestaurant|thai\srestaurant|japanese\srestaurant|korean\srestaurant|vietnamese|french\srestaurant|greek\srestaurant|mediterranean|middle\seastern|african\srestaurant|caribbean|latin\samerican|american\srestaurant|fast\sfood|quick\sservice|drive-?thru|dhaba|tiffin|hawker|churreria|creperie|crepe|pastelaria|pasteleria|tapas|izakaya|yakiniku|okonomiyaki|tempura|udon|soba|tonkatsu|kaiseki|robatayaki|teppanyaki|kushiyaki|yakitori|gyudon|poke\sbowl|acai|falafel|shawarma|kebab|gyro|pita|hummus|mezze|tagine|injera|pho|banh\smi|bibimbap|bulgogi|kimchi|hotpot|fondue|raclette|schnitzel|bratwurst|currywurst|pierogi|borscht|blini|pelmeni|empanada|arepa|pupusa|ceviche|asado|churrasco|rodizio|feijoada|moqueca|acaraje|jerk|oxtail|doubles|roti|samosa|biryani|tandoori|masala|tikka|naan|dosa|idli|vada|chaat|thali|satay|laksa|rendang|nasi\sgoreng|pad\sthai|som\stam|tom\syum|green\scurry|massaman|poutine|smoked\smeat|lobster\sroll|clam\schowder|po.?boy|gumbo|jambalaya|soul\sfood|southern\sfood|cajun|creole|carvery|dairy$|frituur|fruit\sparlor|meyhane|sugar\shack|yakatabune|olive\soil\scooperative|soy\ssauce)', 'Food_Dining'),
+
+    # HOME SERVICES - Home improvement, maintenance, repair
+    (r'(plumb|electrician|electrical\scontract|hvac|heating|air\scondition|cooling|roof|landscap|lawn\s|garden\sservice|gardener|arborist|tree\sservice|clean\s(service|company)|cleaning\sservice|cleaners$|pest\scontrol|exterminator|paint\scontract|painter(?!\sartist)|paint\sstrip|carpent|cabinet\smaker|flooring|tile\sinstall|hardwood|carpet\sinstall|repair\sservice|contractor|remodel|renovation|handyman|locksmith\sservice|moving\scompany|mover\s|moving\sand\sstorage|piano\smoving|appliance\srepair|garage\sdoor|gutter|chimney|window\sinstall|door\sinstall|double\sglazing|glass\srepair|fence\s|deck\sbuild|patio|drywall|insulation|siding|masonry|brick|concrete|paving|asphalt|pool\sservice|pool\scleaning|spa\sservice|septic|sewer|drain|water\sheater|well\sdrill|solar\sinstall|solar\spanel\smaintenance|security\ssystem|alarm\sinstall|home\sinspect|building\sinspect|surveyor|interior\sdesign|home\sstaging|pressure\swash|graffiti\sremoval|debris\sremoval|junk\sremoval|house\sclearance|snow\sremoval|antenna\sservice|satellite\sinstall|gasfitter|gas\sinstall|height\sworks|impermeabilization|wallpaper\sinstall|airbrushing|home\shelp|stall\sinstall)', 'Home_Services'),
+
+    # RETAIL & SHOPPING - Stores, shops, markets
+    (r'(store\s|shop\s(?!service)|retail|boutique|market(?!ing)|mall\s|outlet|dealer(?!ship)|supplier|wholesale|distributor|supermarket|grocery|convenience|department\sstore|discount|thrift|consignment|pawn|antique|vintage|secondhand|used\s|book\sstore|stationery|office\ssupply|toy\sstore|game\sstore|hobby|craft\sstore|art\ssupply|music\sstore|record\sstore|electronics|computer\sstore|phone\sstore|appliance\sstore|furniture\sstore|home\sdecor|bedding|mattress|kitchenware|hardware|tool\sstore|building\ssupply|lumber|garden\scenter|plant\snursery|florist|flower\sshop|pet\sstore|pet\ssupply|clothing|fashion|apparel|shoe\sstore|jewelry|watch\sstore|cosmetic|beauty\ssupply|pharmacy|drugstore|health\sstore|vitamin|supplement|sporting\sgoods|outdoor\sstore|bicycle\sshop|gun\sshop|hunting|fishing\sstore|camping|liquor|wine\sshop|beer\sstore|tobacco|cigar|vape|smoke\sshop|candy|chocolate|confection|bakery\sshop|cheese\sshop|spice|tea\sshop|coffee\sshop(?!\scafe)|newsstand|kiosk|vending|bazar|bazaar|hawker\scenter|flea\smarket|farmers\smarket|night\smarket|food\shall|food\scourt|deli(?!very)|delicatessen|charcuterie|butcher|fishmonger|greengrocer|produce|fruit\sstand|flower\sstand|fabric|textile\sshop|yarn|knitting|sewing\sshop|craft\ssuppl|frame\sshop|framing|trophy|engraving|gift\sshop|souvenir|duty\sfree|airport\sshop|convenience|corner\sstore|general\sstore|variety|dollar\sstore|pound\sshop|euro\sshop|99\scent|surplus|closeout|liquidat|outlet\small|factory\soutlet|warehouse\sstore|membership\sclub|costco|sam.*club)', 'Retail_Shopping'),
+
+    # PROFESSIONAL SERVICES - Business services, consulting, legal, creative
+    (r'(lawyer|attorney|law\sfirm|legal\sservice|accountant|accounting|bookkeep|cpa\s|tax\s(prepar|service|consult)|consultant|consulting|architect(?!ure)|engineer(?!ing\sschool)|survey\scompany|land\ssurvey|topograph|agency(?!\sgovernment)|staffing|recruiting|recruiter|employment\sagency|hr\sservice|marketing|advertis|pr\sfirm|public\srelations|graphic\sdesign|web\sdesign|website\sdesign|photography|photographer|videograph|film\sproduction|animation\sstudio|recording\sstudio|rehearsal\sstudio|production\sstudio|portrait\sstudio|model\sportfolio\sstudio|painting\sstudio|translation|interpret|transcription|notary|commissioner\sfor\soaths|private\sinvestigat|detective|appraiser|appraisal|estate\sappraiser|auditor|financial\saudit|actuary|financial\splanner|wealth\smanag|investment\sadvis|business\sconsult|management\sconsult|it\sconsult|media\scompany|media\shouse|record\scompany|scenograph|model\sdesign|telemarket|direct\smail|copywriter|editor|proofreader|technical\swriter|ghostwriter|literary\sagent|talent\sagent|booking\sagent|casting|modeling\sagent|artist\smanage|court\sreport|patent\sagent|trademark|intellectual\sproperty|customs\sbroker|freight\sforward|import\sexport|export\scompany|geological\sresearch|geological\sservice|environmental\sconsult|safety\sconsult|quality\sconsult|process\sserv|skip\strac|bail\senforce|collection\sagent|factoring|mezzanine\sfinance|conveyancer|executor|genealogist|gemologist|loss\sadjuster|foreclosure|insolvency|judicial\sscrivener|commercial\sagent|executive\ssearch|payroll\sservice|resume\sservice|typing\sservice|fax\sservice|mailing\sservice|shredding\sservice|blueprint|drafting|mapping\sservice|research\sand\sproduct|information\sservice|news\sservice|music\smanagement|yacht\sbroker|finance\sbroker|food\sbroker)', 'Professional_Services'),
+
+    # INDUSTRIAL - Manufacturing, construction, mining, utilities, trades
+    (r'(factory|plant(?!\snursery)|mill$|mill\s|manufactur|industrial|mining|mine\s|quarry|production|foundry|forge|smelter|refinery|chemical\s|pharmaceutical\scompan|textile|garment\sfactory|food\sprocessing|cannery|bottling|assembly|fabricat|machine\sshop|metal\swork|metal\sprocess|metallurg|welding|welder|steel|iron\sworks|aluminum|plastic|rubber|paper\smill|lumber\smill|sawmill|saw\smill|print\sshop|commercial\sprint|digital\sprint|packaging|recycling|waste\smanagement|construction\scompany|general\scontractor|building\scompany|building\sfirm|developer|civil\sengineering|demolition|excavat|crane\sservice|scaffold|heavy\sequipment|blacksmith|coppersmith|goldsmith|silversmith|horseshoe|locksmith(?!\sservice)|tinsmith|gunsmith|bladesmith|knifesmith|boilermaker|machinist|millwright|pipefitter|rigger|sheet\smetal|ironwork|structural\ssteel|precast|concrete\splant|asphalt\splant|gravel|aggregate|sand\s&\sgravel|earth\sworks|anodizing|electroplat|galvaniz|powder\scoat|metal\spolish|metal\sfinish|sandblast|shot\sblast|heat\streat|tempering|hardening|casting|die\scast|injection\smold|blow\smold|extrusion|stamping|forging|cnc|lathe|milling\smachine|grinding|boring|drilling|water\sutility|electric\sutility|gas\scompany|power\sstation|power\splant|nuclear\spower|solar\senergy|wind\sfarm|hydroelectric|substation|transformer|utility\scompany|water\spurification|sewage|wastewater|biotechnolog|shipbuilding|ship\srepair|shipyard|dry\sdock|boatyard|marine\sengine|propeller|cotton\smill|flour\smill|rice\smill|jute\smill|water\smill|weaving\smill|cider\smill|slaughterhouse|tannery|dyeworks|meat\spacker|meat\sprocessor|fruit.*processing|glass\sindustry|sewing\scompany|turnery|toolroom|machine\sconstruct|stone\scutter|stone\scarving|joiner|woodworker|plasterer|glazier|plating\sservice|embossing|lamination|laser\scutting|water\sjet|salvage\syard|junkyard|garbage\sdump|waste\stransfer|coalfield|oilfield)', 'Industrial'),
+
+    # HOSPITALITY & TRAVEL - Lodging, tourism
+    (r'(hotel|motel|inn\s|resort|hostel|lodge\s|bed\s&\sbreakfast|bed\sand\sbreakfast|b&b|guesthouse|guest\shouse|vacation\srental|holiday\s(rental|apartment|home)|cabin\srental|cottage\srental|cottage(?!\sindustry)|chalet|airbnb|vrbo|travel\sagent|travel\sagency|tour\soperator|tour\sguide|tourist\s(information|office|attraction)|sightseeing|excursion|cruise|camping|campground|caravan\spark|rv\spark|glamping|youth\shostel|retreat\scenter(?!\sreligious)|boarding\shouse|rooming\shouse|dormitory(?!\sstudent)|rest\sstop|rest\sarea|truck\sstop|service\sarea|visitor\scenter|welcome\scenter|country\shouse|manor\shouse|estate\shouse|villa\srental|apartment\shotel|extended\sstay|residence\sinn|suite\shotel|capsule\shotel|love\shotel|ryokan|minshuku|pension\s|agriturismo|pousada|parador|paradores)', 'Hospitality_Travel'),
+
+    # PERSONAL SERVICES - Beauty, wellness, personal care
+    (r'(salon\s|spa\s(?!automotive)|massage(?!\schair)|tattoo|piercing|body\sart|barber|beauty\s(?!supply|store)|nail\s|manicure|pedicure|hair\s(salon|stylist|dresser|cut)|waxing|threading|lash|brow|eyelash|makeup\sartist|esthetician|cosmetolog|tanning|sunbed|sauna|steam\sroom|bathhouse|hammam|laundry|laundromat|dry\sclean|tailor|alteration|seamstress|shoe\srepair|cobbler|watch\srepair|key\scutting|weight\sloss|diet\scenter|personal\strainer|life\scoach|dating\sservice|matchmak)', 'Personal_Services'),
+
+    # FINANCE & INSURANCE - Banks, financial services
+    (r'(bank(?!\sfood)|credit\sunion|savings\s&\sloan|atm\s|insurance\s(agent|agency|company|broker)|mortgage|loan\s(company|officer|broker)|lending|finance\scompany|financial\sservic|investment\s(firm|company|bank)|stock\sbroker|wealth\smanage|money\stransfer|remittance|currency\sexchange|forex|check\scash|payday\sloan|pawn(?!shop)|bail\sbond|credit\srepair|debt\scollect|factoring|leasing\scompany)', 'Finance_Insurance'),
+
+    # REAL ESTATE - Property, housing, storage
+    (r'(real\sestate|realtor|property\s(agent|management|company)|apartment\s(complex|building|rental)|condo|condominium|housing|home\sbuilder|land\sdeveloper|commercial\sreal|office\sspace|coworking|business\scenter|storage\s(facility|unit)|self.?storage|mini\sstorage|warehouse\sspace|parking\sspace|mobile\shome\spark|trailer\spark)', 'Real_Estate'),
+
+    # EVENTS & WEDDINGS - Event services, funeral
+    (r'(funeral|mortuary|cremation|crematorium|cemetery|memorial\s|casket|burial|wedding\s(planner|venue|dress|photographer)|event\s(planner|venue|center)|party\s(planner|supply|rental)|banquet\shall|reception\shall|conference\scenter|convention|meeting\sroom|catering\shall|dj\sservice|disc\sjockey|band\sfor\shire|balloon|decoration\sservice|tent\srental|photo\sbooth|florist(?!\sshop))', 'Events_Weddings'),
+
+    # NON-PROFIT - Charities, community organizations, social services
+    (r'(charity|charitable|non-?profit|ngo\s|foundation(?!\srepair)|community\scenter|community\sorganiz|civic\s|volunteer|food\sbank|soup\skitchen|homeless\s(shelter|service)|social\sservice|social\sworker|welfare\soffice|crisis\scenter|hotline|support\sgroup|self-?help|aa\s|alcoholics|narcotics\sanonymous|veteran|vfw|american\slegion|rotary|lions\sclub|kiwanis|elks|freemason|masonic|fraternal|chamber\sof\scommerce|chamber\sof\shandicrafts|trade\sassociation|professional\sassociation|labor\sunion|tenant.*union|indigenous|aboriginal|tribal|youth\scenter|youth\scare|youth\sgroup|senior\scitizen\scenter|women.s\s(shelter|center|protection)|domestic\sviolence|battered|abuse\s(shelter|center)|halfway\shouse|sober\sliving|addiction\s(center|service)|recovery\scenter|rehab\scenter(?!ilitation)|detox|mental\shealth\sadvocacy|disability\s(service|advocacy)|deaf\sservice|blind\sservice|immigrant\s(service|aid)|refugee\s(service|aid|camp)|legal\said|pro\sbono|family\sservice|family\splanning|birth\scontrol|child\swelfare|foster\scare|adoption\sagency|big\sbrothers|big\ssisters|boys\s&\sgirls|ymca|ywca|jewish\scommunity|jcc|salvation\sarmy|goodwill|habitat\sfor\shumanity|red\scross|united\sway|make-?a-?wish|special\solympics|donations\scenter|thrift(?!\sstore)|donation\sdrop|orphanage|children.*home|group\shome|shelter$|scouting|literacy\sprogram|crime\svictim|mediation\sservice|special\seducator|playgroup|student\sunion)', 'Non_Profit'),
+
+    # TECHNOLOGY - IT, software, telecom
+    (r'(software|app\sdevelop|web\sdevelop|it\sservice|it\ssupport|computer\sservice|computer\srepair|computer\ssecurity|computer\snetwork|tech\ssupport|data\scenter|data\srecovery|data\sentry|database|server\s(farm|hosting)|cloud\sservice|internet\sservice|isp\s|broadband|telecom|telephone\scompany|mobile\s(operator|network)|cell\sphone\sservice|fiber\soptic|satellite\s(communication|service)|cable\sprovider|cybersecurity|network|systems\sintegrat|bpo|call\scenter|outsourc|automation\scompany|home\sautomation|robotics|ai\scompany|machine\slearning|e-?commerce|digital\smarketing|seo|web\shost|domain\sregist|ssl|vpn|managed\sservice|msp|helpdesk|remote\ssupport|pc\srepair)', 'Technology'),
+
+    # AGRICULTURE - Farming, ranching
+    (r'(farm(?!acy|er.s\smarket)|ranch|agriculture|livestock|cattle|poultry|dairy\sfarm|pig\sfarm|sheep|goat|horse\sfarm|stable(?!\sservice)|equestrian\scenter|riding\sschool|crop|orchard|vineyard(?!\swinery)|plantation|greenhouse|horticulture|nursery(?!school)|floricult|aquaculture|fish\sfarm|beekeep|apiary|agronomy|fertilizer|seed\scompany|farm\sequipment|tractor|irrigation|grain|silo|feed\sstore|livestock\sauction|veterinari.*(large|farm|livestock))', 'Agriculture'),
+
+    # PETS & ANIMALS - Pet services, animal welfare
+    (r'(pet\s(?!rol)|animal\s(?!hospital|clinic)|dog\s(?!hot)|cat\s|bird\s(?!watch)|fish\s(?!market|restaurant)|reptile|aquarium\sstore|vet(?!eran)|veterinar(?!.*large|.*farm)|kennel|doggy\sdaycare|pet\sgrooming|pet\sboarding|pet\ssitting|dog\swalk|pet\strain|animal\sshelter|animal\srescue|animal\scontrol|humane\ssociety|spca|aspca|wildlife\srehab|sanctuary|cattery|aviary|breeder|stud\sservice|horse\sboarding|stable(?!\sindustry)|equine|farrier|horse\sshoe)', 'Pets_Animals'),
+
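+    # FINANCE & INSURANCE (second, extended pattern) - repeats most terms of the earlier Finance pattern (redundant here because first match wins) and adds investment, trading, and crypto terms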
+    (r'(bank(?!\sfood)|credit\sunion|savings\s&\sloan|atm\s|insurance\s(agent|agency|company|broker)|mortgage|loan\s(company|officer|broker)|lending|finance\scompany|financial\sservic|investment\s(firm|company|bank)|stock\sbroker|wealth\smanage|money\stransfer|remittance|currency\sexchange|forex|check\scash|payday\sloan|bail\sbond|credit\srepair|debt\scollect|factoring|leasing\scompany|venture\scapital|private\sequity|hedge\sfund|asset\smanag|trust\scompany|escrow|title\scompany|credit\scounseling|financial\splanning|retirement\splanning|pension\sfund|401k|ira|annuity|securities|commodities|futures|options|trading|brokerage|fintech|mobile\smoney|digital\swallet|cryptocurrency|bitcoin|blockchain)', 'Finance_Insurance'),
+
+    # Catch more rentals and specialized services
+    (r'(equipment\srental|tool\srental|party\srental|tent\srental|chair\srental|table\srental|linen\srental|costume\srental|tuxedo\srental|dress\srental|appliance\srental|furniture\srental|office\sequipment\srental|audiovisual.*rental|av\srental|musical\sinstrument\srental|ski\srental|snowboard\srental|snowmobile\srental|jet\sski\srental|boat\srental|kayak\srental|canoe\srental|bicycle\srental|scooter\srental|segway|atv\srental|motorcycle\srental|rv\srental|camper\srental|trailer\srental|truck\srental|van\srental|car\srental|forklift\srental|crane\srental|scaffolding\srental|construction.*rental|dumpster\srental|portable\stoilet|porta.*potty)', 'Retail_Shopping'),
+
+    # Specialized restoration and repair services
+    (r'(restoration\sservice|furniture\srestoration|antique\srestoration|art\srestoration|photo\srestoration|document\srestoration|clock\srepair|watch\srepair|jewelry\srepair|shoe\srepair|luggage\srepair|leather\srepair|upholstery\srepair|musical\sinstrument\srepair|piano\stuning|guitar\srepair|violin\srepair|camera\srepair|electronics\srepair|phone\srepair|screen\srepair|computer\srepair|printer\srepair|copier\srepair|typewriter|sewing\smachine\srepair|vacuum\srepair|small\sengine\srepair|lawn\smower\srepair|chainsaw|power\stool\srepair|fire\sextinguisher\sservice|scale\srepair|calibration|water\sdamage\srestoration|fire\sdamage|smoke\sdamage|mold\sremediation|biohazard|crime\sscene\sclean|hoarding\sclean)', 'Home_Services'),
+
+    # Specialized trades and craftspeople
+    (r'(clock\smaker|watch\smaker|furniture\smaker|cabinet\smaker|instrument\smaker|stringed\sinstrument\smaker|piano\smaker|organ\sbuilder|luthier|bookbinder|print\smaker|engraver|etcher|lithograph|screen\sprint|sign\smaker|sign\spainter|glass\sblower|stained\sglass|ceramic|pottery|potter|sculptor|woodcarver|wood\sturner|basket\smaker|weaver|spinner|knitter|quilter|longarm|embroidery|monogram|tailor|seamstress|dressmaker|milliner|cobbler|saddle|harness|leather\scraft|upholster|framemaker|gilder|conservator|taxiderm|model\smaker|prop\smaker|costume\smaker|wig\smaker|prosthetic|mask\smaker|puppet|doll\smaker|toy\smaker)', 'Industrial'),
+
+    # Specialized testing and inspection services
+    (r'(testing\sservice|inspection\sservice|asbestos\stest|lead\stest|radon\stest|water\stest|soil\stest|air\squality|environmental\stest|mold\stest|home\sinspect|building\sinspect|property\sinspect|roof\sinspect|termite\sinspect|pest\sinspect|pool\sinspect|chimney\sinspect|septic\sinspect|well\sinspect|electrical\sinspect|plumbing\sinspect|hvac\sinspect|fire\sinspect|safety\sinspect|code\senforcement|energy\saudit|blower\sdoor|duct\stest|infrared|thermal\simag)', 'Professional_Services'),
+
+    # Personal and lifestyle services
+    (r'(psychic|astrologer|fortune\steller|fortune\stelling|palm\sread|tarot|medium|spiritual\sadvis|feng\shui|numerolog|grapholog|hypnotherap|hypnosis|past\slife|akashic|aura|chakra|reiki|energy\shealing|crystal\shealing|sound\shealing|aromatherap|reflexolog|iridolog|kinesiology|craniosacral|rolfing|alexander\stechnique|feldenkrais|pilates\sinstructor|yoga\sinstructor|meditation\sinstructor|breathwork|pranayama|ayurved|traditional\schinese|tcm|herbalist|naturopath|homeopath|beautician|esthetician|esthetics|body\sshaping|boot\scamp|loctician|mehandi|mehndi|teeth\swhitening|wellness\sprogram|alternative\smedicine\spractitioner)', 'Personal_Services'),
+
+    # More Government patterns
+    (r'(archive$|birth\scertificate|city\semployment|state\semployment|company\sregistry|district\sjustice|justice\sdepartment|land\splanning|urban\splanning|toll\sstation|traffic\sofficer|weigh\sstation|sanitary\sinspect|smog\sinspect|superfund|water\sworks|weather\sforecast|ground\sself\sdefense|united\sstates\sarmed|radio\sbroadcaster|television\sstation|closed\scircuit|communications\stower)', 'Government'),
+
+    # More Transportation patterns
+    (r'(boat\sramp|container\sterminal|helicopter\scharter|river\sport|transportation\sservice|transportation\sescort|fixed-?base\soperator|handicapped\stransportation|carpooling)', 'Transportation'),
+
+    # More Finance patterns
+    (r'(diamond\sbuyer|financial\sinstitution|holding\scompany|leasing\sservice|stock\sexchange|money\sorder|payment\sterminal)', 'Finance_Insurance'),
+
+    # More Real Estate patterns
+    (r'(corporate\soffice|display\shome|townhouse\scomplex|villa$|serviced\s(accommodation|apartment)|function\sroom|virtual\soffice)', 'Real_Estate'),
+
+    # More Entertainment/Sports patterns
+    (r'(fishing\s(camp|charter|pier)|horseback\sriding|horse\srental|equestrian\sfacility|outdoor\sequestrian|salsa\sclass|wood\sworking\sclass|stitching\sclass|childbirth\sclass|mehandi\sclass)', 'Entertainment'),
+
+    # More Industrial/Repair patterns
+    (r'(engine\srebuilding|machine\smaintenance|saw\ssharpening|skate\ssharpening|sharpening\sservice|lpg\sconversion|cng\sfitment|boat\sdetailing|rv\sdetailing|rv\srepair|bike\swash|fire\sprotection|elevator\sservice|drone\sservice)', 'Industrial'),
+
+    # More Retail patterns
+    (r'(haberdashery|jeweler$|lapidary|glass\smerchant|furniture\saccessories|showroom$|tesla\sshowroom|bottle.*redemption|coin\soperated)', 'Retail_Shopping'),
+
+    # More Professional Services patterns
+    (r'(building\sdesigner|polygraph|professional\sorganizer|video\s(conferencing|duplication|editing)|meeting\splanning|personal\sconcierge|house\ssitter|marriage\scelebrant|singing\stelegram|roommate\sreferral)', 'Professional_Services'),
+
+    # Miscellaneous remaining - catch-all for specific items
+    (r'(agistment|auction\shouse|appliances\scustomer|bicycle\srack|bridge$|building\sequipment\shire|container\sservice|distribution\sservice|diaper\sservice|divorce\sservice|drinking\swater\sfountain|energy\sequipment|environment\srenewable|forestry\sservice|fur\sservice|garbage\scollection|garden$|handicraft|hiking\sguide|homekill|judicial\sauction|key\sduplication|land\sallotment|line\smark|livery\scompany|lodge$|lodging$|lyceum|mailbox\srental|marquee\shire|memorial$|mercantile|mineral\swater\scompany|mold\smaker|office\srefurbish|oil\sand\sgas\sexploration|orchid\sgrower|package\slocker|pedestrian\szone|road\ssafety\stown|sacem|sailmaker|seating\ssystems|security\s(guard|service)|shoe\sshining|societe|staple\sfood|tenant\sownership|ticket\soffice|weir|wi-?fi\sspot)', 'Other'),
+]
+
+def get_sector_for_item(name):
+    """
+    Determine which sector an item belongs to.
+    Returns sector slug or 'Other' if no match.
+    """
+    name_lower = name.lower()
+
+    for pattern, sector in SECTOR_PATTERNS:
+        if re.search(pattern, name_lower, re.IGNORECASE):
+            return sector
+
+    return 'Other'
+
+
+# ==================== LEVEL 2: BUSINESS TYPE PATTERNS ====================
+# sector -> ordered list of (regex, business type); within a sector the first matching pattern wins, so order matters
+
+BUSINESS_TYPE_PATTERNS = {
+    'Entertainment': [
+        (r'(fitness|gym|workout|crossfit|pilates|yoga|aerobic|exercise|weight\s(room|training)|spin\sclass|bootcamp)', 'Fitness'),
+        (r'(sports\s|athletic|stadium|arena|field\s|court\s|track\s|league|team\s)', 'Sports'),
+        (r'(museum|exhibit|gallery|art\s(center|gallery)|sculpture)', 'Museums'),
+        (r'(theater|theatre|playhouse|opera|ballet|symphony|orchestra|concert|performance|show)', 'Performing Arts'),
+        (r'(cinema|movie|film|drive-?in)', 'Movies'),
+        (r'(park(?!ing)|playground|recreation|picnic|garden|botanical|arboretum|nature|trail)', 'Parks'),  # (?!ing): "parking" is not a park
+        (r'(amusement|theme\spark|water\spark|carnival|fair|ride|attraction)', 'Amusement'),
+        (r'(arcade|game|escape\sroom|laser|paintball|go.?kart|bowling|billiard|mini\sgolf)', 'Games & Recreation'),
+        (r'(casino|gambling|betting|poker|slot)', 'Gambling'),
+        (r'(club|nightclub|disco|bar|lounge)', 'Social'),
+        (r'(zoo|aquarium|wildlife|safari|sanctuary)', 'Wildlife'),
+        (r'(music|concert|jazz|blues|rock|karaoke)', 'Music Venues'),
+    ],
+    'Healthcare': [
+        (r'(hospital|medical\scenter|health\scenter)', 'Hospitals'),
+        (r'(clinic|office|practice|urgent\scare)', 'Clinics'),
+        (r'(dentist|dental|orthodont|oral\ssurg|periodont|endodont)', 'Dental'),
+        (r'(eye|vision|optom|optician|ophthalmolog)', 'Vision Care'),
+        (r'(mental|psych|counsel|therapist|psychiatr)', 'Mental Health'),
+        (r'(chiropract|acupunct|naturopath|homeopath|osteopath|alternative|holistic)', 'Alternative Medicine'),
+        (r'(physical\stherap|occupational|speech|rehab)', 'Rehabilitation'),
+        (r'(lab|diagnostic|patholog|radiology|x-?ray|imaging|blood\stest)', 'Diagnostics'),
+        (r'(pharmacy|drugstore|prescription)', 'Pharmacies'),
+        (r'(senior|aged|elder|nursing\shome|assisted)', 'Senior Care'),
+        (r'(emergency|ambulance|paramedic|first\said|urgent)', 'Emergency Services'),
+        (r'(veterinar|vet\s|animal\s(hospital|clinic))', 'Veterinary'),
+        (r'(doctor|physician|surgeon|specialist|practitioner)', 'Medical Practitioners'),
+    ],
+    'Food_Dining': [
+        (r'(restaurant|eatery|dining|bistro|brasserie|grill|steakhouse)', 'Restaurants'),
+        (r'(cafe|café|coffee|espresso|tea\shouse)', 'Cafes & Coffee'),
+        (r'(bar\s|pub|tavern|brewery|taproom|lounge|cocktail|wine\sbar)', 'Bars & Nightlife'),
+        (r'(bakery|patisserie|pastry|bread|donut|bagel)', 'Bakeries & Desserts'),
+        (r'(ice\scream|gelato|dessert|frozen\syogurt|candy|chocolate)', 'Bakeries & Desserts'),
+        (r'(fast\sfood|quick\sservice|drive.?thru|takeout|take.?away)', 'Quick Service'),
+        (r'(caterer|catering|food\sservice|meal\sprep)', 'Food Services'),
+        (r'(winery|distillery|vineyard)', 'Beverage Production'),
+    ],
+    'Home_Services': [
+        (r'(plumb|pipe|drain|sewer|septic)', 'Plumbing'),
+        (r'(electric|wiring|panel|outlet)', 'Electrical'),
+        (r'(hvac|heat|cool|air\scondition|furnace)', 'HVAC'),
+        (r'(roof|gutter|shingle)', 'Roofing'),
+        (r'(landscap|lawn|garden|tree|arbor)', 'Landscaping'),
+        (r'(clean|maid|janitor|housekeep)', 'Cleaning'),
+        (r'(pest|exterminator|termite)', 'Pest Control'),
+        (r'(paint|drywall|plaster|wallpaper)', 'Construction'),
+        (r'(floor|carpet|tile|hardwood)', 'Flooring'),
+        (r'(window|door|glass)', 'Windows & Doors'),
+        (r'(pool|spa|hot\stub)', 'Pool & Spa'),
+        (r'(security|alarm|lock|safe)', 'Security'),
+        (r'(appliance|washer|dryer|refrigerator)', 'Appliance Repair'),
+        (r'(handyman|repair|fix|maintenance)', 'General Repair'),
+        (r'(construct|build|remodel|renovation|contractor)', 'Construction'),
+        (r'(mov(er|ing)|relocat)', 'Moving'),
+        (r'(interior|decor|design|stag)', 'Design'),
+    ],
+}
+
+
+def get_business_type_for_item(name, sector):
+    """Resolve an item's business type within an already-chosen sector.
+
+    Sectors absent from BUSINESS_TYPE_PATTERNS give 'Other' without looking at
+    the name. Otherwise the lowercased name is tried against that sector's
+    (pattern, business_type) pairs in order; the first re.search hit wins and
+    'Other' is the fallback when none match.
+    """
+    if sector in BUSINESS_TYPE_PATTERNS:
+        lowered = name.lower()
+        for regex, label in BUSINESS_TYPE_PATTERNS[sector]:
+            if re.search(regex, lowered, re.IGNORECASE):
+                return label
+
+    return 'Other'
+
+
+def main():
+    """Categorize item names and print a sector / business-type report.
+
+    Names are read one per line from the file named by the first command-line
+    argument, or from stdin when no argument is given; blank lines are dropped.
+    """
+    import sys
+
+    def _read_names(stream):
+        # Strip every line once and keep only the non-empty results.
+        stripped = (raw.strip() for raw in stream)
+        return [text for text in stripped if text]
+
+    # Input source: argv[1] when present, otherwise stdin.
+    if len(sys.argv) > 1:
+        with open(sys.argv[1]) as handle:
+            items = _read_names(handle)
+    else:
+        items = _read_names(sys.stdin)
+
+    # sector -> business type -> names (names keep their input order)
+    by_sector = {}
+    for name in items:
+        sector = get_sector_for_item(name)
+        btype = get_business_type_for_item(name, sector)
+        by_sector.setdefault(sector, {}).setdefault(btype, []).append(name)
+
+    rule = "=" * 60
+    ordered_sectors = sorted(by_sector)
+
+    print(f"Total items: {len(items)}\n")
+
+    # Sector summary: total per sector and how many landed in 'Other'.
+    print(rule)
+    print("SECTOR SUMMARY")
+    print(rule)
+    for sector in ordered_sectors:
+        groups = by_sector[sector]
+        total = sum(map(len, groups.values()))
+        other_count = len(groups.get('Other', []))
+        print(f"{sector}: {total} items ({other_count} in Other)")
+
+    print("\n" + rule)
+    print("DETAILED BREAKDOWN")
+    print(rule)
+
+    # Groups of up to 10 names are listed in full; larger ones show 5 and a count.
+    for sector in ordered_sectors:
+        print(f"\n### {sector} ###")
+        for btype, names in sorted(by_sector[sector].items()):
+            print(f"  {btype}: {len(names)}")
+            ordered = sorted(names)
+            shown = ordered if len(names) <= 10 else ordered[:5]
+            for name in shown:
+                print(f"    - {name}")
+            if len(names) > 10:
+                print(f"    ... and {len(names) - 5} more")
+
+
+if __name__ == '__main__':  # run the report only when executed as a script, not on import
+    main()
--- a/db/recategorize_other.py
+++ b/db/recategorize_other.py
@@ -0,0 +1,555 @@
+#!/usr/bin/env python3
+"""
+Recategorize items from Other.Uncategorized into appropriate existing categories.
+
+RULES:
+1. NEVER create new Level 1 (Sector) categories
+2. Only create new Level 2 (Business Type) if >10 items would use it
+3. Only create new Level 3 (Sub-category) if >5 items would use it
+4. Prefer matching to existing categories at all times
+5. If uncertain, leave in Other
+
+EXISTING SECTORS (21 non-Other):
+- Agriculture: Farming, Services
+- Automotive: Dealers, Fuel & Charging, Parking, Parts & Accessories, Rental Services, Repair & Maintenance, Training, Vehicle Care
+- Education: Arts Education, Early Childhood, Higher Education, K-12 Schools, Language Learning, Libraries, Professional Training, Specialty Schools, Sports Training, Technology Training, Tutoring, Vocational Training
+- Entertainment: Amusement, Arts, Fitness, Gambling, Games & Recreation, Movies, Museums, Music Venues, Parks, Performing Arts, Recreation, Social, Sports, Venues, Wildlife
+- Events_Weddings: Attire, Florists, Memorial, Planning, Rentals, Services, Venues
+- Finance_Insurance: Banking, Insurance, Investment, Lending, Money Services
+- Food_Dining: Bakeries & Desserts, Bars & Nightlife, Beverage Production, Cafes & Coffee, Food Services, Quick Service, Restaurants
+- Government: International, Legal, Local Government, Postal, Public Safety, Social Services, Transportation
+- Healthcare: Alternative Medicine, Clinics, Dental, Diagnostics, Emergency Services, Hospitals, Medical Practitioners, Mental Health, Pharmacies, Rehabilitation, Senior Care, Specialty Care, Veterinary, Vision Care
+- Home_Services: Appliance Repair, Cleaning, Construction, Design, Electrical, Flooring, General Repair, HVAC, Landscaping, Moving, Pest Control, Plumbing, Pool & Spa, Roofing, Security, Windows & Doors
+- Hospitality_Travel: Attractions, Lodging, Transportation, Travel Services
+- Industrial: Construction, Manufacturing, Mining
+- Non_Profit: Charities, Community, General, Professional
+- Personal_Services: Body Art, Clothing Care, Fitness, Hair Care, Laundry, Massage, Spa & Wellness
+- Pets_Animals: Animal Welfare, Pet Services
+- Professional_Services: Agencies, Business Services, Consulting, Creative Services, Design, Engineering, Financial Services, HR Services, Language Services, Legal, Marketing & Advertising
+- Real_Estate: Agencies, Commercial, Development, Management, Residential, Services, Storage
+- Religious: Buddhism, Christian, Hinduism, Islam, Judaism, Other
+- Retail_Shopping: Arts & Crafts, Beauty & Cosmetics, Books & Office, Clothing & Fashion, Electronics, Food & Grocery, Hardware & Building, Health & Pharmacy, Home & Garden, Jewelry & Watches, Markets, Music & Entertainment, Pet Supplies, Secondhand & Vintage, Specialty Retail, Sports & Outdoors, Toys & Hobbies, Wholesale & Distribution
+- Technology: IT Services, Infrastructure, Software, Telecommunications
+- Transportation: Delivery, Logistics, Passenger, Public Transit, Vehicle Services
+"""
+
+import psycopg2
+import re
+from collections import defaultdict
+
+# Database connection URL. NOTE(review): user/password are hard-coded in source; consider loading this from the environment instead.
+DB_URL = "postgresql://scraper:scraper123@localhost:5437/scraper"
+
+def slugify(text):
+    """Build an underscore slug (case is kept): drop characters other than word
+    chars, whitespace and '-', then turn each run of '-'/whitespace into one '_'."""
+    without_punct = re.sub(r'[^\w\s-]', '', text)
+    return re.sub(r'[-\s]+', '_', without_punct).strip('_')
+
+# ==================== CATEGORIZATION RULES ====================
+# Format: (keyword_pattern, sector, business_type, sub_category)
+# Use regex patterns for flexibility
+
+CATEGORIZATION_RULES = [
+    # ==================== SPORTS & FITNESS (→ Entertainment.Sports or Entertainment.Fitness) ====================
+    # Sports clubs and facilities
+    (r'\b(basketball|baseball|football|soccer|tennis|golf|hockey|rugby|cricket|volleyball|badminton|squash|racquetball)\b.*(club|court|field|ground|stadium|arena|complex)', 'Entertainment', 'Sports', 'Facilities'),
+    (r'\b(swimming|diving|aquatic|pool)\b.*(club|center|pool|facility)', 'Entertainment', 'Sports', 'Aquatic'),
+    (r'\b(gym|fitness|workout|crossfit|aerobic|pilates|yoga|zumba)\b.*(center|studio|club|class)', 'Entertainment', 'Fitness', 'Studios'),
+    (r'\b(martial arts|karate|judo|taekwondo|aikido|boxing|kickboxing|mma|wrestling|fencing)\b.*(club|school|academy|dojo|studio)', 'Entertainment', 'Sports', 'Martial_Arts'),
+    (r'\b(archery|shooting|rifle|gun)\b.*(range|club|center)', 'Entertainment', 'Sports', 'Shooting'),
+    (r'\b(skateboard|skate park|bmx|cycling|bicycle)\b.*(park|venue|club|center)', 'Entertainment', 'Sports', 'Cycling_Skating'),
+    (r'\b(climbing|bouldering|rock climbing)\b.*(gym|wall|center|club)', 'Entertainment', 'Fitness', 'Climbing'),
+    (r'\b(dance|ballet|ballroom|salsa|tango)\b.*(studio|school|class|instructor)', 'Entertainment', 'Performing Arts', 'Dance'),
+    (r'\bsports\b.*(center|complex|facility|club)', 'Entertainment', 'Sports', 'General'),
+    (r'\bathletic\b.*(field|track|club|center)', 'Entertainment', 'Sports', 'Facilities'),
+    (r'\b(rowing|canoeing|kayaking|sailing|boat)\b.*(club|center|school)', 'Entertainment', 'Sports', 'Water_Sports'),
+    (r'\b(equestrian|horse|polo|riding)\b.*(club|center|school|stable|arena)', 'Entertainment', 'Sports', 'Equestrian'),
+    (r'\b(ski|snowboard|ice skating|ice rink)\b.*(resort|center|club|rink)', 'Entertainment', 'Sports', 'Winter_Sports'),
+
+    # Instructors and trainers
+    (r'\b(fitness|personal|sports|athletic)\b.*\b(trainer|instructor|coach)\b', 'Entertainment', 'Fitness', 'Trainers'),
+    (r'\baerobic.*instructor\b', 'Entertainment', 'Fitness', 'Trainers'),
+
+    # ==================== HEALTHCARE (various) ====================
+    # Medical specialists
+    (r'\b(allergist|anesthesiologist|cardiologist|dermatologist|endocrinologist|gastroenterologist|geriatrician|hematologist|immunologist|nephrologist|neurologist|oncologist|ophthalmologist|orthopedist|otolaryngologist|pathologist|pediatrician|physiatrist|podiatrist|proctologist|pulmonologist|radiologist|rheumatologist|urologist)\b', 'Healthcare', 'Medical Practitioners', 'Specialists'),
+    (r'\b(audiologist|speech therapist|occupational therapist|physical therapist)\b', 'Healthcare', 'Rehabilitation', 'Therapists'),
+    (r'\b(psychologist|psychiatrist|counselor|therapist)\b(?!.*massage)', 'Healthcare', 'Mental Health', 'Practitioners'),
+    (r'\b(chiropractor|osteopath|naturopath|homeopath|acupuncturist|herbalist)\b', 'Healthcare', 'Alternative Medicine', 'Practitioners'),
+    (r'\b(optometrist|optician)\b', 'Healthcare', 'Vision Care', 'Practitioners'),
+    (r'\b(medical|health)\b.*(center|clinic|office|practice)', 'Healthcare', 'Clinics', 'General'),
+    (r'\b(aged care|elder care|senior care|nursing home|assisted living|retirement)\b', 'Healthcare', 'Senior Care', 'Facilities'),
+    (r'\b(blood bank|blood donation|plasma)\b', 'Healthcare', 'Diagnostics', 'Blood_Services'),
+    (r'\b(dialysis|kidney)\b.*(center|clinic)', 'Healthcare', 'Specialty Care', 'Dialysis'),
+    (r'\b(fertility|ivf|reproductive)\b.*(clinic|center)', 'Healthcare', 'Specialty Care', 'Fertility'),
+    (r'\b(hospice|palliative)\b', 'Healthcare', 'Senior Care', 'Hospice'),
+    (r'\b(medical lab|laboratory|pathology|diagnostic)\b.*(center|lab)', 'Healthcare', 'Diagnostics', 'Labs'),
+    (r'\b(ambulance|emergency|paramedic|first aid)\b', 'Healthcare', 'Emergency Services', 'EMS'),
+
+    # ==================== AUTOMOTIVE (various) ====================
+    (r'\bauto\b.*(body|paint|dent|collision|restoration|upholster)', 'Automotive', 'Repair & Maintenance', 'Body_Work'),
+    (r'\bauto\b.*(repair|mechanic|service|tune.?up|brake|transmission|radiator)', 'Automotive', 'Repair & Maintenance', 'Mechanical'),
+    (r'\bauto\b.*(auction|broker|dealer)', 'Automotive', 'Dealers', 'Used_Vehicles'),
+    (r'\bauto\b.*(wrecker|salvage|junk|dismantl)', 'Automotive', 'Parts & Accessories', 'Salvage'),
+    (r'\b(car|vehicle|auto)\b.*(wash|detail|clean|wax)', 'Automotive', 'Vehicle Care', 'Cleaning'),
+    (r'\b(car|vehicle|auto)\b.*(rental|hire|lease)', 'Automotive', 'Rental Services', 'Vehicles'),
+    (r'\b(car|vehicle|auto)\b.*(storage|parking)', 'Automotive', 'Parking', 'Storage'),
+    (r'\b(motorcycle|motorbike|scooter|atv|quad)\b.*(dealer|shop|rental|repair)', 'Automotive', 'Dealers', 'Motorcycles'),
+    (r'\b(tire|tyre|wheel)\b.*(shop|store|service|dealer)', 'Automotive', 'Parts & Accessories', 'Tires'),
+    (r'\b(driving|driver)\b.*(school|training|instructor|lesson)', 'Automotive', 'Training', 'Driving_Schools'),
+    (r'\btruck\b.*(stop|dealer|rental|repair)', 'Automotive', 'Dealers', 'Trucks'),
+    (r'\b(rickshaw|auto rickshaw)\b', 'Transportation', 'Passenger', 'Local'),
+
+    # ==================== GOVERNMENT & MILITARY ====================
+    (r'\b(air force|army|navy|military|armed forces)\b.*(base|facility|office|recruitment)', 'Government', 'Public Safety', 'Military'),
+    (r'\b(police|sheriff|law enforcement)\b.*(station|department|office)', 'Government', 'Public Safety', 'Police'),
+    (r'\b(fire|firefighter)\b.*(station|department)', 'Government', 'Public Safety', 'Fire'),
+    (r'\b(court|courthouse|tribunal|judiciary)\b', 'Government', 'Legal', 'Courts'),
+    (r'\b(embassy|consulate|visa)\b.*(office|center)', 'Government', 'International', 'Diplomatic'),
+    (r'\b(city|town|municipal|county|district|borough)\b.*(hall|office|government|administration)', 'Government', 'Local Government', 'Offices'),
+    (r'\b(social services|welfare|unemployment|disability)\b.*(office|center)', 'Government', 'Social Services', 'Welfare'),
+    (r'\b(dmv|driver.*license|vehicle registration|motor vehicle)\b', 'Government', 'Transportation', 'DMV'),
+    (r'\b(passport|immigration|citizenship)\b.*(office|center)', 'Government', 'International', 'Immigration'),
+    (r'\b(aadhaar|agenzia entrate|tax)\b.*(office|center)', 'Government', 'Local Government', 'Tax'),
+    (r'\b(asylum|refugee)\b.*(center|office)', 'Government', 'Social Services', 'Refugee'),
+
+    # ==================== PETS & ANIMALS ====================
+    (r'\b(animal|pet)\b.*(shelter|rescue|adoption|welfare|pound|sanctuary)', 'Pets_Animals', 'Animal Welfare', 'Shelters'),
+    (r'\b(animal|pet)\b.*(hospital|clinic|vet|veterinary)', 'Healthcare', 'Veterinary', 'Clinics'),
+    (r'\b(animal|pet)\b.*(grooming|boarding|kennel|daycare|sitting|walking)', 'Pets_Animals', 'Pet Services', 'Care'),
+    (r'\b(animal|pet)\b.*(training|obedience|behavior)', 'Pets_Animals', 'Pet Services', 'Training'),
+    (r'\b(dog|cat|bird|fish|reptile|aquarium)\b.*(breeder|shop|store)', 'Retail_Shopping', 'Pet Supplies', 'Breeders'),
+    (r'\bzoo\b|aquarium|wildlife.*park|safari', 'Entertainment', 'Wildlife', 'Zoos'),
+
+    # ==================== RELIGIOUS ====================
+    (r'\b(church|chapel|cathedral|basilica|parish)\b', 'Religious', 'Christian', 'Churches'),
+    (r'\b(temple|mandir|hindu)\b', 'Religious', 'Hinduism', 'Temples'),
+    (r'\b(mosque|masjid|islamic)\b', 'Religious', 'Islam', 'Mosques'),
+    (r'\b(synagogue|jewish|judaism)\b', 'Religious', 'Judaism', 'Synagogues'),
+    (r'\b(buddhist|buddha|monastery|zen|meditation center)\b', 'Religious', 'Buddhism', 'Temples'),
+    (r'\b(ashram|spiritual|guru)\b', 'Religious', 'Other', 'Spiritual'),
+    (r'\b(baha.*i|sikh|gurdwara|shinto)\b', 'Religious', 'Other', 'Houses_of_Worship'),
+
+    # ==================== EDUCATION ====================
+    (r'\b(university|college|faculty|academic department)\b', 'Education', 'Higher Education', 'Universities'),
+    (r'\b(preschool|kindergarten|nursery|daycare|child.*care|creche)\b(?!.*animal)', 'Education', 'Early Childhood', 'Preschools'),
+    (r'\b(school|academy)\b(?!.*driving|.*martial|.*dance|.*music|.*art|.*beauty|.*cooking|.*flight)', 'Education', 'K-12 Schools', 'General'),
+    (r'\b(language|esl|english)\b.*(school|class|course|learning)', 'Education', 'Language Learning', 'Schools'),
+    (r'\b(art|drawing|painting)\b.*(school|class|studio)', 'Education', 'Arts Education', 'Visual_Arts'),
+    (r'\b(music|piano|guitar|violin|drum)\b.*(school|lesson|instructor|teacher)', 'Education', 'Arts Education', 'Music'),
+    (r'\b(acting|theater|drama)\b.*(school|class|academy)', 'Education', 'Arts Education', 'Performing'),
+    (r'\b(tutoring|tutor|coaching)\b.*(center|service)', 'Education', 'Tutoring', 'General'),
+    (r'\b(library|public library)\b', 'Education', 'Libraries', 'Public'),
+    (r'\b(archive|historical|museum)\b.*library', 'Education', 'Libraries', 'Special'),
+    (r'\b(vocational|trade|technical)\b.*(school|training|institute)', 'Education', 'Vocational Training', 'General'),
+    (r'\b(apprentice|internship)\b', 'Education', 'Vocational Training', 'Apprenticeships'),
+    (r'\b(flight|aviation|pilot)\b.*(school|training|academy)', 'Education', 'Specialty Schools', 'Aviation'),
+    (r'\b(cooking|culinary|chef)\b.*(school|class|academy)', 'Education', 'Specialty Schools', 'Culinary'),
+    (r'\b(beauty|cosmetology|esthetician)\b.*(school|academy)', 'Education', 'Specialty Schools', 'Beauty'),
+
+    # ==================== HOME SERVICES ====================
+    (r'\b(bathroom|kitchen)\b.*(remodel|renovation|contractor)', 'Home_Services', 'Construction', 'Remodeling'),
+    (r'\b(general|home)\b.*contractor', 'Home_Services', 'Construction', 'General'),
+    (r'\b(painter|painting)\b.*(contractor|service|company)(?!.*auto)', 'Home_Services', 'Construction', 'Painting'),
+    (r'\b(carpenter|carpentry|cabinet|woodwork)\b', 'Home_Services', 'Construction', 'Carpentry'),
+    (r'\b(mason|masonry|brick|concrete|stone)\b.*(contractor|service|company)', 'Home_Services', 'Construction', 'Masonry'),
+    (r'\b(electrician|electrical)\b.*(contractor|service|company)', 'Home_Services', 'Electrical', 'Contractors'),
+    (r'\b(plumber|plumbing)\b.*(contractor|service|company)', 'Home_Services', 'Plumbing', 'Contractors'),
+    (r'\b(hvac|heating|air conditioning|furnace)\b.*(contractor|service|company)', 'Home_Services', 'HVAC', 'Contractors'),
+    (r'\b(roofer|roofing)\b.*(contractor|service|company)', 'Home_Services', 'Roofing', 'Contractors'),
+    (r'\b(landscap|lawn|garden)\b.*(service|company|contractor)(?!.*store|.*center)', 'Home_Services', 'Landscaping', 'Services'),
+    (r'\b(pool|spa)\b.*(service|cleaning|maintenance|contractor)', 'Home_Services', 'Pool & Spa', 'Services'),
+    (r'\b(pest|exterminator|termite)\b.*(control|service)', 'Home_Services', 'Pest Control', 'Services'),
+    (r'\b(cleaning|maid|janitorial|housekeeping)\b.*(service|company)', 'Home_Services', 'Cleaning', 'Services'),
+    (r'\b(window)\b.*(cleaning|wash)', 'Home_Services', 'Cleaning', 'Window'),
+    (r'\b(appliance)\b.*(repair|service)', 'Home_Services', 'Appliance Repair', 'Services'),
+    (r'\b(handyman|odd job|home repair)\b', 'Home_Services', 'General Repair', 'Handyman'),
+    (r'\b(moving|movers|relocation)\b.*(company|service)', 'Home_Services', 'Moving', 'Services'),
+    (r'\b(locksmith)\b', 'Home_Services', 'Security', 'Locksmith'),
+    (r'\b(alarm|security system)\b.*(company|service|installer)', 'Home_Services', 'Security', 'Systems'),
+    (r'\b(arborist|tree)\b.*(service|removal|trimming)', 'Home_Services', 'Landscaping', 'Tree_Service'),
+    (r'\b(fence)\b.*(contractor|company|install)', 'Home_Services', 'Construction', 'Fencing'),
+    (r'\b(garage door)\b.*(service|repair|install)', 'Home_Services', 'General Repair', 'Garage_Doors'),
+    (r'\b(gutter)\b.*(cleaning|service|install)', 'Home_Services', 'Construction', 'Gutters'),
+    (r'\b(insulation)\b.*(contractor|company)', 'Home_Services', 'Construction', 'Insulation'),
+    (r'\b(deck|patio)\b.*(builder|contractor)', 'Home_Services', 'Construction', 'Outdoor'),
+    (r'\b(drywall|sheetrock)\b', 'Home_Services', 'Construction', 'Drywall'),
+    (r'\b(flooring|carpet|tile|hardwood)\b.*(install|contractor|company)(?!.*store)', 'Home_Services', 'Flooring', 'Installation'),
+    (r'\b(window|door)\b.*(install|replacement|contractor)', 'Home_Services', 'Windows & Doors', 'Installation'),
+    (r'\b(glass)\b.*(repair|replacement|company)(?!.*auto)', 'Home_Services', 'Windows & Doors', 'Glass'),
+    (r'\b(chimney)\b.*(sweep|cleaning|repair)', 'Home_Services', 'General Repair', 'Chimney'),
+    (r'\b(septic|sewer)\b.*(service|pumping|cleaning)', 'Home_Services', 'Plumbing', 'Septic'),
+    (r'\b(well)\b.*(drilling|service|pump)', 'Home_Services', 'Plumbing', 'Wells'),
+    (r'\b(solar)\b.*(install|contractor|company)', 'Home_Services', 'Electrical', 'Solar'),
+
+    # ==================== RETAIL & SHOPPING ====================
+    (r'\b(antique|vintage|secondhand|thrift|consignment|pawn)\b.*(shop|store)', 'Retail_Shopping', 'Secondhand & Vintage', 'Stores'),
+    (r'\b(auction)\b.*(house|company)', 'Retail_Shopping', 'Secondhand & Vintage', 'Auctions'),
+    (r'\b(art|craft|hobby)\b.*(supply|store|shop)', 'Retail_Shopping', 'Arts & Crafts', 'Supplies'),
+    (r'\b(toy|game|hobby)\b.*(store|shop)', 'Retail_Shopping', 'Toys & Hobbies', 'Stores'),
+    (r'\b(book|stationery|office supply)\b.*(store|shop)', 'Retail_Shopping', 'Books & Office', 'Stores'),
+    (r'\b(music|instrument|record|vinyl)\b.*(store|shop)', 'Retail_Shopping', 'Music & Entertainment', 'Stores'),
+    (r'\b(sporting|sports|outdoor|camping|fishing|hunting)\b.*(goods|store|shop)', 'Retail_Shopping', 'Sports & Outdoors', 'Stores'),
+    (r'\b(electronics|computer|phone|appliance)\b.*(store|shop|retailer)', 'Retail_Shopping', 'Electronics', 'Stores'),
+    (r'\b(furniture|home decor|bedding|mattress)\b.*(store|shop)', 'Retail_Shopping', 'Home & Garden', 'Stores'),
+    (r'\b(clothing|fashion|apparel|boutique|shoe)\b.*(store|shop)', 'Retail_Shopping', 'Clothing & Fashion', 'Stores'),
+    (r'\b(jewelry|watch|gem)\b.*(store|shop)', 'Retail_Shopping', 'Jewelry & Watches', 'Stores'),
+    (r'\b(hardware|tool|building supply|lumber)\b.*(store|shop)', 'Retail_Shopping', 'Hardware & Building', 'Stores'),
+    (r'\b(garden|nursery|plant)\b.*(center|store|shop)', 'Retail_Shopping', 'Home & Garden', 'Garden_Centers'),
+    (r'\b(pharmacy|drugstore)\b', 'Retail_Shopping', 'Health & Pharmacy', 'Pharmacies'),
+    (r'\b(cosmetic|beauty|makeup)\b.*(store|shop)', 'Retail_Shopping', 'Beauty & Cosmetics', 'Stores'),
+    (r'\b(grocery|supermarket|food|convenience)\b.*(store|market|shop)', 'Retail_Shopping', 'Food & Grocery', 'Stores'),
+    (r'\b(liquor|wine|beer|alcohol)\b.*(store|shop)', 'Retail_Shopping', 'Food & Grocery', 'Liquor'),
+    (r'\b(tobacco|cigar|vape|smoke)\b.*(shop|store)', 'Retail_Shopping', 'Specialty Retail', 'Tobacco'),
+    (r'\b(mobile phone|cell phone)\b.*(store|shop|dealer)', 'Retail_Shopping', 'Electronics', 'Phones'),
+    (r'\b(optical|eyewear|glasses|sunglass)\b.*(store|shop)', 'Retail_Shopping', 'Health & Pharmacy', 'Optical'),
+    (r'\b(florist|flower)\b.*(shop|store)', 'Events_Weddings', 'Florists', 'Shops'),
+    (r'\b(bridal|wedding)\b.*(shop|store|boutique)', 'Events_Weddings', 'Attire', 'Bridal'),
+    (r'\b(uniform|workwear)\b.*(store|shop)', 'Retail_Shopping', 'Clothing & Fashion', 'Specialty'),
+
+    # ==================== PROFESSIONAL SERVICES ====================
+    (r'\b(lawyer|attorney|law firm|legal)\b.*(office|firm|service)', 'Professional_Services', 'Legal', 'Firms'),
+    (r'\b(accountant|accounting|bookkeep|tax)\b.*(firm|service|office)(?!.*government)', 'Professional_Services', 'Financial Services', 'Accounting'),
+    (r'\b(architect|architecture)\b.*(firm|office|studio)', 'Professional_Services', 'Engineering', 'Architecture'),
+    (r'\b(engineer|engineering)\b.*(firm|office|company)', 'Professional_Services', 'Engineering', 'Firms'),
+    (r'\b(surveyor|surveying|land survey)\b', 'Professional_Services', 'Engineering', 'Surveying'),
+    (r'\b(consultant|consulting)\b.*(firm|company|service)', 'Professional_Services', 'Consulting', 'General'),
+    (r'\b(marketing|advertising|pr|public relations)\b.*(agency|firm|company)', 'Professional_Services', 'Marketing & Advertising', 'Agencies'),
+    (r'\b(graphic|web|design)\b.*(studio|agency|firm)', 'Professional_Services', 'Creative Services', 'Design'),
+    (r'\b(photography|photographer|video|videograph)\b.*(studio|service)', 'Professional_Services', 'Creative Services', 'Photography'),
+    (r'\b(translation|interpreter|language)\b.*service', 'Professional_Services', 'Language Services', 'Translation'),
+    (r'\b(staffing|recruiting|employment|hr)\b.*(agency|service|firm)', 'Professional_Services', 'HR Services', 'Agencies'),
+    (r'\b(notary|notarial)\b', 'Professional_Services', 'Legal', 'Notary'),
+    (r'\b(private investigator|detective)\b', 'Professional_Services', 'Agencies', 'Investigation'),
+    (r'\b(appraiser|appraisal|valuation)\b', 'Professional_Services', 'Financial Services', 'Appraisal'),
+    (r'\b(auditor|audit)\b.*(firm|service)', 'Professional_Services', 'Financial Services', 'Audit'),
+    (r'\b(courier|messenger|delivery)\b.*service', 'Transportation', 'Delivery', 'Courier'),
+
+    # ==================== ARTS & CULTURE ====================
+    (r'\b(art|gallery|exhibition)\b(?!.*supply|.*store|.*school)', 'Entertainment', 'Arts', 'Galleries'),
+    (r'\b(museum)\b', 'Entertainment', 'Museums', 'General'),
+    (r'\b(theater|theatre|playhouse|opera house)\b', 'Entertainment', 'Performing Arts', 'Venues'),
+    (r'\b(cinema|movie theater|multiplex)\b', 'Entertainment', 'Movies', 'Theaters'),
+    (r'\b(concert|music)\b.*(hall|venue)', 'Entertainment', 'Music Venues', 'Concert_Halls'),
+    (r'\b(band|orchestra|choir|ensemble)\b', 'Entertainment', 'Performing Arts', 'Groups'),
+    (r'\b(comedian|comedy club)\b', 'Entertainment', 'Performing Arts', 'Comedy'),
+    (r'\b(artist|sculptor|painter)\b(?!.*makeup)', 'Entertainment', 'Arts', 'Artists'),
+    (r'\b(animation|animator)\b.*(studio|company)', 'Professional_Services', 'Creative Services', 'Animation'),
+    (r'\b(recording|music)\b.*studio', 'Professional_Services', 'Creative Services', 'Recording'),
+    (r'\b(art restoration|restoration service)\b', 'Professional_Services', 'Creative Services', 'Restoration'),
+
+    # ==================== ENTERTAINMENT & RECREATION ====================
+    (r'\b(amusement|theme)\b.*park', 'Entertainment', 'Amusement', 'Parks'),
+    (r'\b(arcade|game center|gaming)\b', 'Entertainment', 'Games & Recreation', 'Arcades'),
+    (r'\b(escape room|puzzle room)\b', 'Entertainment', 'Games & Recreation', 'Escape_Rooms'),
+    (r'\b(bowling)\b.*(alley|center)', 'Entertainment', 'Games & Recreation', 'Bowling'),
+    (r'\b(billiard|pool hall|snooker)\b', 'Entertainment', 'Games & Recreation', 'Billiards'),
+    (r'\b(karaoke)\b', 'Entertainment', 'Music Venues', 'Karaoke'),
+    (r'\b(casino|gambling|betting)\b', 'Entertainment', 'Gambling', 'Casinos'),
+    (r'\b(nightclub|disco|club)\b(?!.*golf|.*country|.*tennis)', 'Food_Dining', 'Bars & Nightlife', 'Nightclubs'),
+    (r'\b(country club|private club|social club)\b', 'Entertainment', 'Social', 'Clubs'),
+    (r'\b(botanical garden|arboretum)\b', 'Entertainment', 'Parks', 'Gardens'),
+    (r'\b(park|playground|recreation area)\b(?!.*theme|.*water|.*trailer|.*mobile)', 'Entertainment', 'Parks', 'Public'),
+    (r'\b(beach|waterfront|marina)\b(?!.*hotel)', 'Entertainment', 'Parks', 'Beaches'),
+    (r'\b(campground|camping|rv park|caravan)\b', 'Hospitality_Travel', 'Lodging', 'Camping'),
+    (r'\b(go.?kart|kart|karting)\b', 'Entertainment', 'Games & Recreation', 'Karting'),
+    (r'\b(laser tag|paintball)\b', 'Entertainment', 'Games & Recreation', 'Adventure'),
+    (r'\b(trampoline|bounce|jump)\b.*(park|center)', 'Entertainment', 'Games & Recreation', 'Trampoline'),
+    (r'\b(mini golf|miniature golf|putt.?putt)\b', 'Entertainment', 'Games & Recreation', 'Mini_Golf'),
+    (r'\b(water park|aqua park)\b', 'Entertainment', 'Amusement', 'Water_Parks'),
+    (r'\b(haunted|horror)\b.*(house|attraction)', 'Entertainment', 'Amusement', 'Attractions'),
+    (r'\b(circus|carnival|fair)\b', 'Entertainment', 'Amusement', 'Shows'),
+    (r'\b(planetarium|observatory)\b', 'Entertainment', 'Museums', 'Science'),
+
+    # ==================== FOOD & DINING ====================
+    (r'\b(bar|pub|tavern|lounge|brewery|taproom|brewpub)\b(?!.*brow|.*eyebrow)', 'Food_Dining', 'Bars & Nightlife', 'Bars'),
+    (r'\b(cafe|coffee|espresso)\b.*(shop|house|bar)', 'Food_Dining', 'Cafes & Coffee', 'Cafes'),
+    (r'\b(restaurant|eatery|diner|bistro|brasserie|grill)\b', 'Food_Dining', 'Restaurants', 'General'),
+    (r'\b(bakery|patisserie|pastry)\b', 'Food_Dining', 'Bakeries & Desserts', 'Bakeries'),
+    (r'\b(ice cream|gelato|frozen yogurt|dessert)\b.*(shop|parlor|store)', 'Food_Dining', 'Bakeries & Desserts', 'Desserts'),
+    (r'\b(caterer|catering)\b', 'Food_Dining', 'Food Services', 'Catering'),
+    (r'\b(food truck|food cart)\b', 'Food_Dining', 'Quick Service', 'Mobile'),
+    (r'\b(juice|smoothie)\b.*(bar|shop)', 'Food_Dining', 'Cafes & Coffee', 'Juice'),
+    (r'\b(tea|bubble tea|boba)\b.*(shop|house|room)', 'Food_Dining', 'Cafes & Coffee', 'Tea'),
+    (r'\b(winery|vineyard|wine)\b.*(tasting|cellar)', 'Food_Dining', 'Beverage Production', 'Wineries'),
+    (r'\b(distillery|spirit)\b', 'Food_Dining', 'Beverage Production', 'Distilleries'),
+    (r'\b(butcher|meat)\b.*shop', 'Retail_Shopping', 'Food & Grocery', 'Butchers'),
+    (r'\b(fish|seafood)\b.*market', 'Retail_Shopping', 'Food & Grocery', 'Seafood'),
+    (r'\b(deli|delicatessen)\b', 'Retail_Shopping', 'Food & Grocery', 'Delis'),
+    (r'\b(candy|chocolate|sweet|confection)\b.*(shop|store)', 'Retail_Shopping', 'Food & Grocery', 'Confectionery'),
+
+    # ==================== PERSONAL SERVICES ====================
+    (r'\b(barber|hair)\b.*(shop|salon|stylist)', 'Personal_Services', 'Hair Care', 'Salons'),
+    (r'\b(beauty|nail|manicure|pedicure)\b.*(salon|spa|studio)', 'Personal_Services', 'Spa & Wellness', 'Beauty'),
+    (r'\b(tattoo|piercing|body art)\b.*(shop|studio|parlor)', 'Personal_Services', 'Body Art', 'Studios'),
+    (r'\b(massage)\b.*(therapist|spa|parlor|studio)', 'Personal_Services', 'Massage', 'Studios'),
+    (r'\b(spa|wellness|day spa)\b', 'Personal_Services', 'Spa & Wellness', 'Spas'),
+    (r'\b(tanning|sunbed)\b.*(salon|studio)', 'Personal_Services', 'Spa & Wellness', 'Tanning'),
+    (r'\b(laundry|laundromat|dry clean|tailor|alteration|seamstress)\b', 'Personal_Services', 'Laundry', 'Services'),
+    (r'\b(shoe repair|cobbler)\b', 'Personal_Services', 'Clothing Care', 'Shoe_Repair'),
+    (r'\b(brow|eyebrow|lash|eyelash)\b.*(bar|salon|studio)', 'Personal_Services', 'Spa & Wellness', 'Brows_Lashes'),
+    (r'\b(makeup artist|stylist)\b', 'Personal_Services', 'Spa & Wellness', 'Makeup'),
+    (r'\b(sauna|steam room|bathhouse|hammam)\b', 'Personal_Services', 'Spa & Wellness', 'Baths'),
+    (r'\b(waxing)\b.*(salon|studio)', 'Personal_Services', 'Spa & Wellness', 'Waxing'),
+
+    # ==================== HOSPITALITY & TRAVEL ====================
+    (r'\b(hotel|motel|inn|resort|hostel|lodge|bed and breakfast|b&b|guesthouse)\b', 'Hospitality_Travel', 'Lodging', 'Hotels'),
+    (r'\b(travel|tour)\b.*(agency|operator|company)', 'Hospitality_Travel', 'Travel Services', 'Agencies'),
+    (r'\b(airline|airport|aviation)\b(?!.*school)', 'Transportation', 'Passenger', 'Air'),
+    (r'\b(cruise|ferry)\b.*(line|terminal|port)', 'Transportation', 'Passenger', 'Water'),
+    (r'\b(train|rail)\b.*(station|service)', 'Transportation', 'Passenger', 'Rail'),
+    (r'\b(bus|coach)\b.*(station|terminal|service|company)', 'Transportation', 'Passenger', 'Bus'),
+    (r'\b(taxi|cab|ride|uber|lyft|limo|limousine|chauffeur)\b.*(service|company|stand)', 'Transportation', 'Passenger', 'Taxi'),
+    (r'\b(tourist|visitor)\b.*(information|center|bureau)', 'Hospitality_Travel', 'Travel Services', 'Information'),
+    (r'\b(rental)\b.*\b(cabin|cottage|vacation|holiday)\b', 'Hospitality_Travel', 'Lodging', 'Rentals'),
+
+    # ==================== INDUSTRIAL & MANUFACTURING ====================
+    (r'\b(factory|plant|mill|manufacturing)\b', 'Industrial', 'Manufacturing', 'General'),
+    (r'\b(warehouse|distribution|logistics)\b.*(center|facility)', 'Transportation', 'Logistics', 'Warehouses'),
+    (r'\b(machine|machinist|metalwork|welding|welder)\b.*(shop|company|service)', 'Industrial', 'Manufacturing', 'Metal'),
+    (r'\b(print|printing|press)\b.*(shop|company|service)', 'Industrial', 'Manufacturing', 'Printing'),
+    (r'\b(textile|fabric|garment)\b.*(factory|mill|manufacturer)', 'Industrial', 'Manufacturing', 'Textile'),
+    (r'\b(chemical|pharmaceutical)\b.*(company|manufacturer|plant)', 'Industrial', 'Manufacturing', 'Chemical'),
+    (r'\b(construction|building)\b.*(company|contractor|firm)', 'Industrial', 'Construction', 'General'),
+    (r'\b(quarry|gravel|sand|aggregate)\b', 'Industrial', 'Mining', 'Quarries'),
+    (r'\b(sawmill|lumber)\b.*(mill|yard)', 'Industrial', 'Manufacturing', 'Wood'),
+    (r'\b(steel|iron|aluminum)\b.*(plant|manufacturer|company)', 'Industrial', 'Manufacturing', 'Metal'),
+    (r'\b(packaging|container)\b.*(company|manufacturer)', 'Industrial', 'Manufacturing', 'Packaging'),
+    (r'\b(recycling|waste)\b.*(center|facility|company)', 'Industrial', 'Manufacturing', 'Recycling'),
+
+    # ==================== REAL ESTATE ====================
+    (r'\b(real estate|realtor|property)\b.*(agent|agency|company)', 'Real_Estate', 'Agencies', 'Agents'),
+    (r'\b(property management|apartment|rental)\b.*(company|agency)', 'Real_Estate', 'Management', 'Residential'),
+    (r'\b(storage|self storage|mini storage)\b.*(facility|unit)', 'Real_Estate', 'Storage', 'Self_Storage'),
+    (r'\b(office|commercial)\b.*(space|building|complex)', 'Real_Estate', 'Commercial', 'Office'),
+    (r'\b(apartment|condo|housing)\b.*(complex|building|community)', 'Real_Estate', 'Residential', 'Apartments'),
+    (r'\b(home builder|housing development)\b', 'Real_Estate', 'Development', 'Residential'),
+
+    # ==================== NON-PROFIT & COMMUNITY ====================
+    (r'\b(charity|charitable|foundation|fund)\b(?!.*investment)', 'Non_Profit', 'Charities', 'General'),
+    (r'\b(non.?profit|ngo|association)\b', 'Non_Profit', 'General', 'Organizations'),
+    (r'\b(community|civic|neighborhood)\b.*(center|organization|association)', 'Non_Profit', 'Community', 'Centers'),
+    (r'\b(youth|boys|girls|scout)\b.*(club|organization|center)', 'Non_Profit', 'Community', 'Youth'),
+    (r'\b(senior|elder)\b.*(center|club)(?!.*care)', 'Non_Profit', 'Community', 'Seniors'),
+    (r'\b(veterans|vfw|american legion)\b', 'Non_Profit', 'Community', 'Veterans'),
+    (r'\b(rotary|lions|kiwanis|elks|freemason|lodge)\b', 'Non_Profit', 'Community', 'Fraternal'),
+    (r'\b(union|labor)\b.*(hall|organization)', 'Non_Profit', 'Professional', 'Unions'),
+    (r'\b(chamber of commerce|business association)\b', 'Non_Profit', 'Professional', 'Business'),
+    (r'\b(aboriginal|indigenous|tribal)\b.*(organization|center)', 'Non_Profit', 'Community', 'Indigenous'),
+
+    # ==================== TECHNOLOGY ====================
+    (r'\b(software|app|web)\b.*(developer|development|company)', 'Technology', 'Software', 'Development'),
+    (r'\b(it|computer|tech)\b.*(service|support|repair)', 'Technology', 'IT Services', 'Support'),
+    (r'\b(data center|server|cloud)\b', 'Technology', 'Infrastructure', 'Data_Centers'),
+    (r'\b(internet|isp|broadband|telecom)\b.*(provider|service|company)', 'Technology', 'Telecommunications', 'Providers'),
+    (r'\b(bpo|call center|outsourc)\b', 'Technology', 'IT Services', 'BPO'),
+    (r'\b(automation|robot)\b.*(company|service)', 'Technology', 'Software', 'Automation'),
+
+    # ==================== FINANCE & INSURANCE ====================
+    (r'\b(bank|credit union|savings)\b(?!.*blood|.*food)', 'Finance_Insurance', 'Banking', 'Banks'),
+    (r'\b(atm|cash machine)\b', 'Finance_Insurance', 'Banking', 'ATMs'),
+    (r'\b(insurance)\b.*(agent|agency|company|broker)', 'Finance_Insurance', 'Insurance', 'Agents'),
+    (r'\b(mortgage|loan|lending|finance)\b.*(company|broker|service)', 'Finance_Insurance', 'Lending', 'Lenders'),
+    (r'\b(investment|wealth|portfolio|financial advisor)\b', 'Finance_Insurance', 'Investment', 'Advisors'),
+    (r'\b(money transfer|remittance|western union|moneygram)\b', 'Finance_Insurance', 'Money Services', 'Transfer'),
+    (r'\b(currency exchange|forex)\b', 'Finance_Insurance', 'Money Services', 'Exchange'),
+    (r'\b(bail bond)\b', 'Professional_Services', 'Legal', 'Bail'),
+
+    # ==================== EVENTS & WEDDINGS ====================
+    (r'\b(funeral|mortuary|cremation|cemetery|memorial)\b', 'Events_Weddings', 'Memorial', 'Funeral'),
+    (r'\b(event|party|wedding)\b.*(planner|planning|coordinator)', 'Events_Weddings', 'Planning', 'Planners'),
+    (r'\b(banquet|event|reception|wedding)\b.*(hall|venue|center)', 'Events_Weddings', 'Venues', 'Halls'),
+    (r'\b(dj|disc jockey|entertainment)\b.*service', 'Events_Weddings', 'Services', 'Entertainment'),
+    (r'\b(balloon|party supply|decoration)\b', 'Events_Weddings', 'Services', 'Decorations'),
+    (r'\b(tent|equipment)\b.*rental(?!.*car|.*truck)', 'Events_Weddings', 'Rentals', 'Equipment'),
+    (r'\b(photo booth|photobooth)\b', 'Events_Weddings', 'Services', 'Photography'),
+
+    # ==================== AGRICULTURE ====================
+    (r'\b(farm|ranch|orchard|vineyard)\b(?!.*winery)', 'Agriculture', 'Farming', 'Farms'),
+    (r'\b(agriculture|farming|crop)\b.*(service|supply|equipment)', 'Agriculture', 'Services', 'Supplies'),
+    (r'\b(livestock|cattle|poultry|dairy)\b', 'Agriculture', 'Farming', 'Livestock'),
+    (r'\b(nursery|greenhouse|horticulture)\b.*(wholesale|grower)', 'Agriculture', 'Farming', 'Horticulture'),
+    (r'\b(agistment|horse boarding|stable)\b', 'Agriculture', 'Services', 'Equine'),
+    (r'\b(veterinarian|vet)\b.*(livestock|farm|large animal)', 'Agriculture', 'Services', 'Veterinary'),
+
+    # ==================== TRANSPORTATION ====================
+    (r'\b(shipping|freight|cargo|trucking)\b.*(company|service)', 'Transportation', 'Logistics', 'Shipping'),
+    (r'\b(courier|messenger|express)\b.*(service|delivery)', 'Transportation', 'Delivery', 'Courier'),
+    (r'\b(airport|airfield|airstrip|heliport)\b', 'Transportation', 'Passenger', 'Airports'),
+    (r'\b(port|harbor|dock|pier|marina)\b(?!.*wine)', 'Transportation', 'Logistics', 'Ports'),
+    (r'\b(parking)\b.*(lot|garage|structure)', 'Automotive', 'Parking', 'Lots'),
+    (r'\b(towing|tow truck)\b', 'Transportation', 'Vehicle Services', 'Towing'),
+]
+
+def categorize_item(name):
+    """
+    Return the (sector, business_type, sub_category) of the first rule in
+    CATEGORIZATION_RULES whose regex matches the name, or None if none does.
+    """
+    lowered = name.lower()
+    matches = (
+        (sector, btype, subcat)
+        for pattern, sector, btype, subcat in CATEGORIZATION_RULES
+        if re.search(pattern, lowered, re.IGNORECASE)
+    )
+    return next(matches, None)
+
+def get_existing_paths(cursor):
+    """Return the set of all gbp_categories paths, each rendered as text."""
+    cursor.execute("SELECT path::text FROM gbp_categories")
+    return set(record[0] for record in cursor.fetchall())
+
+def _ensure_category(cursor, name, slug, path, level, parent_path, label, existing_paths):
+    """Insert `path` below `parent_path` if needed; True once the row is known to exist."""
+    if path in existing_paths:
+        return True
+    cursor.execute("""
+        INSERT INTO gbp_categories (name, slug, path, level, parent_id, category_count)
+        SELECT %s, %s, %s::ltree, %s, id, 0
+        FROM gbp_categories WHERE path = %s::ltree
+        ON CONFLICT (path) DO NOTHING
+        RETURNING id
+    """, (name, slug, path, level, parent_path))
+    if cursor.fetchone():
+        print(f"  [NEW] Created {label}: {path}")
+    else:
+        # No row came back: either DO NOTHING fired (row exists) or the parent row is missing.
+        cursor.execute("SELECT 1 FROM gbp_categories WHERE path = %s::ltree", (path,))
+        if cursor.fetchone() is None:
+            print(f"  [SKIP] Could not create {label}: {path}")
+            return False
+    existing_paths.add(path)
+    return True
+
+def get_or_create_path(cursor, sector, btype, subcat, existing_paths):
+    """
+    Get or create the level-2 and level-3 category rows for an item.
+    Returns the level-3 path to use as the item's parent, or None when the
+    sector does not exist or a row could not be created (existing_paths is updated in place).
+    """
+    sector_path = slugify(sector)
+    btype_slug = slugify(btype)
+    subcat_slug = slugify(subcat)
+
+    if sector_path not in existing_paths:
+        # Don't create new sectors - return None
+        print(f"  [SKIP] Would need new sector: {sector_path}")
+        return None
+
+    # Level 2: Business Type
+    btype_path = f"{sector_path}.{btype_slug}"
+    if not _ensure_category(cursor, btype, btype_slug, btype_path, 2, sector_path, "business type", existing_paths):
+        return None
+
+    # Level 3: Sub-category
+    subcat_path = f"{btype_path}.{subcat_slug}"
+    if not _ensure_category(cursor, subcat, subcat_slug, subcat_path, 3, btype_path, "sub-category", existing_paths):
+        return None
+
+    return subcat_path
+
+def main():
+    """
+    Preview, confirm, then move rule-matched level-4 items out of
+    Other.Uncategorized, refresh category_count and commit. The connection
+    is closed on every exit path (uncommitted work is discarded by close()).
+    """
+    conn = psycopg2.connect(DB_URL)
+    try:
+        cursor = conn.cursor()
+
+        # Get all items in Other.Uncategorized
+        cursor.execute("""
+            SELECT id, name, slug
+            FROM gbp_categories
+            WHERE path ~ 'Other.Uncategorized.*' AND level = 4
+            ORDER BY name
+        """)
+        other_items = cursor.fetchall()
+        print(f"Found {len(other_items)} items in Other.Uncategorized\n")
+
+        # Get existing paths
+        existing_paths = get_existing_paths(cursor)
+
+        # Categorize items
+        categorized = []
+        uncategorized = []
+        category_counts = defaultdict(int)
+        for item_id, name, slug in other_items:
+            result = categorize_item(name)
+            if result:
+                sector, btype, subcat = result
+                categorized.append((item_id, name, slug, sector, btype, subcat))
+                category_counts[(sector, btype, subcat)] += 1
+            else:
+                uncategorized.append((item_id, name))
+
+        print(f"Categorized: {len(categorized)}")
+        print(f"Still uncategorized: {len(uncategorized)}")
+        print()
+
+        # Show category distribution
+        print("Category distribution:")
+        for (sector, btype, subcat), count in sorted(category_counts.items(), key=lambda x: -x[1])[:30]:
+            print(f"  {sector}.{btype}.{subcat}: {count}")
+        print()
+
+        # Show some uncategorized items
+        print("Sample uncategorized items (first 50):")
+        for item_id, name in uncategorized[:50]:
+            print(f"  - {name}")
+        print()
+
+        # Ask for confirmation
+        response = input("Proceed with database updates? (yes/no): ")
+        if response.lower() != 'yes':
+            print("Aborted.")
+            return
+
+        # Update database
+        updated = 0
+        for item_id, name, slug, sector, btype, subcat in categorized:
+            parent_path = get_or_create_path(cursor, sector, btype, subcat, existing_paths)
+            if not parent_path:
+                continue
+            new_path = f"{parent_path}.{slug}"
+            if new_path in existing_paths:
+                # path is used as an ON CONFLICT target in this script, so it is unique:
+                # moving onto an occupied path would raise and abort the whole run.
+                print(f"  [SKIP] Path already exists: {new_path}")
+                continue
+            cursor.execute("""
+                UPDATE gbp_categories
+                SET path = %s::ltree,
+                    parent_id = (SELECT id FROM gbp_categories WHERE path = %s::ltree)
+                WHERE id = %s
+            """, (new_path, parent_path, item_id))
+            existing_paths.add(new_path)
+            updated += cursor.rowcount  # count only rows the UPDATE actually changed
+
+        # Update category counts. The LEFT JOIN lets level 1-3 rows that just lost
+        # every child drop to 0 instead of keeping a stale count; rows that have
+        # children are recounted as before, and childless level-4 rows are left alone.
+        # NOTE(review): assumes level 4 is the leaf level, as selected above - confirm.
+        cursor.execute("""
+            WITH counts AS (
+                SELECT
+                    parent_id,
+                    COUNT(*) as cnt
+                FROM gbp_categories
+                WHERE parent_id IS NOT NULL
+                GROUP BY parent_id
+            )
+            UPDATE gbp_categories g
+            SET category_count = COALESCE(c.cnt, 0)
+            FROM gbp_categories p
+            LEFT JOIN counts c ON c.parent_id = p.id
+            WHERE g.id = p.id
+              AND (c.cnt IS NOT NULL OR p.level < 4)
+        """)
+
+        conn.commit()
+        print(f"\nUpdated {updated} items")
+
+        # Show final stats
+        cursor.execute("""
+            SELECT path, name, category_count
+            FROM gbp_categories
+            WHERE level = 1
+            ORDER BY category_count DESC
+        """)
+        print("\nFinal sector counts:")
+        for path, name, count in cursor.fetchall():
+            print(f"  {name}: {count}")
+    finally:
+        conn.close()
+
+if __name__ == '__main__':
+    main()
--- a/db_backup_20260201_1712.sql
+++ b/db_backup_20260201_1712.sql
--- a/docker-compose.nuc.yml
+++ b/docker-compose.nuc.yml
@@ -0,0 +1,25 @@
+# NUC Docker Compose Override
+# Uses NUC-hosted PostgreSQL instead of local container
+#
+# Usage:
+#   cp .env.nuc .env
+#   docker compose -f docker-compose.production.yml -f docker-compose.nuc.yml up -d
+#
+# This disables the local db service and connects the API to the NUC database (needs a Compose release that supports !reset)
+
+version: '3.8'
+
+services:
+  # Disable local database (using NUC instead)
+  db:
+    profiles:
+      - disabled
+
+  # API Server - connect to NUC database (URL comes from .env, so no password lives in this file)
+  api:
+    environment:
+      - DATABASE_URL=${DATABASE_URL:?DATABASE_URL is not set - run cp .env.nuc .env first}
+      - REVIEWIQ_DATABASE_URL=${DATABASE_URL}
+    depends_on: !reset []  # a plain [] is merged with the base file and would keep the db dependency
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
--- a/docker-compose.production.yml
+++ b/docker-compose.production.yml
@@ -10,9 +10,10 @@ services:
      POSTGRES_USER: scraper
      POSTGRES_PASSWORD: ${DB_PASSWORD:-scraper123}
    ports:
-      - "5435:5432"
+      - "5437:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
+      - ./db/init:/docker-entrypoint-initdb.d:ro
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U scraper"]
      interval: 10s
@@ -29,8 +30,8 @@ services:
    container_name: scraper-api
    environment:
      - DATABASE_URL=postgresql://scraper:${DB_PASSWORD:-scraper123}@db:5432/scraper
-      - API_BASE_URL=${API_BASE_URL:-http://localhost:8000}
-      - PORT=8000
+      - API_BASE_URL=${API_BASE_URL:-http://localhost:8001}
+      - PORT=8001
      - MAX_CONCURRENT_JOBS=${MAX_CONCURRENT_JOBS:-5}
      - CANARY_TEST_URL=${CANARY_TEST_URL:-https://www.google.com/maps/place/Soho+Factory/@54.6738155,25.2595844,17z/}
      - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL:-}
@@ -48,7 +49,7 @@ services:
      - ./packages:/app/packages:ro
      - ./api:/app/api:ro
    ports:
-      - "8000:8000"
+      - "8001:8001"
      - "5900:5900"   # VNC port (for VNC client)
      - "6080:6080"   # noVNC web interface (browser access)
    depends_on:
--- a/migrations/versions/008_add_job_id_to_pipeline_tables.sql
+++ b/migrations/versions/008_add_job_id_to_pipeline_tables.sql
@@ -0,0 +1,35 @@
+-- =============================================================================
+-- Migration: 008_add_job_id_to_pipeline_tables.sql
+-- Purpose: Add a nullable job_id column and a partial index on it to three pipeline tables (re-runnable: IF NOT EXISTS)
+-- =============================================================================
+
+-- Add job_id to reviews_enriched (no default, so existing rows get NULL)
+ALTER TABLE pipeline.reviews_enriched
+ADD COLUMN IF NOT EXISTS job_id UUID;
+
+-- Partial index on reviews_enriched.job_id; rows with a NULL job_id are not indexed
+CREATE INDEX IF NOT EXISTS idx_reviews_enriched_job_id
+ON pipeline.reviews_enriched(job_id)
+WHERE job_id IS NOT NULL;
+
+-- Add job_id to review_spans (no default, so existing rows get NULL)
+ALTER TABLE pipeline.review_spans
+ADD COLUMN IF NOT EXISTS job_id UUID;
+
+-- Partial index on review_spans.job_id; rows with a NULL job_id are not indexed
+CREATE INDEX IF NOT EXISTS idx_review_spans_job_id
+ON pipeline.review_spans(job_id)
+WHERE job_id IS NOT NULL;
+
+-- Add job_id to issues (no default, so existing rows get NULL)
+ALTER TABLE pipeline.issues
+ADD COLUMN IF NOT EXISTS job_id UUID;
+
+-- Partial index on issues.job_id; rows with a NULL job_id are not indexed
+CREATE INDEX IF NOT EXISTS idx_issues_job_id
+ON pipeline.issues(job_id)
+WHERE job_id IS NOT NULL;
+
+COMMENT ON COLUMN pipeline.reviews_enriched.job_id IS 'Scraper job ID for filtering by execution';
+COMMENT ON COLUMN pipeline.review_spans.job_id IS 'Scraper job ID for filtering by execution';
+COMMENT ON COLUMN pipeline.issues.job_id IS 'Scraper job ID for filtering by execution';
--- a/migrations/versions/009_add_urt_subcodes_table.sql
+++ b/migrations/versions/009_add_urt_subcodes_table.sql
@@ -0,0 +1,174 @@
+-- =============================================================================
+-- Migration: 009_add_urt_subcodes_table.sql
+-- Purpose: Add urt_subcodes table with human-readable names and definitions
+-- =============================================================================
+
+-- URT Tier-3 subcodes lookup table; each row references its category and domain via foreign keys
+CREATE TABLE IF NOT EXISTS pipeline.urt_subcodes (
+    code VARCHAR(6) PRIMARY KEY,
+    category_code VARCHAR(2) NOT NULL REFERENCES pipeline.urt_categories(code),
+    domain_code CHAR(1) NOT NULL REFERENCES pipeline.urt_domains(code),
+    name VARCHAR(100) NOT NULL,
+    definition TEXT,
+    positive_example TEXT,
+    negative_example TEXT
+);
+
+-- Indexes on the two foreign-key columns, for lookups by category or by domain
+CREATE INDEX IF NOT EXISTS idx_urt_subcodes_category ON pipeline.urt_subcodes(category_code);
+CREATE INDEX IF NOT EXISTS idx_urt_subcodes_domain ON pipeline.urt_subcodes(domain_code);
+
+COMMENT ON TABLE pipeline.urt_subcodes IS 'URT v5.1 Tier-3 diagnostic subcodes with definitions';
+
+-- Insert subcode data
+INSERT INTO pipeline.urt_subcodes (code, category_code, domain_code, name, definition, positive_example, negative_example) VALUES
+-- O1: Core Product/Service (Function)
+('O1.01', 'O1', 'O', 'Works/Doesn''t Work', 'Basic functionality success or failure', 'Software runs perfectly', 'Car won''t start'),
+('O1.02', 'O1', 'O', 'Performance Level', 'How well it operates', 'Incredibly fast processor', 'Sluggish and laggy'),
+('O1.03', 'O1', 'O', 'Durability', 'Longevity and resistance to wear', 'Still perfect after 5 years', 'Fell apart in a month'),
+('O1.04', 'O1', 'O', 'Reliability', 'Consistency of function over time', 'Never fails me', 'Works sometimes, not others'),
+('O1.05', 'O1', 'O', 'Outcome Achievement', 'Did customer accomplish their goal?', 'Passed my exam!', 'Treatment didn''t work'),
+
+-- O2: Product Features (Quality)
+('O2.01', 'O2', 'O', 'Materials/Inputs', 'Quality of components or ingredients', 'Real leather, premium feel', 'Cheap plastic parts'),
+('O2.02', 'O2', 'O', 'Craftsmanship', 'Skill of construction or execution', 'Beautifully sewn seams', 'Sloppy assembly'),
+('O2.03', 'O2', 'O', 'Presentation', 'Visual and aesthetic quality', 'Gorgeous plating', 'Looked thrown together'),
+('O2.04', 'O2', 'O', 'Attention to Detail', 'Finishing touches and refinement', 'Every corner perfect', 'Full of typos'),
+('O2.05', 'O2', 'O', 'Condition at Delivery', 'State when received', 'Still warm from oven', 'Arrived damaged'),
+
+-- O3: Variety & Selection (Completeness)
+('O3.01', 'O3', 'O', 'All Components Present', 'Nothing missing from what was promised', 'Everything in the box', 'Missing the charger'),
+('O3.02', 'O3', 'O', 'Feature Availability', 'Promised features actually work', 'All menu items available', 'Half the features disabled'),
+('O3.03', 'O3', 'O', 'Scope Delivery', 'Full scope of work completed', 'Cleaned entire house', 'Left the bathrooms'),
+('O3.04', 'O3', 'O', 'Documentation', 'Supporting materials provided', 'Great user manual', 'No instructions at all'),
+
+-- O4: Customization (Fit)
+('O4.01', 'O4', 'O', 'Specification Match', 'Matches what was ordered', 'Exactly what I ordered', 'Wrong size delivered'),
+('O4.02', 'O4', 'O', 'Personalization', 'Adapted to individual preferences', 'Remembered my usual', 'No way to save prefs'),
+('O4.03', 'O4', 'O', 'Flexibility', 'Can be modified or adjusted', 'Happy to substitute', 'No modifications allowed'),
+('O4.04', 'O4', 'O', 'Appropriateness', 'Right solution for the need', 'Perfect recommendation', 'Sold me wrong thing'),
+
+-- P1: Friendliness (Attitude)
+('P1.01', 'P1', 'P', 'Warmth', 'Friendly and welcoming manner', 'Made me feel welcome', 'Cold and unfriendly'),
+('P1.02', 'P1', 'P', 'Respect', 'Treated with dignity', 'Very respectful service', 'Rude and dismissive'),
+('P1.03', 'P1', 'P', 'Patience', 'Calm and tolerant approach', 'Patient with my questions', 'Rushed and impatient'),
+('P1.04', 'P1', 'P', 'Enthusiasm', 'Energy and engagement', 'Really passionate about helping', 'Seemed bored and disinterested'),
+
+-- P2: Helpfulness (Competence)
+('P2.01', 'P2', 'P', 'Knowledge', 'Expertise and understanding', 'Knew everything about the product', 'Had no idea what they were doing'),
+('P2.02', 'P2', 'P', 'Skill', 'Technical ability', 'Expertly handled the issue', 'Completely incompetent'),
+('P2.03', 'P2', 'P', 'Problem Solving', 'Ability to find solutions', 'Found a creative solution', 'Couldn''t figure it out'),
+
+-- P3: Professionalism (Responsiveness)
+('P3.01', 'P3', 'P', 'Attentiveness', 'Being present and engaged', 'Always attentive to needs', 'Ignored me completely'),
+('P3.02', 'P3', 'P', 'Initiative', 'Proactive help', 'Anticipated my needs', 'Had to ask for everything'),
+('P3.03', 'P3', 'P', 'Follow-through', 'Completing promised actions', 'Did exactly what they promised', 'Never followed up'),
+
+-- P4: Knowledge & Expertise (Communication)
+('P4.01', 'P4', 'P', 'Clarity', 'Clear communication', 'Explained everything clearly', 'Confusing and unclear'),
+('P4.02', 'P4', 'P', 'Listening', 'Understanding customer needs', 'Really listened to me', 'Didn''t listen at all'),
+('P4.03', 'P4', 'P', 'Transparency', 'Honest and open', 'Upfront about everything', 'Hid information from me'),
+
+-- J1: Wait Times
+('J1.01', 'J1', 'J', 'Speed', 'How fast things happen', 'Super fast service', 'Took forever'),
+('J1.02', 'J1', 'J', 'Punctuality', 'On-time delivery', 'Arrived exactly when promised', 'Two hours late'),
+('J1.03', 'J1', 'J', 'Queue Management', 'Handling of waiting customers', 'Well-organized queue', 'Chaotic and disorganized'),
+
+-- J2: Booking & Reservations (Ease)
+('J2.01', 'J2', 'J', 'Simplicity', 'Easy process', 'Super easy to book', 'Complicated process'),
+('J2.02', 'J2', 'J', 'Friction', 'Obstacles encountered', 'Seamless experience', 'So many hoops to jump through'),
+('J2.03', 'J2', 'J', 'Navigation', 'Finding what you need', 'Easy to navigate', 'Got lost multiple times'),
+
+-- J3: Navigation & Convenience (Reliability)
+('J3.01', 'J3', 'J', 'Consistency', 'Same experience every time', 'Always consistent', 'Different every visit'),
+('J3.02', 'J3', 'J', 'Accuracy', 'Getting it right', 'Perfect every time', 'Full of errors'),
+('J3.03', 'J3', 'J', 'Uptime', 'System availability', 'Never down', 'Constantly having issues'),
+
+-- J4: Accessibility (Resolution)
+('J4.01', 'J4', 'J', 'Problem Recognition', 'Acknowledging issues', 'Immediately acknowledged the issue', 'Denied there was a problem'),
+('J4.02', 'J4', 'J', 'Resolution Speed', 'How fast problems get fixed', 'Fixed immediately', 'Took weeks to resolve'),
+('J4.03', 'J4', 'J', 'Resolution Fairness', 'Fair handling of issues', 'Very fair resolution', 'Unfair treatment'),
+('J4.04', 'J4', 'J', 'Resolution Quality', 'How well problems are fixed', 'Completely resolved', 'Problem still exists'),
+
+-- E1: Physical Environment
+('E1.01', 'E1', 'E', 'Cleanliness', 'How clean the space is', 'Spotlessly clean', 'Dirty and gross'),
+('E1.02', 'E1', 'E', 'Comfort', 'Physical comfort', 'Very comfortable seating', 'Uncomfortable chairs'),
+('E1.03', 'E1', 'E', 'Space Design', 'Layout and organization', 'Well-designed layout', 'Cramped and cluttered'),
+('E1.04', 'E1', 'E', 'Maintenance', 'State of repair', 'Everything well-maintained', 'Falling apart'),
+
+-- E2: Ambiance & Atmosphere
+('E2.01', 'E2', 'E', 'Lighting', 'Light quality and level', 'Perfect lighting', 'Too dark/bright'),
+('E2.02', 'E2', 'E', 'Sound/Noise', 'Audio environment', 'Nice music', 'Too loud'),
+('E2.03', 'E2', 'E', 'Temperature', 'Climate control', 'Perfect temperature', 'Freezing/boiling'),
+('E2.04', 'E2', 'E', 'Smell', 'Odors and scents', 'Smelled wonderful', 'Bad odors'),
+
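+-- E3: Cleanliness (Digital) -- NOTE(review): the rows below describe digital UX, not cleanliness; confirm the label against urt_categories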
+('E3.01', 'E3', 'E', 'Interface Design', 'Digital UX/UI', 'Beautiful interface', 'Ugly and confusing'),
+('E3.02', 'E3', 'E', 'App/Website Speed', 'Digital performance', 'Fast and responsive', 'Slow and laggy'),
+('E3.03', 'E3', 'E', 'Usability', 'Ease of digital use', 'Intuitive to use', 'Impossible to figure out'),
+
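+-- E4: Digital Experience (Safety) -- NOTE(review): the rows below cover safety, security and hygiene; confirm the label against urt_categories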
+('E4.01', 'E4', 'E', 'Safety', 'Physical safety', 'Felt completely safe', 'Felt unsafe'),
+('E4.02', 'E4', 'E', 'Security', 'Protection of belongings/data', 'Very secure', 'Security concerns'),
+('E4.03', 'E4', 'E', 'Health/Hygiene', 'Health standards', 'Very hygienic', 'Health code violations'),
+
+-- A1: Friendliness (Availability)
+('A1.01', 'A1', 'A', 'Hours', 'Operating hours', 'Great hours', 'Never open when I need them'),
+('A1.02', 'A1', 'A', 'Booking Availability', 'Appointment slots', 'Easy to get an appointment', 'Booked for months'),
+('A1.03', 'A1', 'A', 'Inventory', 'Product availability', 'Always in stock', 'Always out of stock'),
+
+-- A2: Helpfulness (Accessibility)
+('A2.01', 'A2', 'A', 'Physical Access', 'Mobility accessibility', 'Wheelchair accessible', 'Not accessible'),
+('A2.02', 'A2', 'A', 'Language Access', 'Language accommodation', 'Multiple languages available', 'English only'),
+('A2.03', 'A2', 'A', 'Digital Accessibility', 'Screen reader/a11y', 'Accessible website', 'Can''t use with screen reader'),
+
+-- A3: Professionalism (Inclusivity)
+('A3.01', 'A3', 'A', 'Diversity Welcome', 'All backgrounds welcome', 'Very inclusive', 'Felt unwelcome'),
+('A3.02', 'A3', 'A', 'Accommodation', 'Special needs accommodation', 'Very accommodating', 'No accommodations available'),
+
+-- A4: Knowledge & Expertise (Convenience)
+('A4.01', 'A4', 'A', 'Location', 'Physical location convenience', 'Great location', 'Hard to get to'),
+('A4.02', 'A4', 'A', 'Parking', 'Parking availability', 'Easy parking', 'No parking'),
+('A4.03', 'A4', 'A', 'Multiple Channels', 'Ways to engage', 'Many ways to reach them', 'Only one contact method'),
+
+-- V1: Value Perception (Price)
+('V1.01', 'V1', 'V', 'Price Level', 'Cost amount', 'Very affordable', 'Way too expensive'),
+('V1.02', 'V1', 'V', 'Price Fairness', 'Fair for what you get', 'Fair price', 'Overpriced'),
+('V1.03', 'V1', 'V', 'Hidden Costs', 'Unexpected charges', 'No hidden fees', 'Lots of hidden charges'),
+
+-- V2: Pricing Structure (Transparency)
+('V2.01', 'V2', 'V', 'Clear Pricing', 'Easy to understand costs', 'Clear pricing', 'Confusing pricing'),
+('V2.02', 'V2', 'V', 'Honest Billing', 'Accurate charges', 'Bill was accurate', 'Charged more than quoted'),
+('V2.03', 'V2', 'V', 'Policy Clarity', 'Clear terms and conditions', 'Clear policies', 'Hidden in fine print'),
+('V2.04', 'V2', 'V', 'Policy Fairness', 'Fair rules and terms', 'Fair policies', 'Unfair terms'),
+
+-- V3: Promotions & Deals (Effort)
+('V3.01', 'V3', 'V', 'Time Investment', 'Time required', 'Quick and easy', 'Took way too long'),
+('V3.02', 'V3', 'V', 'Hassle Factor', 'Difficulty and inconvenience', 'No hassle', 'Such a hassle'),
+('V3.03', 'V3', 'V', 'Mental Load', 'Cognitive effort required', 'Easy to understand', 'Too complicated'),
+
+-- V4: Payment Process (Worth)
+('V4.01', 'V4', 'V', 'Value for Money', 'Worth what you paid', 'Great value', 'Not worth the money'),
+('V4.02', 'V4', 'V', 'ROI', 'Return on investment', 'Excellent return', 'Waste of money'),
+('V4.03', 'V4', 'V', 'Overall Satisfaction', 'Happy with the exchange', 'Very satisfied', 'Totally unsatisfied'),
+
+-- R1: Loyalty (Integrity)
+('R1.01', 'R1', 'R', 'Honesty', 'Truthfulness', 'Always honest', 'Lied to me'),
+('R1.02', 'R1', 'R', 'Ethics', 'Ethical behavior', 'Ethical practices', 'Unethical behavior'),
+('R1.03', 'R1', 'R', 'Promises Kept', 'Following through on promises', 'Kept all promises', 'Broke their promise'),
+
+-- R2: Trust (Dependability)
+('R2.01', 'R2', 'R', 'Consistency', 'Reliable over time', 'Always reliable', 'Inconsistent'),
+('R2.02', 'R2', 'R', 'Trustworthiness', 'Can be trusted', 'Completely trustworthy', 'Can''t be trusted'),
+('R2.03', 'R2', 'R', 'Accountability', 'Takes responsibility', 'Takes responsibility', 'Blames others'),
+
+-- R3: Consistency (Recovery)
+('R3.01', 'R3', 'R', 'Error Acknowledgment', 'Admits mistakes', 'Quickly admitted the mistake', 'Denied the mistake'),
+('R3.02', 'R3', 'R', 'Apology Quality', 'Sincere apologies', 'Sincere apology', 'Insincere/no apology'),
+('R3.03', 'R3', 'R', 'Making It Right', 'Correcting mistakes', 'Made it right', 'Didn''t fix anything'),
+
+-- R4: Personalization (Loyalty)
+('R4.01', 'R4', 'R', 'Customer Recognition', 'Remembers customers', 'Remembered me', 'Treated like a stranger'),
+('R4.02', 'R4', 'R', 'Loyalty Rewards', 'Rewards for loyalty', 'Great loyalty program', 'No recognition for loyalty'),
+('R4.03', 'R4', 'R', 'Long-term Relationship', 'Builds relationships', 'Values the relationship', 'Just another number')
+ON CONFLICT (code) DO NOTHING;
--- a/migrations/versions/010_add_solution_to_urt_subcodes.sql
+++ b/migrations/versions/010_add_solution_to_urt_subcodes.sql
@@ -0,0 +1,31 @@
+-- Migration: Add solution column to urt_subcodes
+-- Version: 010
+-- Date: 2026-01-25
+-- Description: Add solution, marketing_angle and solution_complexity columns to urt_subcodes (safe to re-run)
+
+-- Add solution column for actionable business recommendations
+ALTER TABLE pipeline.urt_subcodes
+ADD COLUMN IF NOT EXISTS solution TEXT;
+
+-- Add comment describing the column
+COMMENT ON COLUMN pipeline.urt_subcodes.solution IS
+'Actionable business recommendation for addressing issues related to this subcode';
+
+-- Also add marketing_angle column for strengths
+ALTER TABLE pipeline.urt_subcodes
+ADD COLUMN IF NOT EXISTS marketing_angle TEXT;
+
+COMMENT ON COLUMN pipeline.urt_subcodes.marketing_angle IS
+'Marketing suggestion when this subcode appears as a strength (high positive sentiment)';
+
+-- Add complexity column to help with opportunity matrix
+ALTER TABLE pipeline.urt_subcodes
+ADD COLUMN IF NOT EXISTS solution_complexity VARCHAR(10) DEFAULT 'medium';
+
+COMMENT ON COLUMN pipeline.urt_subcodes.solution_complexity IS
+'Complexity of implementing the solution: simple, medium, complex';
+
+-- Add constraint for valid complexity values; drop it first so re-running the migration does not fail
+ALTER TABLE pipeline.urt_subcodes
+DROP CONSTRAINT IF EXISTS valid_solution_complexity,
+ADD CONSTRAINT valid_solution_complexity CHECK (solution_complexity IN ('simple', 'medium', 'complex'));
--- a/migrations/versions/011_populate_urt_solutions.py
+++ b/migrations/versions/011_populate_urt_solutions.py
@@ -0,0 +1,210 @@
+#!/usr/bin/env python3
+"""
+Generate SQL to populate URT subcodes with solutions, marketing angles, and complexity.
+Emits one UPDATE per entry of the hard-coded SOLUTION_TEMPLATES table below;
+B1-urt-codes.yaml is referenced by path only and is not parsed by this script.
+
+Usage:
+    python 011_populate_urt_solutions.py > 011_populate_urt_solutions.sql
+    # Then run the SQL against the database
+"""
+
+import yaml
+from pathlib import Path
+
+# Path to the URT taxonomy YAML, built relative to this file's location.
+# NOTE(review): nothing in this script opens this path (the `yaml` import is
+# likewise unused); the SQL comes solely from SOLUTION_TEMPLATES. Confirm whether
+# loading/validating against the YAML was intended.
+URT_YAML = Path(__file__).parent.parent.parent / "urt-taxonomy" / "track-b-engineering" / "B1-urt-codes.yaml"
+
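+# Solution templates keyed by URT subcode. Each value is a 3-tuple:
+# (solution, marketing_angle, solution_complexity), where solution_complexity
+# is one of "simple", "medium", "complex".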
+SOLUTION_TEMPLATES = {
+    # Offering (O) - Product/Operations solutions
+    "O1.01": ("Implement quality testing before delivery. Create incident response process for functionality failures.", "Our products work reliably - backed by rigorous quality testing.", "medium"),
+    "O1.02": ("Optimize performance through benchmarking and monitoring. Set performance SLAs.", "Experience lightning-fast performance that exceeds expectations.", "complex"),
+    "O1.03": ("Use higher quality materials. Extend warranty coverage. Implement durability testing.", "Built to last - quality materials that stand the test of time.", "medium"),
+    "O1.04": ("Implement regular maintenance schedules. Add redundancy for critical systems.", "Dependable reliability you can count on, every time.", "medium"),
+    "O1.05": ("Track outcome metrics. Follow up on customer goals. Provide success coaching.", "We measure success by YOUR results, not just our delivery.", "medium"),
+    "O2.01": ("Upgrade to premium materials/ingredients. Source from quality suppliers.", "Premium materials and ingredients you can see and feel.", "medium"),
+    "O2.02": ("Invest in craftsman training. Implement quality checkpoints.", "Master craftsmanship in every detail.", "complex"),
+    "O2.03": ("Train on presentation standards. Create visual guidelines.", "Beautifully presented, every single time.", "simple"),
+    "O2.04": ("Implement finishing checklists. Add quality inspection step.", "Meticulous attention to every detail.", "simple"),
+    "O2.05": ("Improve packaging. Add delivery condition checks. Train delivery staff.", "Arrives in perfect condition, guaranteed.", "medium"),
+    "O3.01": ("Create comprehensive packing lists. Verify completeness before shipping.", "Everything you need, nothing missing.", "simple"),
+    "O3.02": ("Test all features before release. Maintain feature availability dashboard.", "All features available and working as promised.", "medium"),
+    "O3.03": ("Define clear scope of work. Use completion checklists.", "We deliver the full scope, every time.", "simple"),
+    "O3.04": ("Create comprehensive documentation. Include setup guides and FAQs.", "Clear instructions and helpful guides included.", "simple"),
+    "O4.01": ("Implement order verification. Add confirmation step before fulfillment.", "Exactly what you ordered, guaranteed.", "simple"),
+    "O4.02": ("Build preference tracking system. Remember customer choices.", "We remember your preferences for a personalized experience.", "medium"),
+    "O4.03": ("Train staff on customization options. Empower flexibility.", "Flexible options tailored to your needs.", "simple"),
+    "O4.04": ("Improve needs assessment. Train consultative selling.", "Expert recommendations matched to your specific needs.", "medium"),
+
+    # People (P) - HR/Training solutions
+    "P1.01": ("Train staff on warm greetings. Recognize friendly behavior.", "Friendly faces and warm welcomes await you.", "simple"),
+    "P1.02": ("Implement respect training. Address complaints immediately.", "You'll be treated with dignity and respect.", "simple"),
+    "P1.03": ("Train active listening and empathy. Role-play difficult scenarios.", "Staff who truly understand your situation.", "medium"),
+    "P1.04": ("Reduce time pressure on staff. Train patience techniques.", "Take your time - we're here to help, not rush.", "simple"),
+    "P1.05": ("Hire for passion. Recognize enthusiastic service.", "Passionate people who love helping customers.", "medium"),
+    "P2.01": ("Implement ongoing product training. Create knowledge base.", "Expert knowledge to answer any question.", "medium"),
+    "P2.02": ("Invest in skills training. Certify technical competency.", "Skilled professionals at the top of their craft.", "complex"),
+    "P2.03": ("Empower staff to solve problems. Create escalation paths.", "Creative problem-solvers who find solutions.", "medium"),
+    "P2.04": ("Define professional standards. Provide uniforms/dress code.", "Professional service you can trust.", "simple"),
+    "P2.05": ("Hire experienced staff. Pair juniors with mentors.", "Seasoned experts with years of experience.", "complex"),
+    "P3.01": ("Train proactive checking. Reduce multitasking.", "Attentive service that anticipates your needs.", "simple"),
+    "P3.02": ("Encourage proactive service. Reward initiative.", "Proactive help before you even ask.", "simple"),
+    "P3.03": ("Optimize staffing levels. Reduce wait for assistance.", "Help is always available when you need it.", "medium"),
+    "P3.04": ("Implement task tracking. Create follow-up reminders.", "We do what we say we'll do.", "simple"),
+    "P3.05": ("Train prioritization. Empower urgent action.", "Your needs are treated with appropriate urgency.", "simple"),
+    "P4.01": ("Train jargon-free communication. Use visual aids.", "Clear explanations without confusing jargon.", "simple"),
+    "P4.02": ("Train active listening. Implement feedback loops.", "We truly listen and understand your needs.", "simple"),
+    "P4.03": ("Implement status update systems. Set update expectations.", "Regular updates keep you informed every step.", "simple"),
+    "P4.04": ("Verify information before sharing. Create accuracy checks.", "Accurate information you can rely on.", "simple"),
+    "P4.05": ("Train professional communication. Provide tone guidelines.", "Professional yet personable communication.", "simple"),
+
+    # Journey (J) - Operations/Process solutions
+    "J1.01": ("Display estimated wait times. Implement queue management.", "Minimal wait times with clear expectations.", "medium"),
+    "J1.02": ("Optimize delivery processes. Set realistic timelines.", "Fast, reliable delivery every time.", "medium"),
+    "J1.03": ("Set response time SLAs. Implement ticketing system.", "Quick responses when you reach out.", "medium"),
+    "J1.04": ("Improve scheduling. Buffer time for delays.", "On-time, every time.", "simple"),
+    "J1.05": ("Train on pacing. Allow customer control of tempo.", "At your pace, never rushed.", "simple"),
+    "J2.01": ("Simplify processes. Remove unnecessary steps.", "Simple, straightforward processes.", "medium"),
+    "J2.02": ("Improve signage. Create intuitive layouts.", "Easy to find what you're looking for.", "simple"),
+    "J2.03": ("Digitize forms. Pre-fill known information.", "Minimal paperwork, maximum efficiency.", "medium"),
+    "J2.04": ("Improve handoff protocols. Share context between teams.", "Seamless transitions between team members.", "medium"),
+    "J2.05": ("Build self-service portal. Add online options.", "Self-service options for your convenience.", "complex"),
+    "J3.01": ("Standardize processes. Document procedures.", "Consistent quality every single time.", "medium"),
+    "J3.02": ("Implement order verification. Add accuracy checks.", "Accurate orders, no mistakes.", "simple"),
+    "J3.03": ("Improve system reliability. Add monitoring and alerts.", "Reliable systems that are always available.", "complex"),
+    "J3.04": ("Set clear expectations. Document what to expect.", "No surprises - exactly what you expect.", "simple"),
+    "J3.05": ("Implement quality checks. Track and reduce errors.", "Rare mistakes with quick corrections.", "medium"),
+    "J4.01": ("Train problem acknowledgment. Create issue intake process.", "We acknowledge issues immediately.", "simple"),
+    "J4.02": ("Create clear escalation paths. Empower frontline resolution.", "Efficient resolution process.", "medium"),
+    "J4.03": ("Set resolution time targets. Prioritize open issues.", "Fast resolution when things go wrong.", "medium"),
+    "J4.04": ("Verify fixes before closing. Follow up on resolutions.", "Complete solutions, not band-aids.", "medium"),
+    "J4.05": ("Conduct root cause analysis. Implement systemic fixes.", "We fix problems permanently.", "complex"),
+
+    # Environment (E) - Facilities/IT solutions
+    "E1.01": ("Increase cleaning frequency. Create cleaning checklists.", "Spotlessly clean facilities.", "simple"),
+    "E1.02": ("Implement preventive maintenance. Fix issues promptly.", "Well-maintained, everything works.", "medium"),
+    "E1.03": ("Redesign layout for flow. Add wayfinding.", "Intuitive layout, easy to navigate.", "complex"),
+    "E1.04": ("Upgrade equipment. Implement replacement schedule.", "Modern, state-of-the-art equipment.", "complex"),
+    "E1.05": ("Add clear signage. Use consistent design.", "Clear signs and easy navigation.", "simple"),
+    "E2.01": ("Invest in UX design. Conduct usability testing.", "Beautiful, intuitive digital experience.", "complex"),
+    "E2.02": ("Test all features. Fix bugs promptly.", "Everything works, no broken buttons.", "medium"),
+    "E2.03": ("Optimize page load. Improve server response.", "Lightning-fast digital experience.", "complex"),
+    "E2.04": ("Simplify navigation. Reduce menu depth.", "Find what you need in seconds.", "medium"),
+    "E2.05": ("Optimize for mobile. Test on all devices.", "Works beautifully on any device.", "medium"),
+    "E3.01": ("Design for desired mood. Control sensory elements.", "Perfect atmosphere and ambiance.", "medium"),
+    "E3.02": ("Add sound absorption. Create quiet zones.", "Pleasant sound levels.", "medium"),
+    "E3.03": ("Optimize HVAC. Add zone controls.", "Perfect temperature, always comfortable.", "medium"),
+    "E3.04": ("Manage capacity. Control entry rates.", "Comfortable, never overcrowded.", "medium"),
+    "E3.05": ("Invest in design. Update decor regularly.", "Beautiful, inviting space.", "complex"),
+    "E4.01": ("Conduct safety audits. Address hazards immediately.", "Safety is our top priority.", "medium"),
+    "E4.02": ("Implement hygiene protocols. Train staff on standards.", "Highest hygiene standards.", "medium"),
+    "E4.03": ("Add security measures. Protect customer property.", "Secure environment for you and your belongings.", "medium"),
+    "E4.04": ("Upgrade furniture. Add comfort amenities.", "Comfortable facilities for your visit.", "medium"),
+    "E4.05": ("Conduct emergency drills. Mark exits clearly.", "Prepared for any emergency.", "medium"),
+
+    # Access (A) - Compliance/Design solutions
+    "A1.01": ("Extend operating hours. Consider 24/7 options.", "Open when you need us.", "medium"),
+    "A1.02": ("Add online booking. Increase appointment slots.", "Easy scheduling, plenty of availability.", "medium"),
+    "A1.03": ("Improve inventory management. Add stock alerts.", "Always in stock when you need it.", "medium"),
+    "A1.04": ("Hire additional staff. Optimize scheduling.", "Plenty of staff to help you.", "complex"),
+    "A1.05": ("Expand service area. Add new locations.", "Convenient locations near you.", "complex"),
+    "A2.01": ("Add ramps and elevators. Ensure ADA compliance.", "Fully accessible for all abilities.", "complex"),
+    "A2.02": ("Add alt text. Ensure screen reader compatibility.", "Accessible for visually impaired users.", "medium"),
+    "A2.03": ("Add captions and transcripts. Support hearing devices.", "Accessible for hearing impaired users.", "medium"),
+    "A2.04": ("Use plain language. Simplify instructions.", "Easy to understand for everyone.", "simple"),
+    "A2.05": ("Test with assistive technologies. Follow WCAG guidelines.", "Works with all assistive technologies.", "complex"),
+    "A3.01": ("Hire multilingual staff. Add translation services.", "Service in your language.", "medium"),
+    "A3.02": ("Train cultural competency. Celebrate diversity.", "Welcoming to all backgrounds.", "medium"),
+    "A3.03": ("Offer dietary alternatives. Train allergy awareness.", "Options for all dietary needs.", "medium"),
+    "A3.04": ("Add family amenities. Create kid-friendly options.", "Great for the whole family.", "medium"),
+    "A3.05": ("Train bias awareness. Audit for fair treatment.", "Equal, respectful treatment for all.", "medium"),
+    "A4.01": ("Choose high-traffic location. Improve visibility.", "Convenient, easy-to-find location.", "complex"),
+    "A4.02": ("Add parking spaces. Offer validation.", "Easy, hassle-free parking.", "complex"),
+    "A4.03": ("Locate near transit. Add shuttle service.", "Easy access by public transit.", "complex"),
+    "A4.04": ("Accept all payment types. Add mobile pay.", "Pay however you prefer.", "simple"),
+    "A4.05": ("Add contact channels. Reduce hold times.", "Easy to reach through any channel.", "medium"),
+
+    # Value (V) - Finance/Pricing solutions
+    "V1.01": ("Review pricing strategy. Offer value tiers.", "Competitive, fair pricing.", "complex"),
+    "V1.02": ("Benchmark against expectations. Communicate value.", "Pricing that matches expectations.", "medium"),
+    "V1.03": ("Conduct competitor analysis. Justify premium or match.", "Competitive with the market.", "medium"),
+    "V1.04": ("Display ALL fees upfront. Eliminate surprise charges.", "Complete price transparency - no hidden fees.", "simple"),
+    "V1.05": ("Offer payment plans. Add financing options.", "Flexible payment options available.", "medium"),
+    "V2.01": ("Create clear price lists. Explain pricing structure.", "Clear, easy-to-understand pricing.", "simple"),
+    "V2.02": ("List all fees upfront. Include in quotes.", "Full disclosure of all charges.", "simple"),
+    "V2.03": ("Audit marketing claims. Ensure accuracy.", "Honest, accurate advertising.", "simple"),
+    "V2.04": ("Simplify contracts. Highlight key terms.", "Fair, straightforward terms.", "medium"),
+    "V2.05": ("Verify all claims. Provide evidence.", "Honest representation of our services.", "simple"),
+    "V3.01": ("Streamline processes. Reduce customer time.", "Quick and easy, respecting your time.", "medium"),
+    "V3.02": ("Simplify decisions. Provide guidance.", "Easy decisions, minimal stress.", "medium"),
+    "V3.03": ("Offer delivery/pickup. Reduce physical burden.", "Convenient, minimal effort required.", "medium"),
+    "V3.04": ("Reduce friction points. Improve processes.", "Smooth, hassle-free experience.", "medium"),
+    "V3.05": ("Demonstrate value clearly. Compare alternatives.", "Worth every moment of your time.", "simple"),
+    "V4.01": ("Communicate value proposition. Demonstrate ROI.", "Exceptional value for your investment.", "medium"),
+    "V4.02": ("Ensure quality matches price. Add value-adds.", "Quality that justifies the price.", "medium"),
+    "V4.03": ("Track satisfaction. Follow up post-purchase.", "Complete satisfaction guaranteed.", "medium"),
+    "V4.04": ("Encourage referrals. Make sharing easy.", "So good, you'll tell your friends.", "simple"),
+    "V4.05": ("Build loyalty program. Reward returns.", "Worth coming back for, again and again.", "medium"),
+
+    # Relationship (R) - Leadership/CX solutions
+    "R1.01": ("Train honest communication. Build trust culture.", "Complete honesty and transparency.", "medium"),
+    "R1.02": ("Document commitments. Track promises made.", "We always keep our promises.", "simple"),
+    "R1.03": ("Share policies openly. Communicate changes.", "Open and transparent in everything we do.", "simple"),
+    "R1.04": ("Define ethical standards. Train compliance.", "Ethical business practices you can trust.", "medium"),
+    "R1.05": ("Ensure consistent treatment. Audit fairness.", "Fair dealing with every customer.", "medium"),
+    "R2.01": ("Track customer history. Learn from patterns.", "Proven track record of excellence.", "medium"),
+    "R2.02": ("Standardize experience. Reduce variation.", "Consistent excellence, every visit.", "medium"),
+    "R2.03": ("Communicate changes. Maintain core values.", "Stable and reliable, year after year.", "medium"),
+    "R2.04": ("Build trust incrementally. Honor commitments.", "A business you can trust completely.", "medium"),
+    "R2.05": ("Honor warranties promptly. Exceed guarantees.", "We stand behind our guarantees.", "medium"),
+    "R3.01": ("Train admission of mistakes. Empower acknowledgment.", "We own our mistakes.", "simple"),
+    "R3.02": ("Develop sincere apology training. Show genuine regret.", "Genuine apologies when things go wrong.", "simple"),
+    "R3.03": ("Develop compensation policies. Empower service recovery.", "We make things right with meaningful gestures.", "medium"),
+    "R3.04": ("Conduct post-mortem reviews. Implement learnings.", "We continuously improve from feedback.", "medium"),
+    "R3.05": ("Train ownership mentality. Remove blame culture.", "Full accountability when issues arise.", "medium"),
+    "R4.01": ("Implement CRM. Train staff on customer history.", "We remember you and value your loyalty.", "medium"),
+    "R4.02": ("Create meaningful loyalty program. Offer real value.", "Rewarding loyalty with meaningful perks.", "medium"),
+    "R4.03": ("Train relationship building. Encourage personal connections.", "More than transactions - real relationships.", "medium"),
+    "R4.04": ("Personalize communications. Add value in outreach.", "Helpful updates, not just promotions.", "medium"),
+    "R4.05": ("Build community events. Create belonging.", "Part of our community.", "medium"),
+}
+
+
+def escape_sql(s: str) -> str:
+    """Render ``s`` as a single-quoted SQL string literal.
+
+    Args:
+        s: Text to embed in a SQL statement, or None.
+
+    Returns:
+        The bare keyword ``NULL`` when ``s`` is None; otherwise ``s`` wrapped in
+        single quotes with every embedded single quote doubled. Only single
+        quotes are handled; all other characters pass through unchanged.
+    """
+    if s is not None:
+        # Splitting on the quote and re-joining with two quotes doubles each one.
+        doubled = "''".join(s.split("'"))
+        return f"'{doubled}'"
+    return "NULL"
+
+
+def generate_sql():
+    """Print the migration-011 SQL script to stdout.
+
+    Output layout: a comment header, ``BEGIN;``, one
+    ``UPDATE pipeline.urt_subcodes`` statement per SOLUTION_TEMPLATES entry
+    (all values quoted through escape_sql), ``COMMIT;``, and finally a
+    verification SELECT that is emitted after the transaction is closed.
+
+    NOTE(review): an UPDATE whose code is absent from the table changes no
+    rows and raises no error - confirm every key exists in urt_subcodes.
+    """
+    preamble = (
+        "-- Migration: Populate URT subcodes with solutions",
+        "-- Version: 011",
+        "-- Date: 2026-01-25",
+        "-- Generated from: urt-taxonomy/track-b-engineering/B1-urt-codes.yaml",
+        "",
+        "BEGIN;",
+        "",
+    )
+    for line in preamble:
+        print(line)
+
+    for subcode, template in SOLUTION_TEMPLATES.items():
+        fix_text, pitch, effort = template
+        statement_lines = [
+            "UPDATE pipeline.urt_subcodes",
+            f"SET solution = {escape_sql(fix_text)},",
+            f"    marketing_angle = {escape_sql(pitch)},",
+            f"    solution_complexity = {escape_sql(effort)}",
+            f"WHERE code = {escape_sql(subcode)};",
+            "",  # trailing empty item leaves a blank line after each statement
+        ]
+        print("\n".join(statement_lines))
+
+    epilogue = (
+        "COMMIT;",
+        "",
+        "-- Verify updates",
+        "SELECT code, name, solution_complexity, LEFT(solution, 50) as solution_preview",
+        "FROM pipeline.urt_subcodes",
+        "WHERE solution IS NOT NULL",
+        "ORDER BY code",
+        "LIMIT 10;",
+    )
+    for line in epilogue:
+        print(line)
+
+
+# Script entry point: when run directly, print the generated SQL to stdout
+# (the usage note at the top of this file redirects it into the .sql migration).
+if __name__ == "__main__":
+    generate_sql()
--- a/migrations/versions/011_populate_urt_solutions.sql
+++ b/migrations/versions/011_populate_urt_solutions.sql
@@ -0,0 +1,843 @@
+-- Migration: Populate URT subcodes with solutions
+-- Version: 011
+-- Date: 2026-01-25
+-- Generated from: urt-taxonomy/track-b-engineering/B1-urt-codes.yaml
+
+BEGIN;
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Implement quality testing before delivery. Create incident response process for functionality failures.',
+    marketing_angle = 'Our products work reliably - backed by rigorous quality testing.',
+    solution_complexity = 'medium'
+WHERE code = 'O1.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Optimize performance through benchmarking and monitoring. Set performance SLAs.',
+    marketing_angle = 'Experience lightning-fast performance that exceeds expectations.',
+    solution_complexity = 'complex'
+WHERE code = 'O1.02';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Use higher quality materials. Extend warranty coverage. Implement durability testing.',
+    marketing_angle = 'Built to last - quality materials that stand the test of time.',
+    solution_complexity = 'medium'
+WHERE code = 'O1.03';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Implement regular maintenance schedules. Add redundancy for critical systems.',
+    marketing_angle = 'Dependable reliability you can count on, every time.',
+    solution_complexity = 'medium'
+WHERE code = 'O1.04';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Track outcome metrics. Follow up on customer goals. Provide success coaching.',
+    marketing_angle = 'We measure success by YOUR results, not just our delivery.',
+    solution_complexity = 'medium'
+WHERE code = 'O1.05';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Upgrade to premium materials/ingredients. Source from quality suppliers.',
+    marketing_angle = 'Premium materials and ingredients you can see and feel.',
+    solution_complexity = 'medium'
+WHERE code = 'O2.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Invest in craftsman training. Implement quality checkpoints.',
+    marketing_angle = 'Master craftsmanship in every detail.',
+    solution_complexity = 'complex'
+WHERE code = 'O2.02';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train on presentation standards. Create visual guidelines.',
+    marketing_angle = 'Beautifully presented, every single time.',
+    solution_complexity = 'simple'
+WHERE code = 'O2.03';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Implement finishing checklists. Add quality inspection step.',
+    marketing_angle = 'Meticulous attention to every detail.',
+    solution_complexity = 'simple'
+WHERE code = 'O2.04';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Improve packaging. Add delivery condition checks. Train delivery staff.',
+    marketing_angle = 'Arrives in perfect condition, guaranteed.',
+    solution_complexity = 'medium'
+WHERE code = 'O2.05';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Create comprehensive packing lists. Verify completeness before shipping.',
+    marketing_angle = 'Everything you need, nothing missing.',
+    solution_complexity = 'simple'
+WHERE code = 'O3.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Test all features before release. Maintain feature availability dashboard.',
+    marketing_angle = 'All features available and working as promised.',
+    solution_complexity = 'medium'
+WHERE code = 'O3.02';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Define clear scope of work. Use completion checklists.',
+    marketing_angle = 'We deliver the full scope, every time.',
+    solution_complexity = 'simple'
+WHERE code = 'O3.03';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Create comprehensive documentation. Include setup guides and FAQs.',
+    marketing_angle = 'Clear instructions and helpful guides included.',
+    solution_complexity = 'simple'
+WHERE code = 'O3.04';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Implement order verification. Add confirmation step before fulfillment.',
+    marketing_angle = 'Exactly what you ordered, guaranteed.',
+    solution_complexity = 'simple'
+WHERE code = 'O4.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Build preference tracking system. Remember customer choices.',
+    marketing_angle = 'We remember your preferences for a personalized experience.',
+    solution_complexity = 'medium'
+WHERE code = 'O4.02';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train staff on customization options. Empower flexibility.',
+    marketing_angle = 'Flexible options tailored to your needs.',
+    solution_complexity = 'simple'
+WHERE code = 'O4.03';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Improve needs assessment. Train consultative selling.',
+    marketing_angle = 'Expert recommendations matched to your specific needs.',
+    solution_complexity = 'medium'
+WHERE code = 'O4.04';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train staff on warm greetings. Recognize friendly behavior.',
+    marketing_angle = 'Friendly faces and warm welcomes await you.',
+    solution_complexity = 'simple'
+WHERE code = 'P1.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Implement respect training. Address complaints immediately.',
+    marketing_angle = 'You''ll be treated with dignity and respect.',
+    solution_complexity = 'simple'
+WHERE code = 'P1.02';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train active listening and empathy. Role-play difficult scenarios.',
+    marketing_angle = 'Staff who truly understand your situation.',
+    solution_complexity = 'medium'
+WHERE code = 'P1.03';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Reduce time pressure on staff. Train patience techniques.',
+    marketing_angle = 'Take your time - we''re here to help, not rush.',
+    solution_complexity = 'simple'
+WHERE code = 'P1.04';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Hire for passion. Recognize enthusiastic service.',
+    marketing_angle = 'Passionate people who love helping customers.',
+    solution_complexity = 'medium'
+WHERE code = 'P1.05';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Implement ongoing product training. Create knowledge base.',
+    marketing_angle = 'Expert knowledge to answer any question.',
+    solution_complexity = 'medium'
+WHERE code = 'P2.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Invest in skills training. Certify technical competency.',
+    marketing_angle = 'Skilled professionals at the top of their craft.',
+    solution_complexity = 'complex'
+WHERE code = 'P2.02';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Empower staff to solve problems. Create escalation paths.',
+    marketing_angle = 'Creative problem-solvers who find solutions.',
+    solution_complexity = 'medium'
+WHERE code = 'P2.03';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Define professional standards. Provide uniforms/dress code.',
+    marketing_angle = 'Professional service you can trust.',
+    solution_complexity = 'simple'
+WHERE code = 'P2.04';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Hire experienced staff. Pair juniors with mentors.',
+    marketing_angle = 'Seasoned experts with years of experience.',
+    solution_complexity = 'complex'
+WHERE code = 'P2.05';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train proactive checking. Reduce multitasking.',
+    marketing_angle = 'Attentive service that anticipates your needs.',
+    solution_complexity = 'simple'
+WHERE code = 'P3.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Encourage proactive service. Reward initiative.',
+    marketing_angle = 'Proactive help before you even ask.',
+    solution_complexity = 'simple'
+WHERE code = 'P3.02';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Optimize staffing levels. Reduce wait for assistance.',
+    marketing_angle = 'Help is always available when you need it.',
+    solution_complexity = 'medium'
+WHERE code = 'P3.03';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Implement task tracking. Create follow-up reminders.',
+    marketing_angle = 'We do what we say we''ll do.',
+    solution_complexity = 'simple'
+WHERE code = 'P3.04';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train prioritization. Empower urgent action.',
+    marketing_angle = 'Your needs are treated with appropriate urgency.',
+    solution_complexity = 'simple'
+WHERE code = 'P3.05';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train jargon-free communication. Use visual aids.',
+    marketing_angle = 'Clear explanations without confusing jargon.',
+    solution_complexity = 'simple'
+WHERE code = 'P4.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train active listening. Implement feedback loops.',
+    marketing_angle = 'We truly listen and understand your needs.',
+    solution_complexity = 'simple'
+WHERE code = 'P4.02';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Implement status update systems. Set update expectations.',
+    marketing_angle = 'Regular updates keep you informed every step.',
+    solution_complexity = 'simple'
+WHERE code = 'P4.03';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Verify information before sharing. Create accuracy checks.',
+    marketing_angle = 'Accurate information you can rely on.',
+    solution_complexity = 'simple'
+WHERE code = 'P4.04';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train professional communication. Provide tone guidelines.',
+    marketing_angle = 'Professional yet personable communication.',
+    solution_complexity = 'simple'
+WHERE code = 'P4.05';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Display estimated wait times. Implement queue management.',
+    marketing_angle = 'Minimal wait times with clear expectations.',
+    solution_complexity = 'medium'
+WHERE code = 'J1.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Optimize delivery processes. Set realistic timelines.',
+    marketing_angle = 'Fast, reliable delivery every time.',
+    solution_complexity = 'medium'
+WHERE code = 'J1.02';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Set response time SLAs. Implement ticketing system.',
+    marketing_angle = 'Quick responses when you reach out.',
+    solution_complexity = 'medium'
+WHERE code = 'J1.03';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Improve scheduling. Buffer time for delays.',
+    marketing_angle = 'On-time, every time.',
+    solution_complexity = 'simple'
+WHERE code = 'J1.04';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Train on pacing. Allow customer control of tempo.',
+    marketing_angle = 'At your pace, never rushed.',
+    solution_complexity = 'simple'
+WHERE code = 'J1.05';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Simplify processes. Remove unnecessary steps.',
+    marketing_angle = 'Simple, straightforward processes.',
+    solution_complexity = 'medium'
+WHERE code = 'J2.01';
+
+UPDATE pipeline.urt_subcodes
+SET solution = 'Improve signage. Create intuitive layouts.',
+    marketing_angle = 'Easy to find what you''re looking for.',
+    solution_complexity = 'simple'
+WHERE code = 'J2.02';
+
+-- Solution / marketing copy for the J2.03 - J4.05 subcodes.
+-- One UPDATE per code; the three columns are assigned together through a
+-- row constructor. A code that matches no row updates nothing.
+
+-- J2.03 - J2.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Digitize forms. Pre-fill known information.', 'Minimal paperwork, maximum efficiency.', 'medium')
+WHERE code = 'J2.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Improve handoff protocols. Share context between teams.', 'Seamless transitions between team members.', 'medium')
+WHERE code = 'J2.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Build self-service portal. Add online options.', 'Self-service options for your convenience.', 'complex')
+WHERE code = 'J2.05';
+
+-- J3.01 - J3.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Standardize processes. Document procedures.', 'Consistent quality every single time.', 'medium')
+WHERE code = 'J3.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Implement order verification. Add accuracy checks.', 'Accurate orders, no mistakes.', 'simple')
+WHERE code = 'J3.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Improve system reliability. Add monitoring and alerts.', 'Reliable systems that are always available.', 'complex')
+WHERE code = 'J3.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Set clear expectations. Document what to expect.', 'No surprises - exactly what you expect.', 'simple')
+WHERE code = 'J3.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Implement quality checks. Track and reduce errors.', 'Rare mistakes with quick corrections.', 'medium')
+WHERE code = 'J3.05';
+
+-- J4.01 - J4.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Train problem acknowledgment. Create issue intake process.', 'We acknowledge issues immediately.', 'simple')
+WHERE code = 'J4.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Create clear escalation paths. Empower frontline resolution.', 'Efficient resolution process.', 'medium')
+WHERE code = 'J4.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Set resolution time targets. Prioritize open issues.', 'Fast resolution when things go wrong.', 'medium')
+WHERE code = 'J4.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Verify fixes before closing. Follow up on resolutions.', 'Complete solutions, not band-aids.', 'medium')
+WHERE code = 'J4.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Conduct root cause analysis. Implement systemic fixes.', 'We fix problems permanently.', 'complex')
+WHERE code = 'J4.05';
+
+-- Solution / marketing copy for the E1.01 - E4.05 subcodes.
+-- One UPDATE per code; the three columns are assigned together through a
+-- row constructor. A code that matches no row updates nothing.
+
+-- E1.01 - E1.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Increase cleaning frequency. Create cleaning checklists.', 'Spotlessly clean facilities.', 'simple')
+WHERE code = 'E1.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Implement preventive maintenance. Fix issues promptly.', 'Well-maintained, everything works.', 'medium')
+WHERE code = 'E1.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Redesign layout for flow. Add wayfinding.', 'Intuitive layout, easy to navigate.', 'complex')
+WHERE code = 'E1.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Upgrade equipment. Implement replacement schedule.', 'Modern, state-of-the-art equipment.', 'complex')
+WHERE code = 'E1.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add clear signage. Use consistent design.', 'Clear signs and easy navigation.', 'simple')
+WHERE code = 'E1.05';
+
+-- E2.01 - E2.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Invest in UX design. Conduct usability testing.', 'Beautiful, intuitive digital experience.', 'complex')
+WHERE code = 'E2.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Test all features. Fix bugs promptly.', 'Everything works, no broken buttons.', 'medium')
+WHERE code = 'E2.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Optimize page load. Improve server response.', 'Lightning-fast digital experience.', 'complex')
+WHERE code = 'E2.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Simplify navigation. Reduce menu depth.', 'Find what you need in seconds.', 'medium')
+WHERE code = 'E2.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Optimize for mobile. Test on all devices.', 'Works beautifully on any device.', 'medium')
+WHERE code = 'E2.05';
+
+-- E3.01 - E3.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Design for desired mood. Control sensory elements.', 'Perfect atmosphere and ambiance.', 'medium')
+WHERE code = 'E3.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add sound absorption. Create quiet zones.', 'Pleasant sound levels.', 'medium')
+WHERE code = 'E3.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Optimize HVAC. Add zone controls.', 'Perfect temperature, always comfortable.', 'medium')
+WHERE code = 'E3.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Manage capacity. Control entry rates.', 'Comfortable, never overcrowded.', 'medium')
+WHERE code = 'E3.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Invest in design. Update decor regularly.', 'Beautiful, inviting space.', 'complex')
+WHERE code = 'E3.05';
+
+-- E4.01 - E4.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Conduct safety audits. Address hazards immediately.', 'Safety is our top priority.', 'medium')
+WHERE code = 'E4.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Implement hygiene protocols. Train staff on standards.', 'Highest hygiene standards.', 'medium')
+WHERE code = 'E4.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add security measures. Protect customer property.', 'Secure environment for you and your belongings.', 'medium')
+WHERE code = 'E4.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Upgrade furniture. Add comfort amenities.', 'Comfortable facilities for your visit.', 'medium')
+WHERE code = 'E4.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Conduct emergency drills. Mark exits clearly.', 'Prepared for any emergency.', 'medium')
+WHERE code = 'E4.05';
+
+-- Solution / marketing copy for the A1.01 - A4.05 subcodes.
+-- One UPDATE per code; the three columns are assigned together through a
+-- row constructor. A code that matches no row updates nothing.
+
+-- A1.01 - A1.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Extend operating hours. Consider 24/7 options.', 'Open when you need us.', 'medium')
+WHERE code = 'A1.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add online booking. Increase appointment slots.', 'Easy scheduling, plenty of availability.', 'medium')
+WHERE code = 'A1.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Improve inventory management. Add stock alerts.', 'Always in stock when you need it.', 'medium')
+WHERE code = 'A1.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Hire additional staff. Optimize scheduling.', 'Plenty of staff to help you.', 'complex')
+WHERE code = 'A1.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Expand service area. Add new locations.', 'Convenient locations near you.', 'complex')
+WHERE code = 'A1.05';
+
+-- A2.01 - A2.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add ramps and elevators. Ensure ADA compliance.', 'Fully accessible for all abilities.', 'complex')
+WHERE code = 'A2.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add alt text. Ensure screen reader compatibility.', 'Accessible for visually impaired users.', 'medium')
+WHERE code = 'A2.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add captions and transcripts. Support hearing devices.', 'Accessible for hearing impaired users.', 'medium')
+WHERE code = 'A2.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Use plain language. Simplify instructions.', 'Easy to understand for everyone.', 'simple')
+WHERE code = 'A2.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Test with assistive technologies. Follow WCAG guidelines.', 'Works with all assistive technologies.', 'complex')
+WHERE code = 'A2.05';
+
+-- A3.01 - A3.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Hire multilingual staff. Add translation services.', 'Service in your language.', 'medium')
+WHERE code = 'A3.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Train cultural competency. Celebrate diversity.', 'Welcoming to all backgrounds.', 'medium')
+WHERE code = 'A3.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Offer dietary alternatives. Train allergy awareness.', 'Options for all dietary needs.', 'medium')
+WHERE code = 'A3.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add family amenities. Create kid-friendly options.', 'Great for the whole family.', 'medium')
+WHERE code = 'A3.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Train bias awareness. Audit for fair treatment.', 'Equal, respectful treatment for all.', 'medium')
+WHERE code = 'A3.05';
+
+-- A4.01 - A4.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Choose high-traffic location. Improve visibility.', 'Convenient, easy-to-find location.', 'complex')
+WHERE code = 'A4.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add parking spaces. Offer validation.', 'Easy, hassle-free parking.', 'complex')
+WHERE code = 'A4.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Locate near transit. Add shuttle service.', 'Easy access by public transit.', 'complex')
+WHERE code = 'A4.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Accept all payment types. Add mobile pay.', 'Pay however you prefer.', 'simple')
+WHERE code = 'A4.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Add contact channels. Reduce hold times.', 'Easy to reach through any channel.', 'medium')
+WHERE code = 'A4.05';
+
+-- Solution / marketing copy for the V1.01 - V4.05 subcodes.
+-- One UPDATE per code; the three columns are assigned together through a
+-- row constructor. A code that matches no row updates nothing.
+
+-- V1.01 - V1.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Review pricing strategy. Offer value tiers.', 'Competitive, fair pricing.', 'complex')
+WHERE code = 'V1.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Benchmark against expectations. Communicate value.', 'Pricing that matches expectations.', 'medium')
+WHERE code = 'V1.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Conduct competitor analysis. Justify premium or match.', 'Competitive with the market.', 'medium')
+WHERE code = 'V1.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Display ALL fees upfront. Eliminate surprise charges.', 'Complete price transparency - no hidden fees.', 'simple')
+WHERE code = 'V1.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Offer payment plans. Add financing options.', 'Flexible payment options available.', 'medium')
+WHERE code = 'V1.05';
+
+-- V2.01 - V2.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Create clear price lists. Explain pricing structure.', 'Clear, easy-to-understand pricing.', 'simple')
+WHERE code = 'V2.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('List all fees upfront. Include in quotes.', 'Full disclosure of all charges.', 'simple')
+WHERE code = 'V2.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Audit marketing claims. Ensure accuracy.', 'Honest, accurate advertising.', 'simple')
+WHERE code = 'V2.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Simplify contracts. Highlight key terms.', 'Fair, straightforward terms.', 'medium')
+WHERE code = 'V2.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Verify all claims. Provide evidence.', 'Honest representation of our services.', 'simple')
+WHERE code = 'V2.05';
+
+-- V3.01 - V3.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Streamline processes. Reduce customer time.', 'Quick and easy, respecting your time.', 'medium')
+WHERE code = 'V3.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Simplify decisions. Provide guidance.', 'Easy decisions, minimal stress.', 'medium')
+WHERE code = 'V3.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Offer delivery/pickup. Reduce physical burden.', 'Convenient, minimal effort required.', 'medium')
+WHERE code = 'V3.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Reduce friction points. Improve processes.', 'Smooth, hassle-free experience.', 'medium')
+WHERE code = 'V3.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Demonstrate value clearly. Compare alternatives.', 'Worth every moment of your time.', 'simple')
+WHERE code = 'V3.05';
+
+-- V4.01 - V4.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Communicate value proposition. Demonstrate ROI.', 'Exceptional value for your investment.', 'medium')
+WHERE code = 'V4.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Ensure quality matches price. Add value-adds.', 'Quality that justifies the price.', 'medium')
+WHERE code = 'V4.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Track satisfaction. Follow up post-purchase.', 'Complete satisfaction guaranteed.', 'medium')
+WHERE code = 'V4.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Encourage referrals. Make sharing easy.', 'So good, you''ll tell your friends.', 'simple')
+WHERE code = 'V4.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Build loyalty program. Reward returns.', 'Worth coming back for, again and again.', 'medium')
+WHERE code = 'V4.05';
+
+-- Solution / marketing copy for the R1.01 - R4.05 subcodes.
+-- One UPDATE per code; the three columns are assigned together through a
+-- row constructor. A code that matches no row updates nothing.
+
+-- R1.01 - R1.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Train honest communication. Build trust culture.', 'Complete honesty and transparency.', 'medium')
+WHERE code = 'R1.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Document commitments. Track promises made.', 'We always keep our promises.', 'simple')
+WHERE code = 'R1.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Share policies openly. Communicate changes.', 'Open and transparent in everything we do.', 'simple')
+WHERE code = 'R1.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Define ethical standards. Train compliance.', 'Ethical business practices you can trust.', 'medium')
+WHERE code = 'R1.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Ensure consistent treatment. Audit fairness.', 'Fair dealing with every customer.', 'medium')
+WHERE code = 'R1.05';
+
+-- R2.01 - R2.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Track customer history. Learn from patterns.', 'Proven track record of excellence.', 'medium')
+WHERE code = 'R2.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Standardize experience. Reduce variation.', 'Consistent excellence, every visit.', 'medium')
+WHERE code = 'R2.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Communicate changes. Maintain core values.', 'Stable and reliable, year after year.', 'medium')
+WHERE code = 'R2.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Build trust incrementally. Honor commitments.', 'A business you can trust completely.', 'medium')
+WHERE code = 'R2.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Honor warranties promptly. Exceed guarantees.', 'We stand behind our guarantees.', 'medium')
+WHERE code = 'R2.05';
+
+-- R3.01 - R3.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Train admission of mistakes. Empower acknowledgment.', 'We own our mistakes.', 'simple')
+WHERE code = 'R3.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Develop sincere apology training. Show genuine regret.', 'Genuine apologies when things go wrong.', 'simple')
+WHERE code = 'R3.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Develop compensation policies. Empower service recovery.', 'We make things right with meaningful gestures.', 'medium')
+WHERE code = 'R3.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Conduct post-mortem reviews. Implement learnings.', 'We continuously improve from feedback.', 'medium')
+WHERE code = 'R3.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Train ownership mentality. Remove blame culture.', 'Full accountability when issues arise.', 'medium')
+WHERE code = 'R3.05';
+
+-- R4.01 - R4.05
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Implement CRM. Train staff on customer history.', 'We remember you and value your loyalty.', 'medium')
+WHERE code = 'R4.01';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Create meaningful loyalty program. Offer real value.', 'Rewarding loyalty with meaningful perks.', 'medium')
+WHERE code = 'R4.02';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Train relationship building. Encourage personal connections.', 'More than transactions - real relationships.', 'medium')
+WHERE code = 'R4.03';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Personalize communications. Add value in outreach.', 'Helpful updates, not just promotions.', 'medium')
+WHERE code = 'R4.04';
+UPDATE pipeline.urt_subcodes SET (solution, marketing_angle, solution_complexity) =
+    ('Build community events. Create belonging.', 'Part of our community.', 'medium')
+WHERE code = 'R4.05';
+
+COMMIT;
+
+-- Verify updates
+-- Preview: the first 10 subcodes (ordered by code) that now carry a solution.
+SELECT code, name, solution_complexity, LEFT(solution, 50) as solution_preview
+FROM pipeline.urt_subcodes
+WHERE solution IS NOT NULL
+ORDER BY code
+LIMIT 10;
+
+-- Coverage: an UPDATE ... WHERE code = '<x>' that matches no row still
+-- succeeds, and the 10-row preview above cannot show which subcodes were left
+-- without a solution. List them so any gap is visible in the migration output.
+SELECT code, name
+FROM pipeline.urt_subcodes
+WHERE solution IS NULL
+ORDER BY code;
--- a/migrations/versions/012_sync_urt_subcodes_from_taxonomy.sql
+++ b/migrations/versions/012_sync_urt_subcodes_from_taxonomy.sql
@@ -0,0 +1,102 @@
+-- =============================================================================
+-- Migration: 012_sync_urt_subcodes_from_taxonomy.sql
+-- Purpose: Sync missing URT subcodes from taxonomy v5.1 to database
+-- =============================================================================
+
+-- Insert missing subcodes (ON CONFLICT DO UPDATE to sync names/definitions)
+-- Upsert keyed on code: a new code is inserted with all seven listed columns;
+-- for an existing code only name, definition, positive_example and
+-- negative_example are overwritten (category_code and domain_code are kept).
+--
+-- NOTE(review): the preceding migration in this commit attaches solution /
+-- marketing_angle text to several of these same codes, and that text does not
+-- match the names set here (e.g. J2.05 gets 'Build self-service portal...'
+-- there but is named 'Inventory' below; V4.04 gets 'Encourage referrals...'
+-- there but is 'Billing Accuracy' below). This statement never touches those
+-- columns. If the rows really were missing when that migration ran, its
+-- UPDATEs matched nothing and the columns stay NULL; if the rows existed, the
+-- stored solutions now describe a different subcode. Confirm which taxonomy
+-- version is authoritative.
+INSERT INTO pipeline.urt_subcodes (code, category_code, domain_code, name, definition, positive_example, negative_example) VALUES
+
+-- J1: Wait Times (missing J1.04, J1.05)
+('J1.04', 'J1', 'J', 'Punctuality', 'Meeting scheduled times', 'Always on time', 'Two hours late'),
+('J1.05', 'J1', 'J', 'Pacing', 'Appropriate speed (not rushed/dragged)', 'Perfect pace throughout', 'Felt rushed through everything'),
+
+-- J2: Booking & Reservations (missing J2.04, J2.05)
+('J2.04', 'J2', 'J', 'Booking Availability', 'Slots/capacity when needed', 'Always available slots', 'Fully booked for weeks'),
+('J2.05', 'J2', 'J', 'Inventory', 'Stock availability', 'Always in stock', 'Out of stock constantly'),
+
+-- J3: System Reliability (missing J3.04, J3.05)
+('J3.04', 'J3', 'J', 'Data Accuracy', 'Correct info in systems', 'All details correct', 'Wrong info in my account'),
+('J3.05', 'J3', 'J', 'Integration', 'Systems work together', 'Seamless between channels', 'Info doesn''t sync'),
+
+-- J4: Problem Resolution (missing J4.04, J4.05)
+('J4.04', 'J4', 'J', 'Escalation', 'Getting to right person', 'Quickly got to manager', 'Endless transfers'),
+('J4.05', 'J4', 'J', 'Closure', 'Issue fully resolved', 'Problem completely solved', 'Issue still not fixed'),
+
+-- A1: Physical Access (missing A1.04, A1.05)
+('A1.04', 'A1', 'A', 'Wayfinding', 'Finding destination', 'Easy to find', 'Got lost trying to find it'),
+('A1.05', 'A1', 'A', 'Physical Accessibility', 'Disability accommodations', 'Wheelchair accessible', 'No ramps or elevators'),
+
+-- A2: Channel Access (missing A2.04, A2.05)
+('A2.04', 'A2', 'A', 'Language Accessibility', 'Multilingual support', 'Available in my language', 'No translation available'),
+('A2.05', 'A2', 'A', 'Hours of Operation', 'Service availability times', 'Open when needed', 'Terrible hours'),
+
+-- A3: Information Access (missing A3.04, A3.05)
+('A3.04', 'A3', 'A', 'Documentation Clarity', 'Clear instructions', 'Easy to understand docs', 'Confusing instructions'),
+('A3.05', 'A3', 'A', 'Support Accessibility', 'Getting help when needed', 'Easy to reach support', 'Impossible to get help'),
+
+-- A4: Financial Access (missing A4.04, A4.05)
+('A4.04', 'A4', 'A', 'Payment Flexibility', 'Multiple payment options', 'Many payment options', 'Only accepts cash'),
+('A4.05', 'A4', 'A', 'Refund Accessibility', 'Getting money back', 'Easy refund process', 'Impossible to get refund'),
+
+-- E1: Physical Environment (missing E1.04, E1.05)
+('E1.04', 'E1', 'E', 'Ambiance', 'Atmosphere and vibe', 'Great atmosphere', 'Depressing environment'),
+('E1.05', 'E1', 'E', 'Comfort', 'Physical comfort', 'Very comfortable', 'Uncomfortable seating'),
+
+-- E2: Digital Environment (missing E2.04, E2.05)
+('E2.04', 'E2', 'E', 'Visual Design', 'Aesthetics of interface', 'Beautiful design', 'Ugly interface'),
+('E2.05', 'E2', 'E', 'Mobile Experience', 'Mobile usability', 'Great mobile app', 'Terrible mobile site'),
+
+-- E3: Safety & Security (missing E3.04, E3.05)
+('E3.04', 'E3', 'E', 'Health Safety', 'Health precautions', 'Very clean and safe', 'Unsanitary conditions'),
+('E3.05', 'E3', 'E', 'Cyber Security', 'Digital security', 'Secure platform', 'Got hacked'),
+
+-- E4: Sustainability (missing E4.04, E4.05)
+('E4.04', 'E4', 'E', 'Social Responsibility', 'Ethical practices', 'Ethical company', 'Exploitative practices'),
+('E4.05', 'E4', 'E', 'Community Impact', 'Local community effect', 'Supports local community', 'Hurts local businesses'),
+
+-- V1: Pricing (missing V1.04, V1.05)
+('V1.04', 'V1', 'V', 'Price Transparency', 'Clear pricing', 'Clear pricing upfront', 'Hidden costs everywhere'),
+('V1.05', 'V1', 'V', 'Price Stability', 'Consistent pricing', 'Same price always', 'Prices keep changing'),
+
+-- V2: Value Perception (missing V2.04, V2.05)
+('V2.04', 'V2', 'V', 'Quality-Price Ratio', 'Worth vs cost', 'Excellent quality for price', 'Overpriced for quality'),
+('V2.05', 'V2', 'V', 'Competitive Value', 'Compared to alternatives', 'Best value around', 'Better deals elsewhere'),
+
+-- V3: Promotions (missing V3.04, V3.05)
+('V3.04', 'V3', 'V', 'Promotion Clarity', 'Clear offer terms', 'Clear promotion rules', 'Misleading promotions'),
+('V3.05', 'V3', 'V', 'Reward Redemption', 'Using points/rewards', 'Easy to redeem rewards', 'Hard to use points'),
+
+-- V4: Billing (missing V4.04, V4.05)
+('V4.04', 'V4', 'V', 'Billing Accuracy', 'Correct charges', 'Always billed correctly', 'Overcharged constantly'),
+('V4.05', 'V4', 'V', 'Billing Resolution', 'Fixing billing issues', 'Quick billing fix', 'Billing disputes ignored'),
+
+-- R1: Trust (missing R1.04, R1.05)
+('R1.04', 'R1', 'R', 'Ethics', 'Ethical behavior', 'Very ethical company', 'Unethical practices'),
+('R1.05', 'R1', 'R', 'Accountability', 'Taking responsibility', 'Owned their mistakes', 'Never takes blame'),
+
+-- R2: Reliability (missing R2.04, R2.05)
+('R2.04', 'R2', 'R', 'Predictability', 'Consistent experience', 'Always know what to expect', 'Every visit is different'),
+('R2.05', 'R2', 'R', 'Standards', 'Meeting quality standards', 'High standards maintained', 'Standards have dropped'),
+
+-- R3: Care (missing R3.04, R3.05)
+('R3.04', 'R3', 'R', 'Personal Connection', 'Human touch', 'Felt like family', 'Treated like a number'),
+('R3.05', 'R3', 'R', 'Going Extra Mile', 'Beyond expectations', 'Went above and beyond', 'Minimum effort only'),
+
+-- R4: Recovery (missing R4.04, R4.05)
+('R4.04', 'R4', 'R', 'Service Recovery', 'Making things right', 'Fixed problem perfectly', 'Made it worse'),
+('R4.05', 'R4', 'R', 'Feedback Response', 'Acting on feedback', 'Implemented my suggestion', 'Feedback ignored')
+
+-- EXCLUDED refers to the row that was proposed for insertion.
+ON CONFLICT (code) DO UPDATE SET
+    name = EXCLUDED.name,
+    definition = EXCLUDED.definition,
+    positive_example = EXCLUDED.positive_example,
+    negative_example = EXCLUDED.negative_example;
+
+-- Verify count: emit the total number of rows in pipeline.urt_subcodes as a
+-- NOTICE so it shows up in the migration log.
+DO $verify$
+DECLARE
+    total_rows INTEGER := (SELECT COUNT(*) FROM pipeline.urt_subcodes);
+BEGIN
+    RAISE NOTICE 'Total subcodes after sync: %', total_rows;
+END
+$verify$;
--- a/migrations/versions/013_insert_primitives.sql
+++ b/migrations/versions/013_insert_primitives.sql
@@ -0,0 +1,411 @@
+-- Migration: Insert frozen primitive dictionary (36 primitives)
+-- Description: Populates the primitives table with the complete URT taxonomy
+-- Date: 2025-01-31
+
+-- Quality dimension (8 primitives)
+-- One INSERT per primitive. The two signal columns receive JSON-array text
+-- literals; is_meta is FALSE for every row in this group.
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('EFFECTIVENESS', 'QUALITY', 'Effectiveness', 'Did it achieve its intended purpose?', FALSE,
+        '["worked perfectly", "exactly what I needed", "solved my problem"]',
+        '["didn''t work", "useless", "waste of time"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('TASTE', 'QUALITY', 'Taste', 'Sensory quality (flavor, texture, smell)', FALSE,
+        '["delicious", "amazing taste", "flavorful"]',
+        '["bland", "tasteless", "disgusting"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('CRAFT', 'QUALITY', 'Craft', 'Skill of execution, workmanship', FALSE,
+        '["well-made", "professional", "quality work"]',
+        '["sloppy", "poorly made", "amateur"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('ACCURACY', 'QUALITY', 'Accuracy', 'Correct as ordered/specified', FALSE,
+        '["exactly what I ordered", "perfect", "got everything right"]',
+        '["wrong order", "missing items", "not what I asked for"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('FRESHNESS', 'QUALITY', 'Freshness', 'Fresh vs stale/expired', FALSE,
+        '["fresh", "just made", "new"]',
+        '["stale", "old", "expired"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('TEMPERATURE', 'QUALITY', 'Temperature', 'Appropriate temperature for the item', FALSE,
+        '["hot", "perfect temperature", "cold as it should be"]',
+        '["cold", "lukewarm", "too hot", "room temperature"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('CONDITION', 'QUALITY', 'Condition', 'Physical state, damage, defects', FALSE,
+        '["perfect condition", "like new", "undamaged"]',
+        '["damaged", "broken", "defective"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('CONSISTENCY', 'QUALITY', 'Consistency', 'Same quality across visits/units', FALSE,
+        '["always consistent", "reliable quality", "same every time"]',
+        '["inconsistent", "hit or miss", "varies"]');
+
+-- Service dimension (4 primitives)
+-- One INSERT per primitive. The two signal columns receive JSON-array text
+-- literals; is_meta is FALSE for every row in this group.
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('MANNER', 'SERVICE', 'Manner', 'Warmth, respect, patience in interactions', FALSE,
+        '["friendly", "nice", "welcoming", "patient"]',
+        '["rude", "dismissive", "impatient", "attitude"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('COMPETENCE', 'SERVICE', 'Competence', 'Knowledge and skill of staff', FALSE,
+        '["knowledgeable", "professional", "knew what they were doing"]',
+        '["clueless", "incompetent", "didn''t know"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('ATTENTIVENESS', 'SERVICE', 'Attentiveness', 'Present, notices needs, proactive', FALSE,
+        '["attentive", "checked on us", "anticipated needs"]',
+        '["ignored", "had to flag down", "neglected"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('COMMUNICATION', 'SERVICE', 'Communication', 'Clear, listens, keeps informed', FALSE,
+        '["clear", "good communication", "kept us updated"]',
+        '["confusing", "didn''t listen", "no updates"]');
+
+-- Process dimension (4 primitives)
+-- One INSERT per primitive. The two signal columns receive JSON-array text
+-- literals; is_meta is FALSE for every row in this group.
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('SPEED', 'PROCESS', 'Speed', 'How fast/slow things happen', FALSE,
+        '["fast", "quick", "no wait"]',
+        '["slow", "took forever", "long wait"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('FRICTION', 'PROCESS', 'Friction', 'Ease vs obstacles in the process', FALSE,
+        '["easy", "smooth", "hassle-free"]',
+        '["complicated", "difficult", "hassle"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('RELIABILITY', 'PROCESS', 'Reliability', 'Process works consistently, no errors', FALSE,
+        '["reliable", "dependable", "always works"]',
+        '["unreliable", "errors", "problems"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('AVAILABILITY', 'PROCESS', 'Availability', 'Hours, capacity, stock availability', FALSE,
+        '["always available", "open when needed", "in stock"]',
+        '["closed", "sold out", "no appointments"]');
+
+-- Environment dimension (6 primitives)
+-- One INSERT per primitive. The two signal columns receive JSON-array text
+-- literals; is_meta is FALSE for every row in this group.
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('CLEANLINESS', 'ENVIRONMENT', 'Cleanliness', 'Clean, sanitary conditions', FALSE,
+        '["clean", "spotless", "hygienic"]',
+        '["dirty", "filthy", "unsanitary"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('COMFORT', 'ENVIRONMENT', 'Comfort', 'Physical comfort of the space', FALSE,
+        '["comfortable", "cozy", "spacious"]',
+        '["uncomfortable", "cramped", "hard seats"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('AMBIANCE', 'ENVIRONMENT', 'Ambiance', 'Vibe, atmosphere, noise level', FALSE,
+        '["nice atmosphere", "great vibe", "quiet"]',
+        '["loud", "noisy", "bad atmosphere"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('SAFETY', 'ENVIRONMENT', 'Safety', 'Physical and health safety', FALSE,
+        '["safe", "secure", "clean protocols"]',
+        '["unsafe", "dangerous", "health hazard"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('ACCESSIBILITY', 'ENVIRONMENT', 'Accessibility', 'Disability access, location convenience', FALSE,
+        '["accessible", "easy to get to", "good parking"]',
+        '["hard to access", "no parking", "not wheelchair accessible"]');
+
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES ('DIGITAL_UX', 'ENVIRONMENT', 'Digital UX', 'App/website usability and performance', FALSE,
+        '["easy to use", "great app", "fast website"]',
+        '["app crashed", "hard to navigate", "slow website"]');
+
+-- Value dimension (4 primitives)
+INSERT INTO pipeline.primitives
+    (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES
+    ('PRICE_LEVEL', 'VALUE', 'Price Level', 'Absolute cost perception', FALSE,
+     '["affordable", "cheap", "good prices"]',
+     '["expensive", "overpriced", "pricey"]'),
+    ('PRICE_FAIRNESS', 'VALUE', 'Price Fairness', 'Fair value for what was received', FALSE,
+     '["fair price", "worth it", "good value"]',
+     '["rip off", "not worth it", "overcharged"]'),
+    ('PRICE_TRANSPARENCY', 'VALUE', 'Price Transparency', 'Clear pricing, no surprises', FALSE,
+     '["clear pricing", "no hidden fees", "upfront"]',
+     '["hidden fees", "surprise charges", "bait and switch"]'),
+    ('VALUE_FOR_MONEY', 'VALUE', 'Value for Money', 'Overall worth judgment', FALSE,
+     '["great value", "worth every penny", "good deal"]',
+     '["bad value", "waste of money", "not worth it"]');
+
+-- Trust dimension (3 meta primitives)
+-- NOTE(review): migration 014 upserts HONESTY and ETHICS again (lower-case
+-- dimension, different names and signal lists); confirm which seed should win.
+INSERT INTO pipeline.primitives
+    (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES
+    ('HONESTY', 'TRUST', 'Honesty', 'Truthful, no deception', TRUE,
+     '["honest", "transparent", "truthful"]',
+     '["lied", "deceived", "dishonest", "scam"]'),
+    ('ETHICS', 'TRUST', 'Ethics', 'Ethical, fair dealing', TRUE,
+     '["ethical", "fair", "integrity"]',
+     '["unethical", "shady", "crooked"]'),
+    ('PROMISES', 'TRUST', 'Promises', 'Kept or broken commitments', TRUE,
+     '["kept their word", "delivered as promised", "reliable"]',
+     '["broke promise", "didn''t deliver", "false advertising"]');
+
+-- Resolution dimension (3 meta primitives)
+INSERT INTO pipeline.primitives
+    (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES
+    ('ACKNOWLEDGMENT', 'RESOLUTION', 'Acknowledgment', 'Recognized the problem', TRUE,
+     '["acknowledged", "apologized", "admitted mistake"]',
+     '["denied", "dismissed", "blamed me"]'),
+    ('RESPONSE_QUALITY', 'RESOLUTION', 'Response Quality', 'How well they handled the issue', TRUE,
+     '["handled well", "resolved quickly", "took care of it"]',
+     '["ignored complaint", "unhelpful", "made it worse"]'),
+    ('RECOVERY', 'RESOLUTION', 'Recovery', 'Made it right, compensation', TRUE,
+     '["made it right", "refunded", "compensated"]',
+     '["refused refund", "no compensation", "wouldn''t fix"]');
+
+-- Loyalty dimension (3 meta primitives)
+INSERT INTO pipeline.primitives
+    (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES
+    ('RETURN_INTENT', 'LOYALTY', 'Return Intent', 'Will/won''t come back', TRUE,
+     '["will be back", "returning customer", "coming again"]',
+     '["never again", "won''t return", "last time"]'),
+    ('RECOMMEND', 'LOYALTY', 'Recommend', 'Would/wouldn''t recommend', TRUE,
+     '["highly recommend", "tell everyone", "must try"]',
+     '["avoid", "don''t go", "stay away", "wouldn''t recommend"]'),
+    ('RECOGNITION', 'LOYALTY', 'Recognition', 'Felt valued, remembered', TRUE,
+     '["remembered me", "felt valued", "personal touch"]',
+     '["treated like a number", "didn''t care", "no loyalty"]');
+
+-- Escape dimension (1 meta primitive)
+-- Catch-all bucket for evidence that fits no other primitive; it carries no
+-- base signals of its own.
+INSERT INTO pipeline.primitives
+    (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES
+    ('UNMAPPED', 'ESCAPE', 'Unmapped', 'Does not fit taxonomy; preserve evidence', TRUE, '[]', '[]');
+
+-- Verify count
+-- SELECT COUNT(*) FROM pipeline.primitives; -- Should return 36
--- a/migrations/versions/014_primitive_classification_system.sql
+++ b/migrations/versions/014_primitive_classification_system.sql
@@ -0,0 +1,561 @@
+-- =============================================================================
+-- Migration: 014_primitive_classification_system.sql
+-- Purpose: Create primitive classification system for context-aware review analysis
+-- =============================================================================
+--
+-- This migration introduces a "primitive" classification system that allows
+-- industry-specific and category-specific configuration of what aspects to
+-- look for when classifying reviews.
+--
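+-- Components:
+--   1. pipeline.primitives - Frozen dictionary of primitives (quality dimensions)
+--   2. ALTER public.gbp_categories - Add primitive_configs, business_context and config versioning columns
+--   3. pipeline.jsonb_deep_merge() - Recursive JSONB merge function
+--   4. pipeline.resolve_primitive_config() - Resolve primitive configs through category tree
+--   5. pipeline.resolve_business_context() - Resolve business context through category tree
+--   6. pipeline.get_classification_context() - Get full classification context
+--   7. pipeline.get_active_primitives() - List active primitive codes for a category path
+--   8. Seed primitives and example category configs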
+--
+-- Date: 2026-01-31
+-- =============================================================================
+
+
+-- =============================================================================
+-- SECTION 1: PRIMITIVES TABLE (Frozen Dictionary)
+-- =============================================================================
+
+-- Dictionary table of classification primitives, keyed by `code`.
+-- IF NOT EXISTS makes this statement a no-op when the table is already present,
+-- so the column definitions below only take effect on a database without it.
+-- NOTE(review): the preceding seed migration in this commit already inserts into
+-- pipeline.primitives using JSON-array string literals ('["fast", ...]') and
+-- upper-case dimension values ('PROCESS', 'TRUST'). Confirm that the existing
+-- column types and casing agree with the TEXT[] columns and lower-case
+-- dimensions used in this migration.
+CREATE TABLE IF NOT EXISTS pipeline.primitives (
+    code VARCHAR(30) PRIMARY KEY,
+    dimension VARCHAR(20) NOT NULL,  -- quality, service, process, environment, value, trust, resolution, loyalty, escape
+    name VARCHAR(100) NOT NULL,
+    definition TEXT NOT NULL,
+    is_meta BOOLEAN DEFAULT FALSE,  -- true for always-active primitives (HONESTY, ETHICS, etc.)
+    base_positive_signals TEXT[],
+    base_negative_signals TEXT[],
+    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
+);
+
+-- Index on dimension for filtering
+CREATE INDEX IF NOT EXISTS idx_primitives_dimension ON pipeline.primitives(dimension);
+
+-- Index on is_meta for quick access to always-active primitives
+-- (partial index: only rows with is_meta = TRUE are indexed)
+CREATE INDEX IF NOT EXISTS idx_primitives_is_meta ON pipeline.primitives(is_meta) WHERE is_meta = TRUE;
+
+COMMENT ON TABLE pipeline.primitives IS 'Frozen dictionary of classification primitives (quality dimensions)';
+COMMENT ON COLUMN pipeline.primitives.code IS 'Unique identifier for the primitive (e.g., FOOD_TASTE, SERVICE_SPEED)';
+COMMENT ON COLUMN pipeline.primitives.dimension IS 'Category of the primitive (quality, service, process, etc.)';
+COMMENT ON COLUMN pipeline.primitives.is_meta IS 'If true, this primitive is always active regardless of category config';
+COMMENT ON COLUMN pipeline.primitives.base_positive_signals IS 'Default positive signal keywords for this primitive';
+COMMENT ON COLUMN pipeline.primitives.base_negative_signals IS 'Default negative signal keywords for this primitive';
+
+
+-- =============================================================================
+-- SECTION 2: ALTER gbp_categories TABLE
+-- =============================================================================
+
+-- Extends public.gbp_categories with per-category classification config.
+-- Every statement in this section uses IF NOT EXISTS, so re-running it leaves
+-- existing columns and indexes untouched.
+
+-- Add primitive_configs column (JSONB for flexible config)
+-- Defaults to an empty object; the resolve functions in this migration skip
+-- rows whose value is NULL or '{}'.
+ALTER TABLE public.gbp_categories
+ADD COLUMN IF NOT EXISTS primitive_configs JSONB DEFAULT '{}';
+
+-- Add business_context column (JSONB for industry-specific context)
+ALTER TABLE public.gbp_categories
+ADD COLUMN IF NOT EXISTS business_context JSONB DEFAULT '{}';
+
+-- Add config versioning columns
+-- (no defaults: they stay NULL until a config is written for the category)
+ALTER TABLE public.gbp_categories
+ADD COLUMN IF NOT EXISTS config_version VARCHAR(20);
+
+ALTER TABLE public.gbp_categories
+ADD COLUMN IF NOT EXISTS config_generated_by VARCHAR(100);
+
+ALTER TABLE public.gbp_categories
+ADD COLUMN IF NOT EXISTS config_updated_at TIMESTAMP WITH TIME ZONE;
+
+-- GIN indexes for JSONB containment queries
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_primitive_configs
+ON public.gbp_categories USING GIN (primitive_configs);
+
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_business_context
+ON public.gbp_categories USING GIN (business_context);
+
+-- Index for config version lookups
+-- (partial index: categories without a config version are not indexed)
+CREATE INDEX IF NOT EXISTS idx_gbp_categories_config_version
+ON public.gbp_categories(config_version) WHERE config_version IS NOT NULL;
+
+COMMENT ON COLUMN public.gbp_categories.primitive_configs IS 'JSONB config for primitives at this category level (inherits from ancestors)';
+COMMENT ON COLUMN public.gbp_categories.business_context IS 'JSONB business context for this category (industry-specific terminology, etc.)';
+COMMENT ON COLUMN public.gbp_categories.config_version IS 'Version of the primitive config (for cache invalidation)';
+COMMENT ON COLUMN public.gbp_categories.config_generated_by IS 'Tool/model that generated this config';
+COMMENT ON COLUMN public.gbp_categories.config_updated_at IS 'When the config was last updated';
+
+
+-- =============================================================================
+-- SECTION 3: JSONB DEEP MERGE FUNCTION
+-- =============================================================================
+
+-- Recursive JSONB merge function
+--
+-- Parameters:
+--   base     - lower-priority document (e.g. the config accumulated from ancestors)
+--   override - higher-priority document merged on top of base
+--
+-- Returns the merged document:
+--   - NULL handling: if either argument is NULL, the other one is returned as-is
+--   - Objects: merged key by key, recursing into nested objects (override wins on conflicts)
+--   - Arrays: union of both arrays with duplicate elements removed
+--   - Any other type combination: override wins
+--   - An override object carrying "__replace__": true replaces the base value
+--     entirely instead of being merged. The marker is stripped from the value at
+--     the level where it is evaluated, including when the key does not exist in
+--     base at all (otherwise the marker would leak into the resolved config).
+CREATE OR REPLACE FUNCTION pipeline.jsonb_deep_merge(
+    base JSONB,
+    override JSONB
+) RETURNS JSONB AS $$
+DECLARE
+    result JSONB;
+    key TEXT;
+    base_value JSONB;
+    override_value JSONB;
+    merged_array JSONB;
+BEGIN
+    -- Handle NULL cases: a missing side contributes nothing
+    IF base IS NULL THEN
+        RETURN override;
+    END IF;
+
+    IF override IS NULL THEN
+        RETURN base;
+    END IF;
+
+    -- If override has __replace__ flag, return override without the flag
+    IF jsonb_typeof(override) = 'object' AND override ? '__replace__' AND (override->>'__replace__')::boolean = true THEN
+        RETURN override - '__replace__';
+    END IF;
+
+    -- If both are not objects, override wins
+    IF jsonb_typeof(base) != 'object' OR jsonb_typeof(override) != 'object' THEN
+        RETURN override;
+    END IF;
+
+    -- Both are objects, merge recursively
+    result := base;
+
+    FOR key IN SELECT jsonb_object_keys(override)
+    LOOP
+        override_value := override->key;
+
+        -- A value flagged with "__replace__": true always wins wholesale.
+        -- This is checked before the "key missing from base" case so that the
+        -- marker is stripped even when there is nothing in base to replace.
+        IF jsonb_typeof(override_value) = 'object'
+           AND override_value ? '__replace__'
+           AND (override_value->>'__replace__')::boolean = true THEN
+            result := result || jsonb_build_object(key, override_value - '__replace__');
+
+        -- Key doesn't exist in base, just add it
+        ELSIF NOT (base ? key) THEN
+            result := result || jsonb_build_object(key, override_value);
+
+        ELSE
+            base_value := base->key;
+
+            -- If both are objects, recurse
+            IF jsonb_typeof(base_value) = 'object' AND jsonb_typeof(override_value) = 'object' THEN
+                result := result || jsonb_build_object(
+                    key,
+                    pipeline.jsonb_deep_merge(base_value, override_value)
+                );
+
+            -- If both are arrays, union with dedup
+            ELSIF jsonb_typeof(base_value) = 'array' AND jsonb_typeof(override_value) = 'array' THEN
+                SELECT jsonb_agg(DISTINCT elem)
+                INTO merged_array
+                FROM (
+                    SELECT jsonb_array_elements(base_value) AS elem
+                    UNION
+                    SELECT jsonb_array_elements(override_value) AS elem
+                ) AS combined;
+
+                -- jsonb_agg yields NULL when both arrays are empty
+                result := result || jsonb_build_object(key, COALESCE(merged_array, '[]'::jsonb));
+
+            -- Otherwise (scalars or mismatched types), override wins
+            ELSE
+                result := result || jsonb_build_object(key, override_value);
+            END IF;
+        END IF;
+    END LOOP;
+
+    RETURN result;
+END;
+$$ LANGUAGE plpgsql IMMUTABLE;
+
+COMMENT ON FUNCTION pipeline.jsonb_deep_merge(JSONB, JSONB) IS
+'Recursively merges two JSONB objects. Objects are merged recursively (override wins on conflicts).
+Arrays are unioned with dedup. Use {"__replace__": true, ...} to replace instead of merge.';
+
+
+-- =============================================================================
+-- SECTION 4: RESOLVE PRIMITIVE CONFIG FUNCTION
+-- =============================================================================
+
+-- Resolves primitive config by merging ancestor configs (general -> specific)
+--
+-- p_path: ltree path of the category to resolve.
+-- Returns the deep merge of primitive_configs over every gbp_categories row whose
+-- path is an ancestor of (or equal to) p_path; '{}' when no row contributes one.
+CREATE OR REPLACE FUNCTION pipeline.resolve_primitive_config(
+    p_path ltree
+) RETURNS JSONB AS $$
+DECLARE
+    merged_config JSONB := '{}';
+    level_config JSONB;
+BEGIN
+    -- Walk the ancestor chain from the most general level to the most specific.
+    -- "path @> p_path" holds when path is an ancestor of, or equal to, p_path.
+    FOR level_config IN
+        SELECT c.primitive_configs
+        FROM public.gbp_categories AS c
+        WHERE c.path @> p_path
+        ORDER BY c.level ASC
+    LOOP
+        -- Levels without a config (NULL or empty object) contribute nothing
+        CONTINUE WHEN level_config IS NULL OR level_config = '{}';
+
+        merged_config := pipeline.jsonb_deep_merge(merged_config, level_config);
+    END LOOP;
+
+    RETURN merged_config;
+END;
+$$ LANGUAGE plpgsql STABLE;
+
+COMMENT ON FUNCTION pipeline.resolve_primitive_config(ltree) IS
+'Resolves the full primitive config for a category path by merging all ancestor configs from general to specific.';
+
+
+-- =============================================================================
+-- SECTION 5: RESOLVE BUSINESS CONTEXT FUNCTION
+-- =============================================================================
+
+-- Resolves business context by merging ancestor contexts (general -> specific)
+--
+-- p_path: ltree path of the category to resolve.
+-- Returns the deep merge of business_context over every gbp_categories row whose
+-- path is an ancestor of (or equal to) p_path; '{}' when no row contributes one.
+CREATE OR REPLACE FUNCTION pipeline.resolve_business_context(
+    p_path ltree
+) RETURNS JSONB AS $$
+DECLARE
+    merged_context JSONB := '{}';
+    level_context JSONB;
+BEGIN
+    -- Walk the ancestor chain (self included) from general to specific
+    FOR level_context IN
+        SELECT c.business_context
+        FROM public.gbp_categories AS c
+        WHERE c.path @> p_path
+        ORDER BY c.level ASC
+    LOOP
+        -- Levels without a context (NULL or empty object) contribute nothing
+        CONTINUE WHEN level_context IS NULL OR level_context = '{}';
+
+        merged_context := pipeline.jsonb_deep_merge(merged_context, level_context);
+    END LOOP;
+
+    RETURN merged_context;
+END;
+$$ LANGUAGE plpgsql STABLE;
+
+COMMENT ON FUNCTION pipeline.resolve_business_context(ltree) IS
+'Resolves the full business context for a category path by merging all ancestor contexts from general to specific.';
+
+
+-- =============================================================================
+-- SECTION 6: GET CLASSIFICATION CONTEXT FUNCTION
+-- =============================================================================
+
+-- Returns complete classification context for a category path
+--
+-- p_path: ltree path of the category.
+-- Returns one JSONB object with the keys:
+--   primitive_configs     - merged primitive config for the path
+--   business_context      - merged business context for the path
+--   primitives_dictionary - every row of pipeline.primitives, keyed by code
+--   category_path         - p_path rendered as text
+--   resolved_at           - NOW() at call time
+CREATE OR REPLACE FUNCTION pipeline.get_classification_context(
+    p_path ltree
+) RETURNS JSONB AS $$
+DECLARE
+    dictionary JSONB;
+BEGIN
+    -- Snapshot the primitives table as {code: {...}}.
+    -- jsonb_object_agg returns NULL over zero rows, hence the outer COALESCE.
+    SELECT COALESCE(
+        jsonb_object_agg(
+            p.code,
+            jsonb_build_object(
+                'code', p.code,
+                'dimension', p.dimension,
+                'name', p.name,
+                'definition', p.definition,
+                'is_meta', p.is_meta,
+                'base_positive_signals', COALESCE(to_jsonb(p.base_positive_signals), '[]'::jsonb),
+                'base_negative_signals', COALESCE(to_jsonb(p.base_negative_signals), '[]'::jsonb)
+            )
+        ),
+        '{}'::jsonb
+    )
+    INTO dictionary
+    FROM pipeline.primitives AS p;
+
+    -- Assemble the combined context; both resolvers walk the category tree
+    RETURN jsonb_build_object(
+        'primitive_configs', pipeline.resolve_primitive_config(p_path),
+        'business_context', pipeline.resolve_business_context(p_path),
+        'primitives_dictionary', dictionary,
+        'category_path', p_path::text,
+        'resolved_at', NOW()
+    );
+END;
+$$ LANGUAGE plpgsql STABLE;
+
+COMMENT ON FUNCTION pipeline.get_classification_context(ltree) IS
+'Returns complete classification context for a category path, including resolved primitive configs,
+business context, and the full primitives dictionary.';
+
+
+-- =============================================================================
+-- SECTION 7: HELPER FUNCTION - GET ACTIVE PRIMITIVES
+-- =============================================================================
+
+-- Returns the list of active primitive codes for a category path
+-- (includes meta primitives + enabled primitives from config)
+--
+-- p_path: ltree path of the category.
+-- Returns a de-duplicated TEXT[] sorted by code: every primitive with
+-- is_meta = TRUE plus the codes under the resolved config's "enabled" array,
+-- minus the codes under its "disabled" array. An empty array is returned when
+-- nothing is active.
+--
+-- "enabled" / "disabled" are only read when they are JSON arrays; any other
+-- value (for example JSON null written by a more specific category) is treated
+-- as "no entries" instead of raising an error.
+-- NOTE(review): "disabled" is applied to the combined list, so it also removes
+-- meta primitives; confirm this is intended given that is_meta is described as
+-- "always active regardless of category config".
+CREATE OR REPLACE FUNCTION pipeline.get_active_primitives(
+    p_path ltree
+) RETURNS TEXT[] AS $$
+DECLARE
+    resolved_config JSONB;
+    active_codes TEXT[];
+    meta_codes TEXT[];
+    config_enabled TEXT[];
+    config_disabled TEXT[];
+BEGIN
+    -- Get resolved config
+    resolved_config := pipeline.resolve_primitive_config(p_path);
+
+    -- Get all meta primitives (always active)
+    SELECT array_agg(code)
+    INTO meta_codes
+    FROM pipeline.primitives
+    WHERE is_meta = TRUE;
+
+    -- Get enabled primitives from config.
+    -- jsonb_typeof returns NULL for a missing key, so this single check covers
+    -- both "key absent" and "key present but not an array".
+    IF jsonb_typeof(resolved_config->'enabled') = 'array' THEN
+        SELECT array_agg(elem)
+        INTO config_enabled
+        FROM jsonb_array_elements_text(resolved_config->'enabled') AS elem;
+    END IF;
+
+    -- Get disabled primitives from config (same guard as above)
+    IF jsonb_typeof(resolved_config->'disabled') = 'array' THEN
+        SELECT array_agg(elem)
+        INTO config_disabled
+        FROM jsonb_array_elements_text(resolved_config->'disabled') AS elem;
+    END IF;
+
+    -- Combine: meta + enabled (array_agg yields NULL over zero rows)
+    active_codes := COALESCE(meta_codes, ARRAY[]::TEXT[]) || COALESCE(config_enabled, ARRAY[]::TEXT[]);
+
+    -- Remove disabled primitives
+    IF config_disabled IS NOT NULL THEN
+        active_codes := array(
+            SELECT unnest(active_codes)
+            EXCEPT
+            SELECT unnest(config_disabled)
+        );
+    END IF;
+
+    -- Remove duplicates and return the codes in a stable, sorted order
+    active_codes := array(
+        SELECT DISTINCT c
+        FROM unnest(active_codes) AS c
+        ORDER BY c
+    );
+
+    RETURN active_codes;
+END;
+$$ LANGUAGE plpgsql STABLE;
+
+COMMENT ON FUNCTION pipeline.get_active_primitives(ltree) IS
+'Returns array of active primitive codes for a category path (meta primitives + enabled - disabled).';
+
+
+-- =============================================================================
+-- SECTION 8: SEED INITIAL PRIMITIVES (Examples)
+-- =============================================================================
+
+-- Insert some example primitives (can be extended later)
+-- The upsert (ON CONFLICT (code) DO UPDATE) overwrites every listed non-key
+-- column of a row that already exists; created_at is not in the SET list and is
+-- left untouched.
+-- NOTE(review): HONESTY, ETHICS and SAFETY are also seeded by the preceding
+-- migration in this commit (with upper-case dimensions, and SAFETY as a non-meta
+-- ENVIRONMENT primitive), so running this statement afterwards replaces those
+-- rows. Confirm that is intended, and that ARRAY[...] values are valid for the
+-- column types of an already-existing pipeline.primitives table.
+INSERT INTO pipeline.primitives (code, dimension, name, definition, is_meta, base_positive_signals, base_negative_signals)
+VALUES
+    -- Meta primitives (always active)
+    ('HONESTY', 'trust', 'Honesty & Truthfulness', 'Whether the business is perceived as honest and truthful in their dealings', TRUE,
+     ARRAY['honest', 'truthful', 'transparent', 'upfront', 'no hidden'],
+     ARRAY['lied', 'dishonest', 'deceptive', 'misleading', 'hidden fees', 'bait and switch']),
+
+    ('ETHICS', 'trust', 'Ethical Behavior', 'Whether the business behaves ethically and with integrity', TRUE,
+     ARRAY['ethical', 'integrity', 'fair', 'principled', 'moral'],
+     ARRAY['unethical', 'scam', 'fraud', 'cheat', 'ripoff', 'shady']),
+
+    ('SAFETY', 'trust', 'Safety & Security', 'Whether customers feel safe and secure', TRUE,
+     ARRAY['safe', 'secure', 'protected', 'trust'],
+     ARRAY['unsafe', 'dangerous', 'security concern', 'risk', 'hazard']),
+
+    -- Quality dimension primitives
+    ('FOOD_TASTE', 'quality', 'Food Taste & Flavor', 'Quality and taste of food items', FALSE,
+     ARRAY['delicious', 'tasty', 'flavorful', 'yummy', 'amazing taste', 'perfectly seasoned'],
+     ARRAY['bland', 'tasteless', 'bad taste', 'over-seasoned', 'under-seasoned', 'disgusting']),
+
+    ('FOOD_FRESHNESS', 'quality', 'Food Freshness', 'Freshness of ingredients and food items', FALSE,
+     ARRAY['fresh', 'crisp', 'just made', 'homemade', 'organic'],
+     ARRAY['stale', 'old', 'not fresh', 'frozen', 'reheated', 'expired']),
+
+    ('FOOD_PORTION', 'quality', 'Portion Size', 'Size and quantity of food servings', FALSE,
+     ARRAY['generous portions', 'large serving', 'filling', 'plenty of food'],
+     ARRAY['small portions', 'tiny', 'not enough', 'skimpy', 'overpriced for size']),
+
+    ('PRODUCT_QUALITY', 'quality', 'Product Quality', 'Overall quality of products', FALSE,
+     ARRAY['high quality', 'well made', 'premium', 'durable', 'excellent quality'],
+     ARRAY['poor quality', 'cheap', 'flimsy', 'broke easily', 'defective']),
+
+    -- Service dimension primitives
+    ('SERVICE_SPEED', 'service', 'Service Speed', 'Speed and timeliness of service', FALSE,
+     ARRAY['fast', 'quick', 'prompt', 'efficient', 'no wait'],
+     ARRAY['slow', 'long wait', 'took forever', 'delayed', 'waited too long']),
+
+    ('SERVICE_FRIENDLINESS', 'service', 'Staff Friendliness', 'Friendliness and warmth of staff', FALSE,
+     ARRAY['friendly', 'welcoming', 'warm', 'nice', 'pleasant', 'smiled'],
+     ARRAY['rude', 'unfriendly', 'cold', 'dismissive', 'attitude', 'ignored']),
+
+    ('SERVICE_KNOWLEDGE', 'service', 'Staff Knowledge', 'Knowledge and expertise of staff', FALSE,
+     ARRAY['knowledgeable', 'expert', 'helpful advice', 'knew their stuff', 'professional'],
+     ARRAY['clueless', 'didnt know', 'unhelpful', 'inexperienced', 'untrained']),
+
+    ('SERVICE_ATTENTIVENESS', 'service', 'Staff Attentiveness', 'How attentive staff are to customer needs', FALSE,
+     ARRAY['attentive', 'checked on us', 'responsive', 'available', 'proactive'],
+     ARRAY['inattentive', 'ignored', 'couldnt find anyone', 'had to flag down', 'neglected']),
+
+    -- Environment dimension primitives
+    ('ENV_CLEANLINESS', 'environment', 'Cleanliness', 'Cleanliness of the establishment', FALSE,
+     ARRAY['clean', 'spotless', 'tidy', 'well-maintained', 'hygienic'],
+     ARRAY['dirty', 'filthy', 'messy', 'gross', 'sticky', 'unhygienic']),
+
+    ('ENV_AMBIANCE', 'environment', 'Ambiance & Atmosphere', 'Overall atmosphere and vibe', FALSE,
+     ARRAY['great atmosphere', 'nice ambiance', 'cozy', 'relaxing', 'beautiful decor'],
+     ARRAY['bad atmosphere', 'uncomfortable', 'loud', 'cramped', 'depressing']),
+
+    ('ENV_PARKING', 'environment', 'Parking Availability', 'Availability and convenience of parking', FALSE,
+     ARRAY['easy parking', 'plenty of parking', 'free parking', 'valet available'],
+     ARRAY['no parking', 'hard to park', 'paid parking', 'had to park far']),
+
+    -- Value dimension primitives
+    ('VALUE_PRICE', 'value', 'Price Level', 'Perception of price levels', FALSE,
+     ARRAY['affordable', 'reasonable prices', 'cheap', 'good deal', 'budget-friendly'],
+     ARRAY['expensive', 'overpriced', 'pricey', 'not worth the price', 'too costly']),
+
+    ('VALUE_WORTH', 'value', 'Value for Money', 'Whether the experience is worth the cost', FALSE,
+     ARRAY['worth it', 'great value', 'bang for buck', 'money well spent'],
+     ARRAY['not worth it', 'waste of money', 'rip off', 'should be cheaper']),
+
+    -- Process dimension primitives
+    ('PROCESS_BOOKING', 'process', 'Booking & Reservations', 'Ease of making reservations or appointments', FALSE,
+     ARRAY['easy to book', 'simple reservation', 'available appointments', 'online booking'],
+     ARRAY['hard to book', 'no availability', 'complicated booking', 'had to call multiple times']),
+
+    ('PROCESS_WAIT', 'process', 'Wait Times', 'Time spent waiting for service', FALSE,
+     ARRAY['no wait', 'seated immediately', 'quick turnaround'],
+     ARRAY['long wait', 'waited forever', 'always busy', 'need to wait in line']),
+
+    -- Resolution dimension primitives
+    ('RESOLUTION_RESPONSE', 'resolution', 'Problem Response', 'How problems and complaints are handled', FALSE,
+     ARRAY['fixed the issue', 'made it right', 'apologized', 'took responsibility'],
+     ARRAY['ignored complaint', 'didnt care', 'blamed me', 'no resolution', 'refused to help']),
+
+    -- Loyalty dimension primitives
+    ('LOYALTY_RETURN', 'loyalty', 'Return Intent', 'Whether customers intend to return', FALSE,
+     ARRAY['will be back', 'coming back', 'regular customer', 'my go-to place'],
+     ARRAY['never again', 'wont return', 'last time', 'not coming back']),
+
+    ('LOYALTY_RECOMMEND', 'loyalty', 'Recommendation Intent', 'Whether customers would recommend to others', FALSE,
+     ARRAY['highly recommend', 'tell everyone', 'bring friends', 'must try'],
+     ARRAY['dont recommend', 'avoid', 'stay away', 'warned friends']),
+
+    -- Escape dimension primitives (when customers leave early or avoid)
+    ('ESCAPE_LEFT', 'escape', 'Early Departure', 'Whether customers left early or walked out', FALSE,
+     ARRAY[]::TEXT[],  -- No positive signals for escape
+     ARRAY['walked out', 'left early', 'didnt finish', 'had to leave', 'couldnt stay'])
+
+-- Re-running the migration refreshes existing rows instead of failing on the
+-- primary key.
+ON CONFLICT (code) DO UPDATE SET
+    dimension = EXCLUDED.dimension,
+    name = EXCLUDED.name,
+    definition = EXCLUDED.definition,
+    is_meta = EXCLUDED.is_meta,
+    base_positive_signals = EXCLUDED.base_positive_signals,
+    base_negative_signals = EXCLUDED.base_negative_signals;
+
+
+-- =============================================================================
+-- SECTION 9: EXAMPLE CATEGORY CONFIGS
+-- =============================================================================
+
+-- Example: Set primitive config for Food & Dining sector (level 1)
+-- This would enable food-related primitives for all food businesses
+-- The row is selected by slug and level; if no such category exists the UPDATE
+-- changes zero rows and raises no error.
+UPDATE public.gbp_categories
+SET
+    primitive_configs = '{
+        "enabled": ["FOOD_TASTE", "FOOD_FRESHNESS", "FOOD_PORTION", "SERVICE_SPEED", "SERVICE_FRIENDLINESS", "ENV_CLEANLINESS", "ENV_AMBIANCE", "VALUE_PRICE", "VALUE_WORTH", "PROCESS_WAIT"],
+        "weights": {
+            "FOOD_TASTE": 1.5,
+            "FOOD_FRESHNESS": 1.3,
+            "SERVICE_SPEED": 1.2
+        }
+    }'::jsonb,
+    business_context = '{
+        "terminology": {
+            "staff": ["server", "waiter", "waitress", "host", "hostess", "bartender"],
+            "product": ["food", "dish", "meal", "appetizer", "entree", "dessert", "drink"]
+        },
+        "industry": "food_service"
+    }'::jsonb,
+    config_version = 'v1.0.0',
+    config_generated_by = 'migration_014',
+    config_updated_at = NOW()
+WHERE slug = 'food_dining' AND level = 1;
+
+-- Example: Override config for Restaurants (level 2) - adds more specific settings
+-- Only the delta is stored here. When a path is resolved through
+-- pipeline.resolve_primitive_config / resolve_business_context, arrays such as
+-- "enabled" and "terminology.staff" are unioned with the ancestor's arrays and
+-- objects such as "weights" are merged key by key.
+-- NOTE(review): this assumes the restaurants row's path sits below the
+-- food_dining row's path; confirm against the category tree.
+UPDATE public.gbp_categories
+SET
+    primitive_configs = '{
+        "enabled": ["PROCESS_BOOKING", "ENV_PARKING"],
+        "weights": {
+            "PROCESS_WAIT": 1.3
+        },
+        "signals": {
+            "FOOD_TASTE": {
+                "positive": ["perfectly cooked", "chef special", "signature dish"],
+                "negative": ["undercooked", "overcooked", "cold food"]
+            }
+        }
+    }'::jsonb,
+    business_context = '{
+        "terminology": {
+            "staff": ["chef", "cook", "sous chef", "kitchen staff"]
+        },
+        "typical_visit_duration": "1-2 hours",
+        "reservation_common": true
+    }'::jsonb,
+    config_version = 'v1.0.0',
+    config_generated_by = 'migration_014',
+    config_updated_at = NOW()
+WHERE slug = 'restaurants' AND level = 2;
+
+
+-- =============================================================================
+-- VERIFICATION QUERIES (can be removed in production)
+-- =============================================================================
+
+-- Verify primitives table
+-- Reports how many rows pipeline.primitives holds once the seed has run.
+DO $$
+DECLARE
+    primitive_count BIGINT;
+BEGIN
+    SELECT COUNT(*) INTO primitive_count FROM pipeline.primitives;
+
+    RAISE NOTICE 'Primitives table created with % rows', primitive_count;
+END $$;
+
+-- Verify functions exist
+-- Smoke-tests pipeline.jsonb_deep_merge; a failed ASSERT aborts the migration.
+DO $$
+DECLARE
+    merged JSONB;
+BEGIN
+    -- Disjoint keys are simply combined
+    merged := pipeline.jsonb_deep_merge('{"a": 1}'::jsonb, '{"b": 2}'::jsonb);
+    ASSERT merged = '{"a": 1, "b": 2}'::jsonb,
+        'jsonb_deep_merge basic test failed';
+
+    -- A value flagged with __replace__ replaces the base value and loses the flag
+    merged := pipeline.jsonb_deep_merge(
+        '{"a": {"x": 1, "y": 2}}'::jsonb,
+        '{"a": {"__replace__": true, "z": 3}}'::jsonb
+    );
+    ASSERT merged = '{"a": {"z": 3}}'::jsonb,
+        'jsonb_deep_merge __replace__ test failed';
+
+    RAISE NOTICE 'All function tests passed';
+END $$;
--- a/migrations/versions/015_add_business_info_columns.sql
+++ b/migrations/versions/015_add_business_info_columns.sql
@@ -0,0 +1,29 @@
+-- Migration: Add dedicated columns for business info
+-- Purpose: Move business data from metadata JSONB to queryable/indexable columns
+-- Date: 2026-01-31
+
+-- Add business info columns
+ALTER TABLE jobs ADD COLUMN IF NOT EXISTS business_name VARCHAR(500);
+ALTER TABLE jobs ADD COLUMN IF NOT EXISTS business_category VARCHAR(255);
+ALTER TABLE jobs ADD COLUMN IF NOT EXISTS business_address TEXT;
+ALTER TABLE jobs ADD COLUMN IF NOT EXISTS business_rating NUMERIC(3,2);
+
+-- Add indexes for common queries
+CREATE INDEX IF NOT EXISTS idx_jobs_business_name ON jobs(business_name);
+CREATE INDEX IF NOT EXISTS idx_jobs_business_category ON jobs(business_category);
+CREATE INDEX IF NOT EXISTS idx_jobs_business_rating ON jobs(business_rating);
+
+-- Backfill the new columns from metadata JSONB. COALESCE keeps any value a
+-- column already holds, so re-running this never overwrites (or NULLs) data.
+UPDATE jobs SET
+    business_name = COALESCE(business_name, metadata->>'business_name'),
+    business_address = COALESCE(business_address, metadata->>'business_address'),
+    business_rating = COALESCE(
+        business_rating,
+        (metadata->>'rating_snapshot')::NUMERIC(3,2)  -- missing key -> NULL, cast stays NULL
+    )
+WHERE metadata IS NOT NULL
+  AND (business_name IS NULL OR business_address IS NULL OR business_rating IS NULL);
+
+-- Optional follow-up (not done here): remove migrated fields so metadata holds performance metrics only
+-- Note: We keep the data in metadata for backward compatibility, but new code should use columns
--- a/migrations/versions/016_add_gbp_category_resolution.sql
+++ b/migrations/versions/016_add_gbp_category_resolution.sql
@@ -0,0 +1,22 @@
+-- Migration: Add resolved GBP category columns to jobs table
+-- Purpose: Store the matched taxonomy path for classification context
+-- Date: 2026-01-31
+
+-- Enable ltree: required by the gbp_category_path column type and its GiST index below
+CREATE EXTENSION IF NOT EXISTS ltree;
+
+-- Add resolved category columns
+ALTER TABLE jobs ADD COLUMN IF NOT EXISTS gbp_category_id INTEGER REFERENCES gbp_categories(id);
+ALTER TABLE jobs ADD COLUMN IF NOT EXISTS gbp_category_path ltree;
+ALTER TABLE jobs ADD COLUMN IF NOT EXISTS category_resolution_method VARCHAR(20); -- 'exact', 'fuzzy', 'llm', 'hierarchical'
+ALTER TABLE jobs ADD COLUMN IF NOT EXISTS business_category_source VARCHAR(20); -- 'google' or 'inferred'
+
+-- Index for fast lookups by category path
+CREATE INDEX IF NOT EXISTS idx_jobs_gbp_category_path ON jobs USING GIST (gbp_category_path);
+CREATE INDEX IF NOT EXISTS idx_jobs_gbp_category_id ON jobs(gbp_category_id);
+
+-- Comment on columns
+COMMENT ON COLUMN jobs.gbp_category_id IS 'FK to gbp_categories - the resolved deepest taxonomy node';
+COMMENT ON COLUMN jobs.gbp_category_path IS 'ltree path for the resolved category (e.g., Retail.Stores.Toy_store)';
+COMMENT ON COLUMN jobs.category_resolution_method IS 'How category was resolved: exact (from Google), fuzzy (trigram match), llm (LLM matched), hierarchical (LLM walked tree)';
+COMMENT ON COLUMN jobs.business_category_source IS 'Where business category originated: google (scraped from Maps) or inferred (LLM inferred from name)';
--- a/packages/pipeline-core/src/pipeline_core/__init__.py
+++ b/packages/pipeline-core/src/pipeline_core/__init__.py
@@ -5,7 +5,7 @@ This package provides the base abstractions for building pipelines that can be
 discovered, registered, and rendered with dynamic dashboards.
 """

-from pipeline_core.base import BasePipeline, PipelineMetadata, PipelineResult
+from pipeline_core.base import BasePipeline, PipelineMetadata, PipelineResult, StageResult
 from pipeline_core.contracts import (
    DashboardConfig,
    DashboardSection,
@@ -22,6 +22,7 @@ __all__ = [
    "BasePipeline",
    "PipelineMetadata",
    "PipelineResult",
+    "StageResult",
    # Contracts
    "DashboardConfig",
    "DashboardSection",
--- a/packages/reviewiq-pipeline/IMPROVEMENTS.md
+++ b/packages/reviewiq-pipeline/IMPROVEMENTS.md
@@ -0,0 +1,311 @@
+# ReviewIQ Pipeline Improvement Suggestions
+
+Based on validation testing and analysis of the classification pipeline.
+
+---
+
+## 🔴 High Priority (Quality & Cost Impact)
+
+### 1. Multi-Aspect Detection Gap
+**Problem**: LLM misses secondary codes in multi-aspect reviews.
+- "not too expensive" → V4.01 missed
+- "easy and fast" → J1.01 missed
+
+**Solution**: Update classification prompt to:
+```
+For reviews with multiple distinct topics:
+1. Extract ALL aspects, not just the dominant one
+2. Assign urt_secondary codes for each additional aspect
+3. Flag reviews with 3+ aspects as "complex"
+```
+
+**Impact**: ~15-20% of reviews have multiple aspects being partially captured.
+
+---
+
+### 2. Enable Smart Router (Cost Savings)
+**Problem**: All reviews go through expensive Sonnet model.
+
+**Solution**: Enable the implemented router:
+```python
+Config(
+    router_enabled=True,
+    router_conservative=True,
+    router_cheap_model="claude-3-5-haiku-20241022",
+)
+```
+
+**Impact**:
+- SKIP (1.6%): $0 cost (was ~$0.05)
+- CHEAP (31.4%): ~10x cheaper with Haiku
+- **Estimated 25-30% cost reduction**
+
+---
+
+### 3. JSON Truncation Recovery
+**Problem**: ~33% of batches hit JSON truncation, causing partial failures.
+
+**Current State**: Partial recovery implemented but still loses some reviews.
+
+**Solution**:
+1. Reduce batch size when reviews are long
+2. Add `max_tokens` buffer based on expected output
+3. Implement streaming JSON parser for real-time recovery
+
+```python
+# Dynamic batch sizing based on review length
+if avg_review_length > 200:
+    batch_size = min(batch_size, 15)
+if avg_review_length > 500:
+    batch_size = min(batch_size, 8)
+```
+
+**Impact**: Reduce fallback processing by ~50%, saving time and cost.
+
+---
+
+## 🟡 Medium Priority (Reliability & Accuracy)
+
+### 4. LLM Response Caching
+**Problem**: Retries reprocess already-classified reviews.
+
+**Solution**: Cache successful LLM responses by content hash:
+```python
+class ResponseCache:
+    async def get(self, text_hash: str) -> dict | None:
+        return json.loads(await redis.get(f"llm:classify:{text_hash}") or "null")  # decode what set() stored; miss -> None
+
+    async def set(self, text_hash: str, response: dict, ttl: int = 86400):
+        await redis.setex(f"llm:classify:{text_hash}", ttl, json.dumps(response))
+```
+
+**Impact**:
+- Zero cost for re-runs on same reviews
+- Faster pipeline retries
+- Useful for A/B testing prompts
+
+---
+
+### 5. Confidence-Based Routing
+**Problem**: LLM assigns codes even when uncertain.
+
+**Solution**: Add confidence threshold in prompt:
+```
+If confidence < 70%:
+  - Set confidence: "low"
+  - Use generic code (V4.03) instead of guessing
+  - Flag for human review
+```
+
+**Impact**: Reduces misclassifications, improves data quality.
+
+---
+
+### 6. Post-Classification Validation
+**Problem**: Some classifications don't match review content.
+
+**Solution**: Add rule-based validation layer:
+```python
+def validate_classification(text: str, urt_code: str) -> tuple[bool, str | None]:
+    # Price mentioned but not V4.xx code?
+    if has_price_mention(text) and not urt_code.startswith("V4"):
+        return False, "V4.01"  # Suggest correction
+
+    # Staff mentioned but not P1.xx code?
+    if has_staff_mention(text) and not urt_code.startswith("P1"):
+        return False, "P1.01"
+
+    return True, None
+```
+
+**Impact**: Catch ~5-10% of obvious misclassifications.
+
+---
+
+### 7. Span Coverage Validation
+**Problem**: Some review text not covered by any span.
+
+**Solution**: Track span coverage percentage:
+```python
+def calculate_coverage(text: str, spans: list) -> float:
+    covered_chars = set()
+    for span in spans:
+        covered_chars.update(range(span['start'], span['end']))
+    return len(covered_chars) / len(text) if text else 1.0  # empty text: nothing left uncovered
+
+# Flag if coverage < 60%
+if coverage < 0.6:
+    log.warning(f"Low span coverage: {coverage:.0%}")
+```
+
+**Impact**: Identify reviews where LLM skipped important content.
+
+---
+
+## 🟢 Lower Priority (Optimization & Monitoring)
+
+### 8. Taxonomy Alignment Scoring
+**Problem**: Hard to measure classification quality at scale.
+
+**Solution**: Build automated taxonomy alignment checker:
+```python
+# Check if keywords in text match expected domain
+DOMAIN_KEYWORDS = {
+    "V4": ["price", "money", "worth", "cost", "expensive", "cheap"],
+    "P1": ["staff", "employee", "service", "friendly", "rude"],
+    "J1": ["wait", "fast", "slow", "quick", "time", "minutes"],
+    "E1": ["clean", "dirty", "comfortable", "space", "room"],
+}
+
+def alignment_score(text: str, urt_code: str) -> float:
+    domain = urt_code[0:2]
+    keywords = DOMAIN_KEYWORDS.get(domain, [])
+    matches = sum(1 for kw in keywords if kw in text.lower())
+    return matches / len(keywords) if keywords else 0.5
+```
+
+**Impact**: Quality dashboard, regression detection.
+
+---
+
+### 9. Batch Size Auto-Tuning
+**Problem**: Fixed batch size doesn't adapt to review complexity.
+
+**Solution**: Implement adaptive batch sizing:
+```python
+class AdaptiveBatchSizer:
+    def __init__(self):
+        self.history = []  # dicts with keys: 'size', 'success_rate', 'avg_len'
+
+    def recommend_size(self, reviews: list) -> int:
+        avg_length = sum(len(r['text']) for r in reviews) / max(len(reviews), 1)
+
+        # Learn from history: largest batch size that reliably succeeded
+        # (success_rate > 0.95) on batches of similar average review length.
+        proven = [h['size'] for h in self.history
+                  if abs(h['avg_len'] - avg_length) < 50 and h['success_rate'] > 0.95]
+        if proven:
+            return max(proven)
+
+        # Default heuristics
+        if avg_length > 300:
+            return 10
+        elif avg_length > 150:
+            return 20
+        else:
+            return 30
+```
+
+---
+
+### 10. Cost Tracking Dashboard
+**Problem**: No visibility into per-job, per-stage costs.
+
+**Solution**: Add cost tracking to pipeline output:
+```python
+@dataclass
+class CostBreakdown:
+    stage: str
+    model: str
+    input_tokens: int
+    output_tokens: int
+    cached_tokens: int
+    cost_usd: float
+    reviews_processed: int
+    cost_per_review: float
+```
+```sql
+CREATE TABLE pipeline.cost_tracking (  -- Store in database
+    id SERIAL PRIMARY KEY,
+    execution_id UUID,
+    job_id UUID,
+    stage VARCHAR(50),
+    model VARCHAR(100),
+    input_tokens INT,
+    output_tokens INT,
+    cached_tokens INT,
+    cost_usd DECIMAL(10, 6),
+    reviews_processed INT,
+    created_at TIMESTAMP DEFAULT NOW()
+);
+```
+
+---
+
+### 11. Streaming Classification
+**Problem**: Large batches block until complete.
+
+**Solution**: Implement streaming for real-time progress:
+```python
+async def classify_streaming(reviews: list):
+    async for partial_result in llm_client.stream_batch(reviews):
+        # Persist immediately - before yielding, so the write does not wait for the consumer's next request
+        await persist_classification(partial_result)
+
+        # Yield each review as it completes
+        yield partial_result
+```
+
+**Impact**: Better UX, faster partial results, resilience to failures.
+
+---
+
+### 12. A/B Testing Framework
+**Problem**: Hard to compare prompt/model changes.
+
+**Solution**: Built-in A/B testing:
+```python
+class ABTestConfig:
+    test_name: str
+    variant_a: ClassificationConfig  # Control
+    variant_b: ClassificationConfig  # Treatment
+    split_ratio: float = 0.1  # 10% to treatment
+    metrics: list[str] = ["accuracy", "cost", "latency"]
+
+# Run control on all reviews, treatment on the first 10% (split_ratio)
+results_a = await classify(reviews, config_a)
+results_b = await classify(reviews[:int(len(reviews)*0.1)], config_b)
+
+# Compare metrics
+compare_results(results_a, results_b)
+```
+
+---
+
+## Implementation Priority Matrix
+
+| Improvement | Effort | Impact | Priority |
+|-------------|--------|--------|----------|
+| 1. Multi-Aspect Detection | Medium | High | 🔴 P1 |
+| 2. Enable Smart Router | Low | High | 🔴 P1 |
+| 3. JSON Truncation Fix | Medium | High | 🔴 P1 |
+| 4. Response Caching | Medium | Medium | 🟡 P2 |
+| 5. Confidence Routing | Medium | Medium | 🟡 P2 |
+| 6. Post-Classification Validation | Low | Medium | 🟡 P2 |
+| 7. Span Coverage Validation | Low | Low | 🟢 P3 |
+| 8. Taxonomy Alignment | Medium | Low | 🟢 P3 |
+| 9. Adaptive Batch Sizing | High | Medium | 🟢 P3 |
+| 10. Cost Dashboard | Medium | Low | 🟢 P3 |
+| 11. Streaming Classification | High | Medium | 🟢 P3 |
+| 12. A/B Testing | High | Low | 🟢 P3 |
+
+---
+
+## Quick Wins (Can implement today)
+
+1. **Enable router** - Already implemented, just needs config flag
+2. **Reduce batch size** - Set `classification_batch_size=15` for long reviews
+3. **Add span coverage logging** - Simple metric to track quality
+4. **Post-classification keyword check** - Basic validation rules
+
+---
+
+## Estimated Impact Summary
+
+| Area | Current | After Improvements |
+|------|---------|-------------------|
+| Cost per 1000 reviews | ~$3.40 | ~$2.40 (-30%) |
+| Classification accuracy | ~85% | ~92% |
+| Multi-aspect capture | ~65% | ~90% |
+| Batch failure rate | ~33% | ~10% |
+| Pipeline retry cost | 100% | ~20% (with caching) |
--- a/packages/reviewiq-pipeline/INDUSTRY_TAXONOMY_GAPS.md
+++ b/packages/reviewiq-pipeline/INDUSTRY_TAXONOMY_GAPS.md
@@ -0,0 +1,466 @@
+# Industry-Specific Taxonomy Gap Analysis
+
+## Current URT Coverage
+- **Spec**: 7 domains, 28 categories, 140 subcodes (universal)
+- **Database**: 138 subcodes implemented
+- **Claim**: "Works universally: Any industry, any size, any geography"
+
+---
+
+## Business Sector Analysis
+
+### Tier 1: High-Volume Google Review Industries
+
+These sectors have the most Google reviews and are most likely to be clients.
+
+---
+
+#### 🍽️ 1. RESTAURANTS & FOOD SERVICE
+**Expected Review Volume**: Very High
+**Current Coverage**: ⚠️ Partial
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Food quality/taste | Very High | ❌ No | **O2.06 Food Quality** |
+| Portion size | High | ❌ No | **O2.09 Portion Size** |
+| Drink quality | High | ❌ No | **O2.07 Drink Quality** |
+| Menu variety | Medium | ❌ No | **O2.08 Menu Variety** |
+| Freshness | High | ⚠️ O2.01 (Materials) | Needs specific code: **O2.10 Freshness** |
+| Chef/Cook skill | Medium | ⚠️ P2.02 (Skill) | Generic |
+| Wait time for food | High | ✅ J1.01 | Covered |
+| Reservation system | Medium | ✅ J2.xx | Covered |
+| Ambiance | High | ✅ E1.04 | Covered |
+| Cleanliness | High | ✅ E1.01 | Covered |
+
+**Missing Codes**:
+```sql
+O2.06 - Food Quality (taste, preparation)
+O2.07 - Drink Quality (beverages, cocktails, coffee)
+O2.08 - Menu Variety (selection, options)
+O2.09 - Portion Size (amount served)
+O2.10 - Freshness (ingredient freshness)
+```
+
+---
+
+#### 🏨 2. HOTELS & ACCOMMODATION
+**Expected Review Volume**: Very High
+**Current Coverage**: ⚠️ Partial
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Room cleanliness | Very High | ✅ E1.01 | Covered |
+| Bed comfort | High | ⚠️ E1.02 (Comfort) | Needs specific code: **O2.11 Bed/Sleep Quality** |
+| Bathroom quality | High | ❌ No | **E1.09 Bathroom Quality** |
+| Noise level | High | ❌ No | **E1.10 Noise Level** |
+| WiFi quality | High | ⚠️ E2.xx | Needs specific code: **E2.06 WiFi/Internet Quality** |
+| Breakfast quality | High | ❌ No | Links to F&B gap |
+| Check-in/out speed | High | ✅ J1.01 | Covered |
+| Pool/Gym facilities | Medium | ❌ No | **E1.11 Amenity Quality** |
+| View | Medium | ❌ No | **E1.12 Room View** |
+| Location | High | ✅ A4.01 | Covered |
+| Value for money | High | ✅ V4.01 | Covered |
+
+**Missing Codes**:
+```sql
+E1.09 - Bathroom Quality (fixtures, water pressure, toiletries)
+E1.10 - Noise Level (soundproofing, street noise, neighbors)
+E1.11 - Amenity Quality (pool, gym, spa facilities)
+E1.12 - Room View (scenery, outlook)
+E2.06 - WiFi/Internet Quality (speed, reliability)
+O2.11 - Bed/Sleep Quality (mattress, pillows, linens)
+```
+
+---
+
+#### 🏥 3. HEALTHCARE (Clinics, Dentists, Doctors)
+**Expected Review Volume**: High
+**Current Coverage**: ⚠️ Partial
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Treatment effectiveness | Very High | ✅ O1.05 | Covered |
+| Doctor manner | High | ✅ P1.01-05 | Covered |
+| Wait time | Very High | ✅ J1.01-03 | Covered |
+| Pain management | High | ❌ No | **O1.12 Pain/Comfort Management** |
+| Diagnosis accuracy | High | ⚠️ O1.02 | Needs specific |
+| Explanation clarity | High | ❌ No | **P2.06 Medical Communication** |
+| Insurance handling | High | ❌ No | **V3.06 Insurance Processing** |
+| Appointment availability | High | ✅ A1.xx | Covered |
+| Follow-up care | Medium | ❌ No | **R3.06 Follow-up Care** |
+| Hygiene/Sterilization | High | ✅ E3.04 | Covered |
+
+**Missing Codes**:
+```sql
+O1.12 - Pain/Comfort Management (during procedures)
+P2.06 - Medical Communication (explaining diagnosis, treatment)
+V3.06 - Insurance Processing (claims, billing, coverage)
+R3.06 - Follow-up Care (post-treatment support)
+```
+
+---
+
+#### 🚗 4. AUTOMOTIVE (Dealerships, Mechanics, Car Wash)
+**Expected Review Volume**: High
+**Current Coverage**: ✅ Good (based on ClickRent data)
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Vehicle condition | High | ✅ O1.01-02 | Covered |
+| Hidden fees | Very High | ✅ V1.03 | Covered |
+| Staff honesty | High | ✅ R1.01 | Covered |
+| Repair quality | High | ✅ O2.02 | Covered |
+| Price fairness | High | ✅ V1.02 | Covered |
+| Wait time | High | ✅ J1.01 | Covered |
+| Warranty honoring | Medium | ⚠️ V2.04 | Covered |
+| Test drive experience | Medium | ❌ No | **O1.13 Demo/Trial Experience** |
+| Trade-in fairness | Medium | ❌ No | **V1.06 Trade-in Value** |
+
+**Missing Codes**:
+```sql
+O1.13 - Demo/Trial Experience (test drives, product demos)
+V1.06 - Trade-in Value (exchange/trade fairness)
+```
+
+---
+
+#### 💇 5. BEAUTY & WELLNESS (Salons, Spas, Gyms)
+**Expected Review Volume**: High
+**Current Coverage**: ⚠️ Partial
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Service result | Very High | ✅ O1.05 | Covered |
+| Stylist skill | High | ✅ P2.02 | Covered |
+| Hygiene | High | ✅ E3.04 | Covered |
+| Relaxation | High | ❌ No | **O1.14 Relaxation/Wellness Outcome** |
+| Equipment quality | Medium | ⚠️ E1.xx | Generic; needs **E1.13 Equipment Quality** |
+| Class quality (gym) | Medium | ❌ No | **O1.15 Class/Instruction Quality** |
+| Membership value | Medium | ✅ V4.01 | Covered |
+| Trainer expertise | Medium | ✅ P2.01 | Covered |
+| Appointment booking | High | ✅ J2.xx | Covered |
+| Atmosphere | High | ✅ E1.04 | Covered |
+
+**Missing Codes**:
+```sql
+O1.14 - Relaxation/Wellness Outcome (stress relief, rejuvenation)
+O1.15 - Class/Instruction Quality (fitness classes, workshops)
+E1.13 - Equipment Quality (gym machines, salon tools)
+```
+
+---
+
+#### 🎢 6. ENTERTAINMENT & RECREATION
+**Expected Review Volume**: High
+**Current Coverage**: ❌ Poor (confirmed by Go Karts data)
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Fun factor | Very High | ❌ No | **O1.08 Entertainment Value** |
+| Excitement/Thrill | High | ❌ No | **O1.09 Excitement Level** |
+| Family suitability | High | ❌ No | **O1.06 Family Suitability** |
+| Group experience | High | ❌ No | **O1.11 Group Suitability** |
+| Safety (rides) | High | ✅ E4.01 | Covered |
+| Queue/Wait | High | ✅ J1.03 | Covered |
+| Value for money | High | ✅ V4.01 | Covered |
+| Staff friendliness | High | ✅ P1.01 | Covered |
+| Would recommend | High | ❌ No | **R1.06 Would Recommend** |
+| Would return | High | ❌ No | **R1.08 Will Return** |
+
+**Missing Codes**: (Already documented in TAXONOMY_GAPS.md)
+```sql
+O1.06 - Family Suitability
+O1.08 - Entertainment Value
+O1.09 - Excitement Level
+O1.11 - Group Suitability
+R1.06 - Would Recommend
+R1.08 - Will Return
+```
+
+---
+
+#### 🏬 7. RETAIL (Shops, Stores, E-commerce)
+**Expected Review Volume**: Very High
+**Current Coverage**: ✅ Good
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Product quality | High | ✅ O2.01 | Covered |
+| Stock availability | High | ✅ A1.03 | Covered |
+| Price competitiveness | High | ✅ V2.05 | Covered |
+| Return policy | High | ✅ V2.04 | Covered |
+| Staff helpfulness | High | ✅ P2.xx | Covered |
+| Store organization | High | ✅ E1.03 | Covered |
+| Checkout speed | High | ✅ J1.01 | Covered |
+| Delivery (e-comm) | High | ✅ J1.02 | Covered |
+| Packaging | Medium | ⚠️ O2.05 | Partial |
+
+**Minor Gaps**:
+```sql
+O2.12 - Packaging Quality (e-commerce specific)
+```
+
+---
+
+#### 🎓 8. EDUCATION & TRAINING
+**Expected Review Volume**: Medium
+**Current Coverage**: ⚠️ Partial
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Learning outcome | Very High | ✅ O1.05 | Covered |
+| Teacher quality | High | ✅ P2.xx | Covered |
+| Course content | High | ❌ No | **O2.13 Course/Curriculum Quality** |
+| Materials quality | Medium | ✅ O2.01 | Covered |
+| Value for tuition | High | ✅ V4.01 | Covered |
+| Schedule flexibility | Medium | ⚠️ O4.03 | Generic |
+| Progress tracking | Medium | ❌ No | **J4.06 Progress Communication** |
+| Certification value | Medium | ❌ No | **O1.16 Credential Value** |
+
+**Missing Codes**:
+```sql
+O2.13 - Course/Curriculum Quality (content, structure, relevance)
+O1.16 - Credential/Certification Value
+J4.06 - Progress Communication (tracking, feedback)
+```
+
+---
+
+#### 🏠 9. HOME SERVICES (Plumbers, Electricians, Cleaners)
+**Expected Review Volume**: High
+**Current Coverage**: ✅ Good
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Work quality | Very High | ✅ O2.02 | Covered |
+| Punctuality | Very High | ✅ J1.02 | Covered |
+| Price transparency | High | ✅ V1.03 | Covered |
+| Cleanliness after work | High | ✅ E1.01 | Covered |
+| Professionalism | High | ✅ P1.xx | Covered |
+| Problem solved | High | ✅ O1.05 | Covered |
+| Quote accuracy | High | ⚠️ V1.02 | Covered |
+| Warranty/Guarantee | Medium | ⚠️ V2.04 | Covered |
+
+**No major gaps** - well covered by existing codes.
+
+---
+
+#### 🌍 10. TRAVEL & TOURISM (Tours, Attractions, Museums)
+**Expected Review Volume**: High
+**Current Coverage**: ⚠️ Partial
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Experience quality | High | ⚠️ V4.03 | Too generic |
+| Guide quality | High | ✅ P2.xx | Covered |
+| Value for money | High | ✅ V4.01 | Covered |
+| Educational value | Medium | ❌ No | **O1.17 Educational/Informative Value** |
+| Crowd management | Medium | ✅ J1.03 | Covered |
+| Photo opportunities | Medium | ❌ No | **E1.14 Photo/Visual Appeal** |
+| Accessibility | Medium | ✅ A3.xx | Covered |
+| Authenticity | Medium | ❌ No | **O2.14 Authenticity/Genuineness** |
+| Memorable experience | High | ❌ No | Links to Entertainment gap |
+
+**Missing Codes**:
+```sql
+O1.17 - Educational/Informative Value (learning experience)
+O2.14 - Authenticity/Genuineness (cultural accuracy, real experience)
+E1.14 - Photo/Visual Appeal (Instagram-worthy, scenic)
+```
+
+---
+
+#### 🐾 11. PET SERVICES (Vets, Groomers, Pet Stores)
+**Expected Review Volume**: Medium
+**Current Coverage**: ⚠️ Partial
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Animal care quality | Very High | ⚠️ O1.05 | Needs specific |
+| Handling gentleness | High | ❌ No | **P1.06 Animal Handling** |
+| Treatment outcome | High | ✅ O1.05 | Covered |
+| Pet comfort/stress | High | ❌ No | **O1.18 Pet Comfort/Stress** |
+| Staff knowledge | High | ✅ P2.01 | Covered |
+| Emergency availability | Medium | ✅ A1.01 | Covered |
+| Price transparency | High | ✅ V1.xx | Covered |
+| Facility cleanliness | High | ✅ E1.01 | Covered |
+
+**Missing Codes**:
+```sql
+P1.06 - Animal Handling (gentleness, care with pets)
+O1.18 - Pet Comfort/Stress (during service)
+```
+
+---
+
+#### 🎵 12. NIGHTLIFE (Bars, Clubs, Live Music)
+**Expected Review Volume**: Medium
+**Current Coverage**: ⚠️ Partial
+
+| Topic | Frequency | Has Code? | Gap |
+|-------|-----------|-----------|-----|
+| Music/DJ quality | Very High | ❌ No | **E2.07 Music/Sound Quality** |
+| Drink quality/variety | High | ❌ No | Links to F&B gap |
+| Atmosphere/Vibe | High | ✅ E1.04 | Covered |
+| Crowd quality | Medium | ❌ No | **E2.08 Crowd/Clientele Quality** |
+| Door policy | Medium | ❌ No | **A1.06 Entry/Door Policy** |
+| Dance floor | Medium | ❌ No | **E1.15 Dance Floor Quality** |
+| Security/Safety | High | ✅ E4.01-02 | Covered |
+| Drink prices | High | ✅ V1.01 | Covered |
+| Staff attitude | High | ✅ P1.xx | Covered |
+
+**Missing Codes**:
+```sql
+E2.07 - Music/Sound Quality (DJ, live music, sound system)
+E2.08 - Crowd/Clientele Quality (type of people, vibe)
+A1.06 - Entry/Door Policy (fairness, accessibility)
+E1.15 - Dance Floor Quality (space, surface, lighting)
+```
+
+---
+
+## Priority Summary: Missing Codes by Urgency
+
+### 🔴 Critical (Universal - All Industries)
+```sql
+R1.06 - Would Recommend (recommendation intent)
+R1.07 - Would Not Recommend (anti-recommendation)
+R1.08 - Will Return (return intent positive)
+R1.09 - Won't Return (return intent negative)
+```
+
+### 🟠 High (Multiple Industries)
+```sql
+-- Food & Beverage (Restaurants, Hotels, Nightlife, Cafes)
+O2.06 - Food Quality
+O2.07 - Drink Quality
+O2.08 - Menu Variety
+O2.09 - Portion Size
+
+-- Entertainment & Tourism
+O1.06 - Family Suitability
+O1.08 - Entertainment Value
+O1.09 - Excitement Level
+O1.11 - Group Suitability
+
+-- Hospitality
+E1.09 - Bathroom Quality
+E1.10 - Noise Level
+E1.11 - Amenity Quality
+E2.06 - WiFi Quality
+```
+
+### 🟡 Medium (Industry-Specific)
+```sql
+-- Healthcare
+O1.12 - Pain/Comfort Management
+P2.06 - Medical Communication
+V3.06 - Insurance Processing
+
+-- Nightlife
+E2.07 - Music/Sound Quality
+E2.08 - Crowd/Clientele Quality
+
+-- Education
+O2.13 - Course/Curriculum Quality
+O1.16 - Credential Value
+
+-- Hotels
+O2.11 - Bed/Sleep Quality
+E1.12 - Room View
+```
+
+### 🟢 Lower (Niche)
+```sql
+-- Automotive
+O1.13 - Demo/Trial Experience
+V1.06 - Trade-in Value
+
+-- Pet Services
+P1.06 - Animal Handling
+O1.18 - Pet Comfort
+
+-- Tourism
+O1.17 - Educational Value
+O2.14 - Authenticity
+E1.14 - Photo Appeal
+```
+
+---
+
+## Recommended Implementation Phases
+
+### Phase 1: Universal Codes (Add Immediately)
+4 codes - Covers ALL industries
+```sql
+INSERT INTO pipeline.urt_subcodes VALUES
+('R1.06', 'R1', 'R', 'Would Recommend', 'Intent to recommend to others'),
+('R1.07', 'R1', 'R', 'Would Not Recommend', 'Explicit anti-recommendation'),
+('R1.08', 'R1', 'R', 'Will Return', 'Intent to visit again'),
+('R1.09', 'R1', 'R', 'Won''t Return', 'Explicit no-return statement');
+```
+
+### Phase 2: High-Frequency Gaps (Next Sprint)
+12 codes - Covers Hospitality, F&B, Entertainment
+```sql
+INSERT INTO pipeline.urt_subcodes VALUES  -- Food & Beverage
+('O2.06', 'O2', 'O', 'Food Quality', 'Taste, preparation, cooking quality'),
+('O2.07', 'O2', 'O', 'Drink Quality', 'Beverage quality and preparation'),
+('O2.08', 'O2', 'O', 'Menu Variety', 'Range of food/drink options'),
+('O2.09', 'O2', 'O', 'Portion Size', 'Amount of food served'),
+
+-- Entertainment
+('O1.06', 'O1', 'O', 'Family Suitability', 'Appropriate for children and families'),
+('O1.08', 'O1', 'O', 'Entertainment Value', 'How enjoyable/fun the experience was'),
+('O1.09', 'O1', 'O', 'Excitement Level', 'Thrill and adrenaline factor'),
+('O1.11', 'O1', 'O', 'Group Suitability', 'Good for groups/parties'),
+
+-- Hospitality
+('E1.09', 'E1', 'E', 'Bathroom Quality', 'Fixtures, water pressure, toiletries'),
+('E1.10', 'E1', 'E', 'Noise Level', 'Soundproofing, ambient noise'),
+('E1.11', 'E1', 'E', 'Amenity Quality', 'Pool, gym, spa facilities'),
+('E2.06', 'E2', 'E', 'WiFi Quality', 'Internet speed and reliability');
+```
+
+### Phase 3: Industry-Specific (As Clients Onboard)
+Add codes when specific industries become clients.
+
+---
+
+## Coverage Score by Industry
+
+| Industry | Current Coverage | After Phase 1 | After Phase 2 |
+|----------|-----------------|---------------|---------------|
+| Restaurants | 60% | 65% | **90%** |
+| Hotels | 65% | 70% | **90%** |
+| Healthcare | 70% | 75% | 80% |
+| Automotive | 85% | 90% | 90% |
+| Beauty/Wellness | 75% | 80% | 85% |
+| Entertainment | 50% | 60% | **90%** |
+| Retail | 90% | 95% | 95% |
+| Education | 70% | 75% | 80% |
+| Home Services | 95% | 95% | 95% |
+| Travel/Tourism | 60% | 70% | **85%** |
+| Pet Services | 75% | 80% | 85% |
+| Nightlife | 55% | 60% | **85%** |
+
+---
+
+## Conclusion
+
+**Is the taxonomy ready for production?**
+
+| Scenario | Ready? |
+|----------|--------|
+| Service businesses (auto, home services) | ✅ Yes |
+| Retail | ✅ Yes |
+| Healthcare | ⚠️ Mostly (add Phase 1) |
+| Restaurants/F&B | ❌ No (need Phase 1+2) |
+| Hotels | ❌ No (need Phase 1+2) |
+| Entertainment | ❌ No (need Phase 1+2) |
+| Nightlife | ❌ No (need Phase 1+2) |
+
+**Recommended Action**:
+1. Add Phase 1 codes immediately (4 universal codes)
+2. Add Phase 2 codes before onboarding hospitality/entertainment clients
+3. Add Phase 3 codes as specific industries come online
--- a/packages/reviewiq-pipeline/TAXONOMY_GAPS.md
+++ b/packages/reviewiq-pipeline/TAXONOMY_GAPS.md
@@ -0,0 +1,238 @@
+# URT Taxonomy Gap Analysis
+
+## Executive Summary
+
+The current taxonomy has **significant gaps** that cause ~30-40% of review content to be classified as generic codes (V4.03, O1.05) when more specific codes would be appropriate.
+
+**Current State**: 7 domains, 28 categories, 552 subcodes
+**Gap Impact**: ~653 reviews (58% of dataset) mention topics without specific codes
+
+---
+
+## Critical Gaps (High Frequency, No Coverage)
+
+### 🔴 Gap 1: Family/Kids Experience
+**Mentions**: 205 reviews (18% of dataset)
+**Current Mapping**: → V4.03 (Generic) or O1.05 (Outcome)
+
+**Missing Codes**:
+| Proposed Code | Name | Definition |
+|---------------|------|------------|
+| O1.06 | Family Suitability | Appropriate for children and families |
+| O1.07 | Age Appropriateness | Suitable for specific age groups |
+| E1.06 | Child-Friendly Facilities | Amenities for children |
+
+**Example Reviews Being Misclassified**:
+- "Brilliant day for adults and kids" → V4.03 (should be O1.06)
+- "Great family fun" → O1.05 (should be O1.06)
+- "Los niños disfrutaron mucho" → V4.03 (should be O1.06)
+
+---
+
+### 🔴 Gap 2: Fun/Entertainment Value
+**Mentions**: 198 reviews (18% of dataset)
+**Current Mapping**: → V4.03 (Generic) or O1.05 (Outcome)
+
+**Missing Codes**:
+| Proposed Code | Name | Definition |
+|---------------|------|------------|
+| O1.08 | Entertainment Value | How enjoyable/fun the experience was |
+| O1.09 | Excitement Level | Thrill and adrenaline factor |
+| O1.10 | Engagement | How captivating the experience was |
+
+**Example Reviews Being Misclassified**:
+- "Everyone had a blast" → V4.03 (should be O1.08)
+- "Muy divertido" → V4.03 (should be O1.08)
+- "Fantastische kartbaan" → V4.03 (should be O1.08)
+
+---
+
+### 🔴 Gap 3: Recommendation Intent
+**Mentions**: 103 reviews (9% of dataset)
+**Current Mapping**: → V4.03 (Generic)
+
+**Missing Codes**:
+| Proposed Code | Name | Definition |
+|---------------|------|------------|
+| R1.06 | Would Recommend | Intent to recommend to others |
+| R1.07 | Would Not Recommend | Explicit anti-recommendation |
+| V4.06 | Net Promoter Signal | Explicit NPS-style sentiment |
+
+**Example Reviews Being Misclassified**:
+- "100% recomendable" → V4.03 (should be R1.06)
+- "Highly recommend" → V4.03 (should be R1.06)
+- "Don't come here" → V4.03 V- (should be R1.07)
+
+---
+
+### 🟡 Gap 4: Return Intent
+**Mentions**: 65 reviews (6% of dataset)
+**Current Mapping**: → V4.03 or R4.03
+
+**Missing Codes**:
+| Proposed Code | Name | Definition |
+|---------------|------|------------|
+| R1.08 | Will Return | Intent to visit again |
+| R1.09 | Won't Return | Explicit no-return statement |
+
+**Example Reviews**:
+- "We'll definitely be back" → R4.03 (should be R1.08)
+- "No volveré" → V4.03 (should be R1.09)
+
+---
+
+### 🟡 Gap 5: Food & Beverage
+**Mentions**: 59 reviews (5% of dataset)
+**Current Mapping**: → O1.01 or V4.03
+
+**Missing Codes**:
+| Proposed Code | Name | Definition |
+|---------------|------|------------|
+| O2.06 | Food Quality | Taste, freshness, presentation |
+| O2.07 | Drink Quality | Beverage quality |
+| O2.08 | Menu Variety | Range of food/drink options |
+| O2.09 | Portion Size | Amount of food served |
+
+**Example Reviews**:
+- "Great food at the cafe" → O1.01 (should be O2.06)
+- "Drinks were overpriced" → V1.01 (should be O2.07 + V1.01)
+
+---
+
+### 🟡 Gap 6: Excitement/Thrill
+**Mentions**: 23 reviews (2% of dataset)
+**Current Mapping**: → V4.03 or O1.05
+
+**Missing Code** (same code as proposed under Gap 2; listed here for its separate mention count):
+| Proposed Code | Name | Definition |
+|---------------|------|------------|
+| O1.09 | Excitement Level | Thrill and adrenaline factor |
+
+---
+
+## Medium Gaps (Moderate Frequency)
+
+### Gap 7: Booking/Reservation Process
+**Current**: J2.xx exists but limited
+
+**Missing**:
+| Code | Name | Definition |
+|------|------|------------|
+| J2.06 | Online Booking | Digital reservation experience |
+| J2.07 | Booking Confirmation | Clear confirmation process |
+
+---
+
+### Gap 8: Group Experience
+**Missing**:
+| Code | Name | Definition |
+|------|------|------------|
+| O1.11 | Group Suitability | Good for groups/parties |
+| O1.12 | Team Building | Corporate/team activities |
+
+---
+
+### Gap 9: Seasonal/Weather Factors
+**Missing**:
+| Code | Name | Definition |
+|------|------|------------|
+| E1.07 | Weather Protection | Shelter from elements |
+| E1.08 | Seasonal Suitability | Appropriate for season |
+
+---
+
+## Impact Analysis
+
+### Current Classification Distribution (V4.03 Overuse)
+
+```
+Code    | Count | %    | Issue
+--------|-------|------|-------
+P1.01   |   477 | 14%  | ✅ Correct usage
+V4.03   |   319 | 10%  | ⚠️ Likely 50%+ misclassified
+O1.02   |   270 |  8%  | ✅ Correct usage
+V1.01   |   211 |  6%  | ✅ Correct usage
+O1.01   |   174 |  5%  | ✅ Correct usage
+```
+
+### Estimated Misclassification Rate
+
+| Gap Topic | Reviews | Est. Misclassified | % of Total |
+|-----------|---------|-------------------|------------|
+| Family/Kids | 205 | ~180 | 16% |
+| Fun/Entertainment | 198 | ~170 | 15% |
+| Recommendation | 103 | ~95 | 8% |
+| Return Intent | 65 | ~50 | 4% |
+| Food/Drinks | 59 | ~40 | 4% |
+| Excitement | 23 | ~20 | 2% |
+| **TOTAL** | **653** | **~555** | **~49%** |
+
+---
+
+## Recommended Taxonomy Additions
+
+### Priority 1: Add to O1 (Core Product/Service)
+```sql
+INSERT INTO pipeline.urt_subcodes (code, category_code, domain_code, name, definition) VALUES
+('O1.06', 'O1', 'O', 'Family Suitability', 'Appropriate for children and families'),
+('O1.07', 'O1', 'O', 'Age Appropriateness', 'Suitable for specific age groups'),
+('O1.08', 'O1', 'O', 'Entertainment Value', 'How enjoyable/fun the experience was'),
+('O1.09', 'O1', 'O', 'Excitement Level', 'Thrill and adrenaline factor'),
+('O1.10', 'O1', 'O', 'Engagement', 'How captivating the experience was'),
+('O1.11', 'O1', 'O', 'Group Suitability', 'Good for groups/parties');
+```
+
+### Priority 2: Add to R1 (Relationship/Loyalty)
+```sql
+INSERT INTO pipeline.urt_subcodes (code, category_code, domain_code, name, definition) VALUES
+('R1.06', 'R1', 'R', 'Would Recommend', 'Intent to recommend to others'),
+('R1.07', 'R1', 'R', 'Would Not Recommend', 'Explicit anti-recommendation'),
+('R1.08', 'R1', 'R', 'Will Return', 'Intent to visit again'),
+('R1.09', 'R1', 'R', 'Won''t Return', 'Explicit no-return statement');
+```
+
+### Priority 3: Add to O2 (Product Features)
+```sql
+INSERT INTO pipeline.urt_subcodes (code, category_code, domain_code, name, definition) VALUES
+('O2.06', 'O2', 'O', 'Food Quality', 'Taste, freshness, presentation of food'),
+('O2.07', 'O2', 'O', 'Drink Quality', 'Quality of beverages'),
+('O2.08', 'O2', 'O', 'Menu Variety', 'Range of food/drink options'),
+('O2.09', 'O2', 'O', 'Portion Size', 'Amount of food served');
+```
+
+---
+
+## Validation Query
+
+After adding codes, verify reduction in V4.03 usage:
+
+```sql
+-- Before: V4.03 count
+SELECT COUNT(*) FROM pipeline.review_spans WHERE urt_primary = 'V4.03';
+-- Expected: ~319
+
+-- After reclassification, target:
+-- V4.03: ~100 (true generic)
+-- O1.06-O1.11: ~200 (entertainment/family)
+-- R1.06-R1.09: ~150 (recommendation/return)
+```
+
+---
+
+## Conclusion
+
+**Is the taxonomy ready for production?** ❌ **No**
+
+**Critical Issues**:
+1. ~58% of reviews mention topics without specific codes (~49% are estimated to be misclassified)
+2. V4.03 is a catch-all masking actionable insights
+3. Industry-specific codes (entertainment, F&B) are missing
+
+**Recommendation**: Add 14 new subcodes before production to capture:
+- Family/Kids experience (O1.06, O1.07)
+- Entertainment value (O1.08, O1.09, O1.10)
+- Group suitability (O1.11)
+- Recommendation intent (R1.06, R1.07)
+- Return intent (R1.08, R1.09)
+- Food/Beverage (O2.06-O2.09)
+
+**Estimated Improvement**: Classification accuracy from ~50% specific to ~85% specific.
--- a/packages/reviewiq-pipeline/docs/CLASSIFICATION_SYSTEM.md
+++ b/packages/reviewiq-pipeline/docs/CLASSIFICATION_SYSTEM.md
@@ -308,11 +308,15 @@ You are a review classifier using primitive-based analysis.
  "spans": [
    {
      "text": "exact text from review",
+      "start": 0,
+      "end": 22,
      "primitive": "MANNER",
      "valence": "+",
      "intensity": 2,
      "detail": 2,
-      "confidence": 0.85
+      "confidence": 0.85,
+      "entity": null,
+      "entity_type": null
    }
  ]
 }
@@ -427,13 +431,16 @@ ORDER BY span_count DESC;
 python run_classification_v2.py [OPTIONS]

 Options:
-  --business TEXT       Business name or pattern (required for classify/evaluate)
-  --limit INT           Max reviews to process (default: 100)
-  --dry-run             Don't store results to database
-  --evaluate BUSINESS   Evaluate existing classification quality
-  --language-analysis   Analyze UNMAPPED rates by language across all data
-  --use-llm             Use real LLM classification (default: mock)
-  --model TEXT          Model for LLM (default: gpt-4o-mini)
+  --business TEXT            Business name or pattern (required for classify/evaluate)
+  --limit INT                Max reviews to process (default: 100)
+  --dry-run                  Don't store results to database
+  --evaluate BUSINESS        Evaluate existing classification quality
+  --language-analysis        Analyze UNMAPPED rates by language across all data
+  --ignore-legacy-language   Exclude rows with language='auto'/'unknown'/NULL
+  --latest-hours INT         Only include spans from last N hours
+  --use-existing             Use existing spans instead of jobs
+  --use-llm                  Use real LLM classification (requires OPENAI_API_KEY)
+  --model TEXT               Model for LLM (default: gpt-4o-mini)
 ```

 ### Models
--- a/packages/reviewiq-pipeline/prompts/wave0_sector_brief.md
+++ b/packages/reviewiq-pipeline/prompts/wave0_sector_brief.md
@@ -0,0 +1,401 @@
+# Wave 0: Sector Brief Generation Prompt
+
+## Purpose
+
+Generate a **sector brief** that provides alignment context for classification agents. This brief describes what customers care about in this sector — NOT how to classify it, NOT what primitives to use, NOT what solutions exist.
+
+## Critical Guardrails
+
+**DO:**
+- Describe customer concerns in plain language
+- Use real review language patterns
+- Focus on what customers judge, complain about, praise
+- Include industry-specific terminology
+- Identify mode-specific concerns (dine-in vs delivery, etc.)
+
+**DO NOT:**
+- Assign primitive codes
+- Suggest priorities or weights
+- Propose solutions or playbooks
+- Define new categories or dimensions
+- Include KPIs or metrics
+- Make up statistics
+
+---
+
+## Input
+
+You will receive:
+
+```json
+{
+  "sector_code": "FOOD_DINING",
+  "sector_name": "Food & Dining",
+  "description": "Restaurants, cafes, bars, bakeries, food trucks, catering services",
+  "sample_business_types": [
+    "Restaurants",
+    "Cafes & Coffee",
+    "Bars & Nightlife",
+    "Bakeries & Desserts",
+    "Food Services",
+    "Quick Service"
+  ]
+}
+```
+
+---
+
+## Output Schema
+
+Return ONLY valid JSON matching this schema:
+
+```json
+{
+  "sector_code": "string",
+  "sector_name": "string",
+  "generated_at": "ISO timestamp",
+  "version": "1.0",
+
+  "what_customers_judge": {
+    "description": "The primary dimensions customers evaluate in this sector",
+    "items": [
+      {
+        "aspect": "string (2-5 words)",
+        "importance": "critical | high | moderate",
+        "why_it_matters": "string (1 sentence)"
+      }
+    ]
+  },
+
+  "critical_pain_points": {
+    "description": "What damages reputation most severely in this sector",
+    "items": [
+      {
+        "pain_point": "string (2-5 words)",
+        "typical_language": ["array of phrases customers actually use"],
+        "reputation_impact": "severe | significant | moderate"
+      }
+    ]
+  },
+
+  "common_praise": {
+    "description": "What earns customer loyalty and positive reviews",
+    "items": [
+      {
+        "praise_area": "string (2-5 words)",
+        "typical_language": ["array of phrases customers actually use"],
+        "loyalty_impact": "high | moderate"
+      }
+    ]
+  },
+
+  "industry_terminology": {
+    "description": "Domain-specific vocabulary used in this sector",
+    "staff_terms": ["waiter", "server", "bartender", "chef"],
+    "product_terms": ["dish", "meal", "appetizer", "entree"],
+    "process_terms": ["reservation", "seating", "check", "tab"],
+    "quality_terms": ["fresh", "authentic", "homemade"],
+    "problem_terms": ["cold", "undercooked", "wrong order"]
+  },
+
+  "mode_specific_concerns": {
+    "description": "Different service modes have different customer priorities",
+    "modes": [
+      {
+        "mode": "string (e.g., 'Dine-in', 'Takeout', 'Delivery')",
+        "primary_concerns": ["array of top concerns for this mode"],
+        "unique_pain_points": ["pain points specific to this mode"]
+      }
+    ]
+  },
+
+  "what_is_actionable": {
+    "description": "Feedback that businesses can realistically act on",
+    "actionable_examples": [
+      {
+        "feedback_type": "string",
+        "example": "string",
+        "action_owner": "string (role/team that can fix it)"
+      }
+    ],
+    "not_actionable_examples": [
+      {
+        "feedback_type": "string",
+        "example": "string",
+        "why_not_actionable": "string"
+      }
+    ]
+  },
+
+  "sector_specific_signals": {
+    "description": "Signals that have different meaning in this sector vs others",
+    "examples": [
+      {
+        "signal": "string",
+        "meaning_in_this_sector": "string",
+        "contrast_with": "string (how it differs in other sectors)"
+      }
+    ]
+  }
+}
+```
+
+---
+
+## Example Output (Food & Dining)
+
+```json
+{
+  "sector_code": "FOOD_DINING",
+  "sector_name": "Food & Dining",
+  "generated_at": "2026-01-31T10:00:00Z",
+  "version": "1.0",
+
+  "what_customers_judge": {
+    "description": "The primary dimensions customers evaluate in this sector",
+    "items": [
+      {
+        "aspect": "Food taste and quality",
+        "importance": "critical",
+        "why_it_matters": "The core product - customers primarily visit for the food experience"
+      },
+      {
+        "aspect": "Service speed and attentiveness",
+        "importance": "critical",
+        "why_it_matters": "Direct impact on dining experience and whether they feel valued"
+      },
+      {
+        "aspect": "Cleanliness and hygiene",
+        "importance": "critical",
+        "why_it_matters": "Health/safety concern that can override all other positives if failed"
+      },
+      {
+        "aspect": "Value for money",
+        "importance": "high",
+        "why_it_matters": "Portion size and quality relative to price affects return intent"
+      },
+      {
+        "aspect": "Ambiance and atmosphere",
+        "importance": "moderate",
+        "why_it_matters": "Sets expectations and affects overall enjoyment, especially for special occasions"
+      }
+    ]
+  },
+
+  "critical_pain_points": {
+    "description": "What damages reputation most severely in this sector",
+    "items": [
+      {
+        "pain_point": "Food safety incidents",
+        "typical_language": ["got sick", "food poisoning", "found hair", "bug in food", "raw chicken"],
+        "reputation_impact": "severe"
+      },
+      {
+        "pain_point": "Cold or wrong food",
+        "typical_language": ["food was cold", "wrong order", "not what I ordered", "missing items"],
+        "reputation_impact": "significant"
+      },
+      {
+        "pain_point": "Rude or dismissive staff",
+        "typical_language": ["rude waiter", "ignored us", "attitude", "condescending", "eye roll"],
+        "reputation_impact": "significant"
+      },
+      {
+        "pain_point": "Excessive wait times",
+        "typical_language": ["waited forever", "40 minutes for food", "never came back", "forgotten"],
+        "reputation_impact": "significant"
+      },
+      {
+        "pain_point": "Dirty facilities",
+        "typical_language": ["dirty bathroom", "sticky table", "flies everywhere", "unclean"],
+        "reputation_impact": "severe"
+      }
+    ]
+  },
+
+  "common_praise": {
+    "description": "What earns customer loyalty and positive reviews",
+    "items": [
+      {
+        "praise_area": "Exceptional food quality",
+        "typical_language": ["best I've ever had", "delicious", "perfectly cooked", "authentic", "fresh ingredients"],
+        "loyalty_impact": "high"
+      },
+      {
+        "praise_area": "Attentive friendly service",
+        "typical_language": ["amazing server", "made us feel welcome", "remembered us", "went above and beyond"],
+        "loyalty_impact": "high"
+      },
+      {
+        "praise_area": "Great value",
+        "typical_language": ["huge portions", "great price", "worth every penny", "can't beat it"],
+        "loyalty_impact": "high"
+      },
+      {
+        "praise_area": "Perfect ambiance",
+        "typical_language": ["beautiful setting", "romantic", "cozy atmosphere", "perfect for date night"],
+        "loyalty_impact": "moderate"
+      }
+    ]
+  },
+
+  "industry_terminology": {
+    "description": "Domain-specific vocabulary used in this sector",
+    "staff_terms": ["server", "waiter", "waitress", "host", "hostess", "bartender", "chef", "cook", "manager", "busboy"],
+    "product_terms": ["dish", "meal", "appetizer", "entree", "main course", "dessert", "special", "daily special", "sides"],
+    "process_terms": ["reservation", "walk-in", "seated", "table", "check", "bill", "tab", "tip", "takeout", "to-go", "delivery"],
+    "quality_terms": ["fresh", "homemade", "authentic", "crispy", "tender", "juicy", "flavorful", "seasoned", "cooked to perfection"],
+    "problem_terms": ["cold", "lukewarm", "overcooked", "undercooked", "raw", "burnt", "soggy", "bland", "stale", "greasy"]
+  },
+
+  "mode_specific_concerns": {
+    "description": "Different service modes have different customer priorities",
+    "modes": [
+      {
+        "mode": "Dine-in",
+        "primary_concerns": ["ambiance", "service attentiveness", "wait time to be seated", "table cleanliness"],
+        "unique_pain_points": ["loud neighbors", "rushed out", "ignored by server", "wrong seating"]
+      },
+      {
+        "mode": "Takeout",
+        "primary_concerns": ["order accuracy", "ready on time", "packaging quality", "ease of pickup"],
+        "unique_pain_points": ["order not ready", "missing items", "cold by pickup", "wrong order in bag"]
+      },
+      {
+        "mode": "Delivery",
+        "primary_concerns": ["delivery time", "food temperature", "order accuracy", "packaging integrity"],
+        "unique_pain_points": ["arrived cold", "leaked in bag", "missing sauces", "driver got lost", "late delivery"]
+      },
+      {
+        "mode": "Catering",
+        "primary_concerns": ["on-time setup", "quantity accuracy", "presentation", "dietary accommodation"],
+        "unique_pain_points": ["not enough food", "late arrival", "wrong items", "poor presentation"]
+      }
+    ]
+  },
+
+  "what_is_actionable": {
+    "description": "Feedback that businesses can realistically act on",
+    "actionable_examples": [
+      {
+        "feedback_type": "Specific staff behavior",
+        "example": "Our server John was rude and rolled his eyes when we asked for substitutions",
+        "action_owner": "Front of house manager"
+      },
+      {
+        "feedback_type": "Food quality issue",
+        "example": "The chicken was undercooked - pink in the middle",
+        "action_owner": "Kitchen manager / Chef"
+      },
+      {
+        "feedback_type": "Facility issue",
+        "example": "Men's bathroom was out of soap and paper towels",
+        "action_owner": "Facilities / Shift manager"
+      },
+      {
+        "feedback_type": "Process issue",
+        "example": "Waited 20 minutes to get our check after flagging the server twice",
+        "action_owner": "FOH manager / Training"
+      }
+    ],
+    "not_actionable_examples": [
+      {
+        "feedback_type": "Subjective taste preference",
+        "example": "I just don't like spicy food",
+        "why_not_actionable": "Personal preference, not a quality issue"
+      },
+      {
+        "feedback_type": "Location/parking",
+        "example": "Hard to find parking in this area",
+        "why_not_actionable": "External factor beyond restaurant control"
+      },
+      {
+        "feedback_type": "Price objection without context",
+        "example": "Too expensive",
+        "why_not_actionable": "Vague, no specifics on what was mispriced"
+      }
+    ]
+  },
+
+  "sector_specific_signals": {
+    "description": "Signals that have different meaning in this sector vs others",
+    "examples": [
+      {
+        "signal": "long wait",
+        "meaning_in_this_sector": "Usually negative - food taking too long, being ignored",
+        "contrast_with": "Healthcare: expected and sometimes indicates thoroughness"
+      },
+      {
+        "signal": "portion size",
+        "meaning_in_this_sector": "Critical value indicator - directly affects perceived value",
+        "contrast_with": "Healthcare: not applicable"
+      },
+      {
+        "signal": "noisy",
+        "meaning_in_this_sector": "Context-dependent - negative for fine dining, expected at sports bars",
+        "contrast_with": "Professional services: always negative"
+      }
+    ]
+  }
+}
+```
+
+---
+
+## Sectors to Generate
+
+Generate one brief for each of these L1 sectors:
+
+| Code | Sector Name | Sample Business Types |
+|------|-------------|----------------------|
+| `FOOD_DINING` | Food & Dining | Restaurants, Cafes, Bars, Bakeries, Catering |
+| `RETAIL_SHOPPING` | Retail & Shopping | Clothing, Electronics, Grocery, Specialty stores |
+| `AUTOMOTIVE` | Automotive | Dealers, Repair, Car Wash, Parts |
+| `HEALTHCARE` | Healthcare | Hospitals, Clinics, Dental, Mental Health, Veterinary |
+| `PROFESSIONAL_SERVICES` | Professional Services | Legal, Accounting, Consulting, Marketing |
+| `HOME_SERVICES` | Home Services | Plumbing, Electrical, HVAC, Cleaning, Landscaping |
+| `PERSONAL_SERVICES` | Personal Services | Salons, Spas, Fitness, Tattoo |
+| `EDUCATION` | Education | Schools, Tutoring, Driving Schools, Language |
+| `HOSPITALITY_TRAVEL` | Hospitality & Travel | Hotels, Tours, Travel Agencies |
+| `ENTERTAINMENT` | Entertainment | Movies, Museums, Amusement Parks, Sports |
+| `FINANCE_INSURANCE` | Finance & Insurance | Banks, Insurance, Investment, Loans |
+| `REAL_ESTATE` | Real Estate | Agents, Property Management, Appraisers |
+| `INDUSTRIAL` | Industrial | Manufacturing, Construction, Warehousing |
+| `TRANSPORTATION` | Transportation | Taxis, Moving, Shipping, Courier |
+| `GOVERNMENT` | Government | DMV, Courts, Public Services |
+| `EVENTS_WEDDINGS` | Events & Weddings | Wedding Venues, Planners, DJ, Photography |
+| `RELIGIOUS` | Religious | Churches, Temples, Mosques, Spiritual |
+| `NONPROFIT` | Non-Profit | Charities, Community Organizations |
+| `TECHNOLOGY` | Technology | IT Services, Software, Web Design |
+| `PETS_ANIMALS` | Pets & Animals | Pet Stores, Grooming, Boarding, Training |
+
+---
+
+## Usage
+
+This brief will be injected into Wave 1 and Wave 2 prompts as alignment context:
+
+```
+You are configuring primitives for: {sector_name}
+
+## Sector Context (READ-ONLY, do not modify or extend)
+
+{sector_brief_json}
+
+## Your Task
+
+Using the above context to understand what matters in this sector,
+configure the following primitives...
+```
+
+---
+
+## Validation
+
+Before returning, verify:
+- [ ] All arrays have 3-10 items (not empty, not excessive)
+- [ ] `typical_language` arrays contain realistic review phrases
+- [ ] No primitive codes, priorities, or solutions appear anywhere
+- [ ] Industry terminology is accurate for this sector
+- [ ] Modes are appropriate for the sector (not all sectors have delivery)
+- [ ] Actionable vs not-actionable distinction is clear
--- a/packages/reviewiq-pipeline/run_classification.py
+++ b/packages/reviewiq-pipeline/run_classification.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python
+"""
+Run classification pipeline for a scraping job.
+
+Usage:
+    python run_classification.py 22c747a6-b913-4ae4-82bc-14b4195008b6
+"""
+
+import asyncio
+import logging
+import os
+import sys
+from datetime import datetime
+
+# Root logging setup: INFO and above, timestamped, tagged with the logger name
+_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
+
+# Script-wide logger used by everything below
+logger = logging.getLogger("run_classification")
+
+
+def _stage_field(stage_result, name, default=None):
+    """
+    Read ``name`` from a stage result that may be a dict or an object.
+
+    Returns ``default`` when the key/attribute is missing or set to None.
+    """
+    if isinstance(stage_result, dict):
+        value = stage_result.get(name)
+    else:
+        value = getattr(stage_result, name, None)
+    return default if value is None else value
+
+
+async def run_pipeline(job_id: str):
+    """
+    Run the classification pipeline for a job.
+
+    Executes the normalize, classify, route and aggregate stages for the
+    given job and logs a per-stage summary.
+
+    Args:
+        job_id: ID of the scraping job; handed to the pipeline as
+            ``{"job_id": job_id}``.
+
+    Returns:
+        The object returned by ``Pipeline.process()``. This function reads
+        its ``success``, ``error`` and ``stage_results`` attributes.
+
+    Raises:
+        RuntimeError: If ANTHROPIC_API_KEY is not set in the environment.
+        Exception: Any error raised by the pipeline is logged and re-raised.
+    """
+    from reviewiq_pipeline import Pipeline
+    from reviewiq_pipeline.config import Config
+
+    # Get database URL from environment or use default
+    database_url = os.environ.get(
+        "DATABASE_URL",
+        "postgresql://scraper:scraper123@localhost:5437/scraper"
+    )
+
+    logger.info(f"Processing job {job_id}")
+
+    # Secrets are read from the environment only; API keys must never be
+    # committed to source control. Fail early with a clear message because
+    # the configured provider below is Anthropic.
+    anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
+    if not anthropic_api_key:
+        raise RuntimeError(
+            "ANTHROPIC_API_KEY is not set; export it (or add it to .env) "
+            "before running the classification pipeline"
+        )
+
+    # Initialize pipeline
+    config = Config(
+        database_url=database_url,
+        llm_provider="anthropic",
+        llm_model="claude-sonnet-4-5-20250929",
+        openai_api_key=os.environ.get("OPENAI_API_KEY"),
+        anthropic_api_key=anthropic_api_key,
+        classification_batch_size=25,
+        classification_max_concurrent=5,
+        classification_target_utilization=0.70,
+    )
+
+    pipeline = Pipeline(config)
+
+    try:
+        await pipeline.initialize()
+        logger.info("Pipeline initialized")
+
+        # Run all stages (normalize, classify, route, aggregate)
+        # Just pass job_id - pipeline will fetch and transform reviews from database
+        logger.info("Starting pipeline execution...")
+        start_time = datetime.now()
+
+        result = await pipeline.process(
+            {"job_id": job_id},
+            stages=["normalize", "classify", "route", "aggregate"],
+        )
+
+        elapsed = (datetime.now() - start_time).total_seconds()
+
+        # Print results
+        if result.success:
+            logger.info(f"Pipeline completed successfully in {elapsed:.1f}s")
+        else:
+            logger.warning(f"Pipeline completed with errors in {elapsed:.1f}s")
+            if result.error:
+                logger.error(f"Error: {result.error}")
+
+        # Stage summaries
+        for stage_name, stage_result in result.stage_results.items():
+            # Stage results may be dicts or objects. Chaining
+            # `getattr(...) or stage_result.get(...)` raised AttributeError
+            # for objects whose attribute was falsy (success=False,
+            # error=None, duration_ms=0), so use the type-aware accessor.
+            success = _stage_field(stage_result, "success", False)
+            data = _stage_field(stage_result, "data", {})
+            error = _stage_field(stage_result, "error")
+            duration_ms = _stage_field(stage_result, "duration_ms", 0)
+
+            if success:
+                stats = data.get("stats", {}) if data else {}
+
+                if stage_name == "normalize":
+                    logger.info(f"  Stage 1 (Normalize): {stats.get('output_count', '?')} reviews")
+                elif stage_name == "classify":
+                    logger.info(
+                        f"  Stage 2 (Classify): {stats.get('success_count', '?')} reviews, "
+                        f"{stats.get('total_spans', '?')} spans, "
+                        f"${stats.get('llm_cost_usd', 0):.4f} LLM cost"
+                    )
+                elif stage_name == "route":
+                    logger.info(
+                        f"  Stage 3 (Route): {stats.get('spans_routed', '?')} spans, "
+                        f"{stats.get('issues_created', '?')} issues"
+                    )
+                elif stage_name == "aggregate":
+                    logger.info(f"  Stage 4 (Aggregate): {stats.get('facts_upserted', '?')} facts")
+
+                logger.info(f"    Duration: {duration_ms}ms")
+            else:
+                logger.error(f"  {stage_name}: FAILED - {error}")
+
+        return result
+
+    except Exception as e:
+        logger.exception(f"Pipeline failed: {e}")
+        raise
+    finally:
+        # Always release pipeline resources, even when a stage raised
+        await pipeline.close()
+
+
+if __name__ == "__main__":
+    # CLI entry point: expects the job UUID as the first positional argument.
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print("Usage: python run_classification.py <job_id>")
+        sys.exit(1)
+
+    job_id = cli_args[0]
+
+    # Reject anything that does not parse as a UUID before touching the pipeline
+    import uuid
+    try:
+        uuid.UUID(job_id)
+    except ValueError:
+        print(f"Invalid job ID format: {job_id}")
+        sys.exit(1)
+
+    result = asyncio.run(run_pipeline(job_id))
+
+    # Non-zero exit status only when a result came back and reports failure
+    pipeline_failed = bool(result) and not result.success
+    if pipeline_failed:
+        sys.exit(1)
--- a/packages/reviewiq-pipeline/scripts/backfill_review_facts.py
+++ b/packages/reviewiq-pipeline/scripts/backfill_review_facts.py
@@ -0,0 +1,409 @@
+#!/usr/bin/env python3
+"""
+Backfill review_facts_v1 from public.jobs.reviews_data.
+
+Parses relative timestamps ("17 hours ago", "2 weeks ago") into absolute
+timestamps anchored to job.created_at.
+
+Usage:
+    python backfill_review_facts.py
+    python backfill_review_facts.py --dry-run
+    python backfill_review_facts.py --job-id <uuid>
+"""
+
+import argparse
+import asyncio
+import json
+import os
+import re
+from datetime import datetime, timedelta, timezone
+from typing import Any
+
+import asyncpg
+
+# Database URL
+# Taken from the DATABASE_URL environment variable; the literal below is only the
+# fallback used when that variable is unset.
+# NOTE(review): the fallback embeds a username and password in source control -
+# confirm it is a throwaway local credential, or require DATABASE_URL instead.
+DB_URL = os.environ.get(
+    "DATABASE_URL",
+    "postgresql://scraper:scraper123@localhost:5437/scraper"
+)
+
+
+# =============================================================================
+# RELATIVE TIMESTAMP PARSER
+# =============================================================================
+
+# Regex patterns for relative timestamps
+# Each entry is a (regex, label) pair.
+# NOTE: no function in this script reads this list; parse_relative_timestamp()
+# performs its own inline checks, so edits here do not change parsing behavior.
+RELATIVE_PATTERNS = [
+    # "17 hours ago", "2 weeks ago", "a month ago"
+    (r"(?:edited\s+)?(\d+|a|an)\s+(second|minute|hour|day|week|month|year)s?\s+ago", "standard"),
+    # "just now"
+    (r"just\s+now", "just_now"),
+    # "yesterday"
+    (r"yesterday", "yesterday"),
+    # "today"
+    (r"today", "today"),
+]
+
+# Time unit multipliers (in seconds)
+# "month" and "year" are fixed-length approximations (30 and 365 days), so
+# timestamps derived from them are not calendar-exact.
+TIME_UNITS = {
+    "second": 1,
+    "minute": 60,
+    "hour": 3600,
+    "day": 86400,
+    "week": 604800,
+    "month": 2592000,  # 30 days
+    "year": 31536000,  # 365 days
+}
+
+
+def parse_relative_timestamp(raw: str, reference_time: datetime) -> datetime | None:
+    """
+    Parse a relative timestamp string into an absolute datetime.
+
+    Recognized forms (case-insensitive, surrounding whitespace ignored, optional
+    leading "edited "): any text containing "just now", exactly "yesterday",
+    exactly "today", and "<N|a|an> <unit>[s] ago" where unit is one of the keys
+    of TIME_UNITS.
+
+    Args:
+        raw: Relative timestamp like "17 hours ago", "Edited 2 weeks ago"
+        reference_time: The reference point (usually job.created_at)
+
+    Returns:
+        Absolute datetime, or None if the value is empty, not a string, or in
+        an unrecognized format
+    """
+    # Non-string values (e.g. a number or None from JSON) cannot be parsed.
+    if not raw or not isinstance(raw, str):
+        return None
+
+    text = raw.lower().strip()
+
+    # Strip the "edited " marker BEFORE the keyword checks: the exact-match tests
+    # for "yesterday"/"today" below would otherwise reject "Edited yesterday".
+    text = re.sub(r"^edited\s+", "", text)
+
+    # Handle "just now"
+    if "just now" in text:
+        return reference_time
+
+    # Handle "yesterday"
+    if text == "yesterday":
+        return reference_time - timedelta(days=1)
+
+    # Handle "today"
+    if text == "today":
+        return reference_time
+
+    # Match "N unit(s) ago" (re.match anchors at the start of the text only)
+    match = re.match(r"(\d+|a|an)\s+(second|minute|hour|day|week|month|year)s?\s+ago", text)
+    if match is None:
+        # Unknown format
+        return None
+
+    quantity_str, unit = match.groups()
+
+    # Convert "a"/"an" to 1
+    quantity = 1 if quantity_str in ("a", "an") else int(quantity_str)
+
+    seconds = quantity * TIME_UNITS.get(unit, 0)
+    return reference_time - timedelta(seconds=seconds)
+
+
+def parse_relative_timestamp_safe(raw: str, reference_time: datetime) -> tuple[datetime | None, bool]:
+    """
+    Never-raising variant of parse_relative_timestamp.
+
+    Returns:
+        (parsed_time, success): success is True only when a datetime was
+        produced; any exception from the parser yields (None, False).
+    """
+    try:
+        parsed = parse_relative_timestamp(raw, reference_time)
+    except Exception:
+        return None, False
+    return parsed, parsed is not None
+
+
+# =============================================================================
+# BACKFILL LOGIC
+# =============================================================================
+
+async def get_jobs_with_reviews(pool: asyncpg.Pool, job_id: str | None = None) -> list[dict]:
+    """
+    Fetch jobs whose reviews_data is a non-null JSON array.
+
+    With a truthy job_id only that job is looked up; otherwise every qualifying
+    job is returned, newest first. Each row is converted to a plain dict with
+    keys job_id, created_at, reviews_data and business_id (the business name
+    from metadata, falling back to the job URL).
+    """
+    if not job_id:
+        query = """
+            SELECT job_id, created_at, reviews_data,
+                   COALESCE(metadata->>'business_name', url) as business_id
+            FROM public.jobs
+            WHERE reviews_data IS NOT NULL
+              AND jsonb_typeof(reviews_data) = 'array'
+            ORDER BY created_at DESC
+        """
+        records = await pool.fetch(query)
+    else:
+        query = """
+            SELECT job_id, created_at, reviews_data,
+                   COALESCE(metadata->>'business_name', url) as business_id
+            FROM public.jobs
+            WHERE job_id = $1
+              AND reviews_data IS NOT NULL
+              AND jsonb_typeof(reviews_data) = 'array'
+        """
+        records = await pool.fetch(query, job_id)
+
+    return list(map(dict, records))
+
+
+async def get_run_id_for_job(pool: asyncpg.Pool, job_id: str) -> str | None:
+    """Return one non-null run_id recorded for the job in detected_spans_v2, as a string, or None."""
+    lookup_sql = """
+        SELECT DISTINCT run_id FROM pipeline.detected_spans_v2
+        WHERE job_id = $1 AND run_id IS NOT NULL
+        LIMIT 1
+    """
+    record = await pool.fetchrow(lookup_sql, job_id)
+    if not record:
+        return None
+    run_id = record["run_id"]
+    return str(run_id) if run_id else None
+
+
+async def get_language_for_review(pool: asyncpg.Pool, review_id: str) -> str | None:
+    """Return one non-null language recorded for the review in detected_spans_v2, or None."""
+    lookup_sql = """
+        SELECT language FROM pipeline.detected_spans_v2
+        WHERE review_id = $1 AND language IS NOT NULL
+        LIMIT 1
+    """
+    record = await pool.fetchrow(lookup_sql, review_id)
+    if record:
+        return record["language"]
+    return None
+
+
+async def upsert_review_facts(
+    pool: asyncpg.Pool,
+    facts: list[dict],
+    dry_run: bool = False,
+) -> tuple[int, int]:
+    """
+    Write review facts to pipeline.review_facts_v1, updating rows whose
+    review_id already exists.
+
+    On conflict every column is overwritten, except run_id and language, which
+    keep their stored value when the incoming value is NULL.
+
+    Args:
+        pool: Connection pool used for the batch statement
+        facts: Dicts with required keys review_id, business_id, job_id and
+            optional keys run_id, rating, review_time_utc, raw_timestamp,
+            author, language
+        dry_run: When True nothing is written
+
+    Returns:
+        (record_count, 0) - the number of records sent; inserts and updates are
+        not distinguished, so the second element is always 0. (0, 0) for a dry
+        run or an empty list.
+    """
+    if dry_run or not facts:
+        return 0, 0
+
+    # One parameterized statement executed per record via executemany
+    query = """
+        INSERT INTO pipeline.review_facts_v1
+        (review_id, business_id, job_id, run_id, rating, review_time_utc, raw_timestamp, author, language)
+        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+        ON CONFLICT (review_id) DO UPDATE SET
+            business_id = EXCLUDED.business_id,
+            job_id = EXCLUDED.job_id,
+            run_id = COALESCE(EXCLUDED.run_id, pipeline.review_facts_v1.run_id),
+            rating = EXCLUDED.rating,
+            review_time_utc = EXCLUDED.review_time_utc,
+            raw_timestamp = EXCLUDED.raw_timestamp,
+            author = EXCLUDED.author,
+            language = COALESCE(EXCLUDED.language, pipeline.review_facts_v1.language)
+    """
+
+    # Column order must match the placeholder order in the statement above.
+    required_keys = ("review_id", "business_id", "job_id")
+    optional_keys = ("run_id", "rating", "review_time_utc", "raw_timestamp", "author", "language")
+
+    records = [
+        tuple(fact[key] for key in required_keys) + tuple(fact.get(key) for key in optional_keys)
+        for fact in facts
+    ]
+
+    await pool.executemany(query, records)
+    return len(records), 0
+
+
+async def backfill_job(
+    pool: asyncpg.Pool,
+    job: dict,
+    dry_run: bool = False,
+    verbose: bool = False,
+) -> dict[str, Any]:
+    """
+    Backfill review facts for a single job.
+
+    Each review's relative timestamp is resolved against the job's created_at
+    (treated as UTC when it carries no timezone). Reviews that cannot be used -
+    undecodable JSON strings, elements that are not JSON objects, or objects
+    without a review_id - are skipped and counted instead of aborting the job.
+
+    Args:
+        pool: Connection pool for lookups and the upsert
+        job: Row dict with job_id, created_at, business_id and reviews_data
+        dry_run: When True no rows are written and per-review language lookups
+            are not performed
+        verbose: When True a per-job summary is printed
+
+    Returns:
+        Stats dict: job_id, total_reviews (every element seen, including
+        skipped ones), skipped, parsed_ok, parsed_failed, inserted, and
+        sample_failures (up to 5 raw timestamps that failed to parse)
+    """
+    job_id = job["job_id"]
+    job_created = job["created_at"]
+    business_id = job["business_id"]
+    reviews_data = job["reviews_data"]
+
+    # asyncpg may return JSONB as string
+    if isinstance(reviews_data, str):
+        reviews_data = json.loads(reviews_data)
+
+    # Make job_created timezone-aware if it isn't
+    if job_created.tzinfo is None:
+        job_created = job_created.replace(tzinfo=timezone.utc)
+
+    # Get run_id for this job
+    run_id = await get_run_id_for_job(pool, str(job_id))
+
+    stats = {
+        "job_id": str(job_id),
+        "total_reviews": 0,
+        "skipped": 0,
+        "parsed_ok": 0,
+        "parsed_failed": 0,
+        "inserted": 0,
+        "sample_failures": [],
+    }
+
+    facts = []
+
+    for review in reviews_data:
+        stats["total_reviews"] += 1
+
+        # Handle both dict and JSON string
+        if isinstance(review, str):
+            try:
+                review = json.loads(review)
+            except json.JSONDecodeError:
+                stats["skipped"] += 1
+                continue
+
+        # A decoded element may still be a list/number/null; .get() below would
+        # raise AttributeError and abort the whole backfill.
+        if not isinstance(review, dict):
+            stats["skipped"] += 1
+            continue
+
+        review_id = review.get("review_id")
+        if not review_id:
+            stats["skipped"] += 1
+            continue
+
+        raw_timestamp = review.get("timestamp", "")
+        review_time, success = parse_relative_timestamp_safe(raw_timestamp, job_created)
+
+        if success:
+            stats["parsed_ok"] += 1
+        else:
+            stats["parsed_failed"] += 1
+            if len(stats["sample_failures"]) < 5:
+                stats["sample_failures"].append(raw_timestamp)
+
+        # Get language from spans if available (one query per review; skipped in dry runs)
+        language = await get_language_for_review(pool, review_id) if not dry_run else None
+
+        facts.append({
+            "review_id": review_id,
+            "business_id": business_id,
+            "job_id": job_id,
+            "run_id": run_id,
+            "rating": review.get("rating"),
+            "review_time_utc": review_time,
+            "raw_timestamp": raw_timestamp,
+            "author": review.get("author"),
+            "language": language,
+        })
+
+    # Upsert (a no-op returning 0 in dry-run mode)
+    inserted, _ = await upsert_review_facts(pool, facts, dry_run=dry_run)
+    stats["inserted"] = inserted
+
+    if verbose:
+        print(f"  Job {job_id}: {stats['total_reviews']} reviews, "
+              f"{stats['parsed_ok']} parsed OK, {stats['parsed_failed']} failed")
+        if stats["sample_failures"]:
+            print(f"    Sample failures: {stats['sample_failures'][:3]}")
+
+    return stats
+
+
+async def backfill_all(
+    pool: asyncpg.Pool,
+    job_id: str | None = None,
+    dry_run: bool = False,
+    verbose: bool = False,
+) -> dict[str, Any]:
+    """
+    Run backfill_job over every job with reviews (or only the given job) and
+    sum the per-job statistics.
+
+    Returns:
+        Dict with jobs_processed, total_reviews, parsed_ok, parsed_failed,
+        inserted, and unique_failure_patterns (at most 20 distinct raw
+        timestamps collected from the jobs' sample failures)
+    """
+    jobs = await get_jobs_with_reviews(pool, job_id)
+    job_count = len(jobs)
+
+    print(f"\n{'[DRY RUN] ' if dry_run else ''}Backfilling review_facts_v1 from {len(jobs)} jobs...")
+
+    summed_keys = ("total_reviews", "parsed_ok", "parsed_failed", "inserted")
+    totals = dict.fromkeys(summed_keys, 0)
+    failure_patterns = set()
+    jobs_processed = 0
+
+    for position, job in enumerate(jobs, 1):
+        if verbose:
+            print(f"\n[{position}/{job_count}] Processing job {job['job_id']}...")
+
+        job_stats = await backfill_job(pool, job, dry_run=dry_run, verbose=verbose)
+
+        jobs_processed += 1
+        for key in summed_keys:
+            totals[key] += job_stats[key]
+        failure_patterns.update(job_stats["sample_failures"])
+
+    # The set becomes a (capped) list so the result is JSON-serializable
+    return {
+        "jobs_processed": jobs_processed,
+        **totals,
+        "unique_failure_patterns": list(failure_patterns)[:20],
+    }
+
+
+# =============================================================================
+# CLI
+# =============================================================================
+
+async def main_async(args):
+    """
+    Open a connection pool, run the backfill with the parsed CLI options, and
+    print a summary report. The pool is closed even when the backfill raises.
+    """
+    pool = await asyncpg.create_pool(DB_URL)
+
+    try:
+        stats = await backfill_all(
+            pool,
+            job_id=args.job_id,
+            dry_run=args.dry_run,
+            verbose=args.verbose,
+        )
+
+        rule = "=" * 60
+        parsed_ok = stats["parsed_ok"]
+        parsed_failed = stats["parsed_failed"]
+        # Guard against division by zero when no reviews were seen
+        denominator = max(stats["total_reviews"], 1)
+        ok_pct = parsed_ok / denominator * 100
+        failed_pct = parsed_failed / denominator * 100
+
+        print("\n" + rule)
+        print("BACKFILL COMPLETE")
+        print(rule)
+        print(f"Jobs processed:    {stats['jobs_processed']}")
+        print(f"Total reviews:     {stats['total_reviews']}")
+        print(f"Timestamps parsed: {parsed_ok} ({ok_pct:.1f}%)")
+        print(f"Timestamps failed: {parsed_failed} ({failed_pct:.1f}%)")
+        if not args.dry_run:
+            print(f"Records upserted:  {stats['inserted']}")
+
+        patterns = stats["unique_failure_patterns"]
+        if patterns:
+            print(f"\nUnparsed timestamp patterns ({len(patterns)}):")
+            for pattern in patterns[:10]:
+                print(f"  - \"{pattern}\"")
+
+        # Coverage is the share of all reviews whose timestamp parsed
+        coverage = ok_pct
+        if coverage < 90:
+            print(f"\n⚠️  WARNING: Timestamp coverage is {coverage:.1f}% (target: >90%)")
+        else:
+            print(f"\n✅ Timestamp coverage: {coverage:.1f}%")
+
+    finally:
+        await pool.close()
+
+
+def main():
+    """Parse the command-line flags and run the async backfill to completion."""
+    cli = argparse.ArgumentParser(description="Backfill review_facts_v1")
+    cli.add_argument("--job-id", help="Process a specific job only")
+    cli.add_argument("--dry-run", action="store_true", help="Don't write to database")
+    cli.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
+
+    asyncio.run(main_async(cli.parse_args()))
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/reviewiq-pipeline/scripts/config_resolver_standalone.py
+++ b/packages/reviewiq-pipeline/scripts/config_resolver_standalone.py
@@ -0,0 +1,226 @@
+"""
+Config Resolver - Standalone version for scripts.
+
+Resolves L1 config + sector brief for classification.
+"""
+
+import json
+import logging
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Paths
+# All data locations are resolved relative to this script: <script dir>/../data
+DATA_DIR = Path(__file__).parent.parent / "data"
+# Sector-level (L1) configs, read as <sector_code lowercased>_config.json
+CONFIGS_DIR = DATA_DIR / "primitive_configs" / "l1"
+# Category-level (L2) delta configs, discovered with the glob *_config.json
+L2_CONFIGS_DIR = DATA_DIR / "primitive_configs" / "l2"
+# Sector briefs, read as <sector_code lowercased>_brief.json
+BRIEFS_DIR = DATA_DIR / "sector_briefs"
+
+# Meta primitives - always enabled
+# ConfigResolver.resolve_enabled_set() unions these into every enabled set, and
+# build_primitives_for_prompt() renders them with domain "M" and meta=True.
+META_PRIMITIVES = frozenset([
+    "HONESTY", "ETHICS", "PROMISES",
+    "ACKNOWLEDGMENT", "RESPONSE_QUALITY", "RECOVERY",
+    "RETURN_INTENT", "RECOMMEND", "RECOGNITION",
+    "UNMAPPED",
+])
+
+# Core primitives dictionary
+# Maps primitive code -> {"domain", "name", "def"}. build_primitives_for_prompt()
+# copies an entry for each enabled primitive and adds a "weight" key when the
+# resolved config has one.
+# NOTE(review): the single-letter domain codes are not defined in this file;
+# presumably O=offering, P=people, J=journey, E=environment, V=value - confirm.
+CORE_PRIMITIVES = {
+    "TASTE": {"domain": "O", "name": "Taste/Flavor", "def": "Sensory quality of food/beverage"},
+    "CRAFT": {"domain": "O", "name": "Craftsmanship", "def": "Skill of execution/preparation"},
+    "FRESHNESS": {"domain": "O", "name": "Freshness", "def": "Newness, not stale or old"},
+    "TEMPERATURE": {"domain": "O", "name": "Temperature", "def": "Hot/cold as expected"},
+    "EFFECTIVENESS": {"domain": "O", "name": "Effectiveness", "def": "Achieves intended purpose"},
+    "ACCURACY": {"domain": "O", "name": "Accuracy", "def": "Correct, as ordered/specified"},
+    "CONDITION": {"domain": "O", "name": "Condition", "def": "Physical state, wear, damage"},
+    "CONSISTENCY": {"domain": "O", "name": "Consistency", "def": "Same quality each time"},
+    "MANNER": {"domain": "P", "name": "Manner/Attitude", "def": "Friendliness, respect, warmth"},
+    "COMPETENCE": {"domain": "P", "name": "Competence", "def": "Knowledge and skill of staff"},
+    "ATTENTIVENESS": {"domain": "P", "name": "Attentiveness", "def": "Being present, responsive"},
+    "COMMUNICATION": {"domain": "P", "name": "Communication", "def": "Clarity, listening, updates"},
+    "SPEED": {"domain": "J", "name": "Speed/Wait", "def": "Time to service, waiting"},
+    "FRICTION": {"domain": "J", "name": "Friction", "def": "Obstacles, hassles, complexity"},
+    "RELIABILITY": {"domain": "J", "name": "Reliability", "def": "Dependable, keeps promises"},
+    "AVAILABILITY": {"domain": "J", "name": "Availability", "def": "Open when needed, bookable"},
+    "CLEANLINESS": {"domain": "E", "name": "Cleanliness", "def": "Hygiene, tidiness"},
+    "COMFORT": {"domain": "E", "name": "Comfort", "def": "Physical ease, seating"},
+    "SAFETY": {"domain": "E", "name": "Safety", "def": "Free from harm/danger"},
+    "AMBIANCE": {"domain": "E", "name": "Ambiance", "def": "Atmosphere, mood, vibe"},
+    "ACCESSIBILITY": {"domain": "E", "name": "Accessibility", "def": "Easy to reach, navigate"},
+    "DIGITAL_UX": {"domain": "E", "name": "Digital Experience", "def": "Website, app, online"},
+    "PRICE_LEVEL": {"domain": "V", "name": "Price Level", "def": "Absolute cost (cheap/expensive)"},
+    "PRICE_FAIRNESS": {"domain": "V", "name": "Price Fairness", "def": "Reasonable for what you get"},
+    "PRICE_TRANSPARENCY": {"domain": "V", "name": "Price Transparency", "def": "No hidden fees, clear pricing"},
+    "VALUE_FOR_MONEY": {"domain": "V", "name": "Value for Money", "def": "Worth what you paid"},
+}
+
+
+class ConfigResolver:
+    """Resolves classification config for a business.
+
+    Combines the sector-level (L1) primitive config, an optional category-level
+    (L2) delta selected by GBP path prefix, and the sector brief into a single
+    dict (see ``resolve``). L1 configs and briefs are cached per sector code for
+    the lifetime of the instance; L2 configs are re-read from disk on each lookup.
+    """
+
+    def __init__(self):
+        self._l1_cache: dict[str, dict] = {}
+        # Not read or written by any method of this class.
+        self._l2_cache: dict[str, dict] = {}
+        self._brief_cache: dict[str, dict] = {}
+
+    def _load_l2_configs(self) -> list[dict[str, Any]]:
+        """Load all L2 config files.
+
+        Files that cannot be read or parsed, or whose top-level JSON value is
+        not an object, are skipped with a warning. Returns an empty list when
+        the L2 directory does not exist.
+        """
+        if not L2_CONFIGS_DIR.exists():
+            return []
+
+        configs = []
+        # Sorted so the load order (and any tie-breaking on it) is deterministic.
+        for config_path in sorted(L2_CONFIGS_DIR.glob("*_config.json")):
+            try:
+                with open(config_path, encoding="utf-8") as f:
+                    config = json.load(f)
+            except Exception as e:
+                logger.warning(f"Failed to load L2 config {config_path}: {e}")
+                continue
+            if not isinstance(config, dict):
+                logger.warning(f"Failed to load L2 config {config_path}: top-level JSON value is not an object")
+                continue
+            configs.append(config)
+        return configs
+
+    def _find_matching_l2(self, gbp_path: str) -> dict[str, Any] | None:
+        """Find L2 config that matches the GBP path (most specific wins).
+
+        A config matches when the business path starts with the config's
+        non-empty ``gbp_path``. Configs without a usable ``gbp_path`` are
+        ignored: an empty prefix would otherwise match every business.
+        Returns None when the business path is empty or nothing matches.
+        """
+        if not gbp_path:
+            return None
+
+        matches = []
+        for config in self._load_l2_configs():
+            l2_path = config.get("gbp_path")
+            if not isinstance(l2_path, str) or not l2_path:
+                continue
+            # NOTE(review): plain string-prefix test, not segment-aware - a path
+            # like "a.bc" also matches an L2 path "a.b". Confirm the path format
+            # before tightening this to a separator boundary.
+            if gbp_path.startswith(l2_path):
+                matches.append(config)
+
+        if not matches:
+            return None
+
+        # Most specific match = longest path; the first loaded wins on ties.
+        return max(matches, key=lambda cfg: len(cfg["gbp_path"]))
+
+    def _apply_l2_delta(self, l1_config: dict, l2_config: dict) -> dict:
+        """Apply L2 delta to L1 config.
+
+        Returns a new dict; ``l1_config`` and its "enabled"/"weights" values are
+        not mutated (they are replaced in the copy, not edited in place).
+        """
+        result = l1_config.copy()
+        delta = l2_config.get("delta", {})
+
+        # Enable additional primitives
+        if "enable" in delta:
+            enabled = set(result.get("enabled", []))
+            enabled.update(delta["enable"])
+            result["enabled"] = list(enabled)
+
+        # Merge weights (L2 values override L1 values for the same primitive)
+        if "weights" in delta:
+            weights = dict(result.get("weights", {}))
+            weights.update(delta["weights"])
+            result["weights"] = weights
+
+        # Update config version to indicate L2
+        result["config_version"] = l2_config.get("config_version", result.get("config_version", "1.0"))
+        result["l2_applied"] = l2_config.get("gbp_path")
+
+        return result
+
+    def _load_l1_config(self, sector_code: str) -> dict[str, Any] | None:
+        """Load (and cache) the L1 config for a sector; None when there is no file or no sector code."""
+        if not sector_code:
+            return None
+        if sector_code in self._l1_cache:
+            return self._l1_cache[sector_code]
+
+        config_path = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
+        if not config_path.exists():
+            return None
+
+        with open(config_path, encoding="utf-8") as f:
+            config = json.load(f)
+
+        self._l1_cache[sector_code] = config
+        return config
+
+    def _load_sector_brief(self, sector_code: str) -> dict[str, Any] | None:
+        """Load (and cache) the sector brief; None when there is no file or no sector code."""
+        if not sector_code:
+            return None
+        if sector_code in self._brief_cache:
+            return self._brief_cache[sector_code]
+
+        brief_path = BRIEFS_DIR / f"{sector_code.lower()}_brief.json"
+        if not brief_path.exists():
+            return None
+
+        with open(brief_path, encoding="utf-8") as f:
+            brief = json.load(f)
+
+        self._brief_cache[sector_code] = brief
+        return brief
+
+    async def get_business_mapping(self, pool, business_id: str) -> dict[str, Any] | None:
+        """Fetch the business's taxonomy row (business_id, gbp_path as text, sector_code) or None."""
+        query = """
+            SELECT business_id, gbp_path::text, sector_code
+            FROM pipeline.business_taxonomy_map
+            WHERE business_id = $1
+        """
+        row = await pool.fetchrow(query, business_id)
+        return dict(row) if row else None
+
+    def resolve_enabled_set(self, l1_config: dict) -> set[str]:
+        """Return the config's enabled primitives plus all meta primitives."""
+        enabled = set(l1_config.get("enabled", []))
+        enabled.update(META_PRIMITIVES)
+        return enabled
+
+    def build_primitives_for_prompt(self, enabled: set[str], weights: dict[str, float]) -> dict[str, dict]:
+        """Build the per-primitive descriptions for the prompt.
+
+        Core primitives get a copy of their CORE_PRIMITIVES entry (plus "weight"
+        when one is configured); meta primitives get a generated entry with
+        domain "M". Names in neither collection are omitted.
+        """
+        result = {}
+        for prim in enabled:
+            if prim in CORE_PRIMITIVES:
+                entry = CORE_PRIMITIVES[prim].copy()
+                if prim in weights:
+                    entry["weight"] = weights[prim]
+                result[prim] = entry
+            elif prim in META_PRIMITIVES:
+                result[prim] = {"domain": "M", "name": prim.replace("_", " ").title(), "meta": True}
+        return result
+
+    def extract_brief_signals(self, brief: dict) -> dict[str, Any]:
+        """Pick the brief fields passed on to classification; {} for a missing brief."""
+        if not brief:
+            return {}
+        return {
+            "sector": brief.get("sector_code"),
+            "what_customers_judge": brief.get("what_customers_judge"),
+            "critical_pain_points": brief.get("critical_pain_points"),
+            "industry_terminology": brief.get("industry_terminology"),
+        }
+
+    async def resolve(self, business_id: str, pool, mode: str | None = None) -> dict[str, Any] | None:
+        """Resolve the full classification config for a business.
+
+        Returns None when the business has no taxonomy mapping. When the sector
+        has no L1 config file, every core primitive is enabled with no weights.
+        ``mode`` defaults to "in_person" when not given.
+        """
+        mapping = await self.get_business_mapping(pool, business_id)
+        if not mapping:
+            return None
+
+        sector_code = mapping["sector_code"]
+        gbp_path = mapping["gbp_path"]
+
+        # Load L1 config (sector-level)
+        l1_config = self._load_l1_config(sector_code)
+        if not l1_config:
+            l1_config = {"enabled": list(CORE_PRIMITIVES.keys()), "weights": {}}
+
+        # Check for L2 config (category-level delta)
+        l2_config = self._find_matching_l2(gbp_path)
+        if l2_config:
+            logger.info(f"Applying L2 delta for {gbp_path}: {l2_config.get('gbp_path')}")
+            l1_config = self._apply_l2_delta(l1_config, l2_config)
+
+        brief = self._load_sector_brief(sector_code)
+
+        enabled = self.resolve_enabled_set(l1_config)
+        weights = dict(l1_config.get("weights", {}))
+        primitives = self.build_primitives_for_prompt(enabled, weights)
+        brief_signals = self.extract_brief_signals(brief)
+
+        return {
+            "business_id": business_id,
+            "gbp_path": gbp_path,
+            "sector_code": sector_code,
+            "config_version": l1_config.get("config_version", "1.0"),
+            "l2_applied": l1_config.get("l2_applied"),
+            "modes": [mode] if mode else ["in_person"],
+            "default_mode": mode or "in_person",
+            "enabled_primitives": sorted(enabled),
+            "disabled_primitives": sorted(l1_config.get("disabled", [])),
+            "weights": weights,
+            "brief": brief_signals,
+            "primitives": primitives,
+        }
--- a/packages/reviewiq-pipeline/scripts/fix_l1_configs.py
+++ b/packages/reviewiq-pipeline/scripts/fix_l1_configs.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""
+Fix L1 configs based on validation results.
+
+Applies fixes discovered during validation:
+1. Enable primitives that were disabled but appearing frequently
+2. Remove weights for primitives with zero appearances
+3. Add weights for high-frequency unweighted primitives
+"""
+
+import json
+from pathlib import Path
+
+# Directory holding the per-sector L1 configs (<script dir>/../data/primitive_configs/l1)
+CONFIGS_DIR = Path(__file__).parent.parent / "data" / "primitive_configs" / "l1"
+
+# Fixes based on validation results
+# Format: { sector: { "enable": [primitives], "disable": [primitives], "add_weight": {prim: weight}, "remove_weight": [prims] } }
+# Keys are sector codes; fix_config() lowercases them to locate <sector>_config.json.
+# In the table below "disable" and "add_weight" are empty for every sector.
+FIXES = {
+    "ENTERTAINMENT": {
+        "enable": ["CRAFT", "CONSISTENCY", "COMMUNICATION", "FRICTION"],
+        "disable": [],
+        "add_weight": {},
+        "remove_weight": ["CONDITION"],  # 0 appearances despite 1.4x weight
+    },
+    "FOOD_DINING": {
+        "enable": ["PRICE_LEVEL", "ACCESSIBILITY", "PRICE_TRANSPARENCY", "FRICTION", "EFFECTIVENESS"],
+        "disable": [],
+        "add_weight": {},
+        "remove_weight": [],
+    },
+    "AUTOMOTIVE": {
+        "enable": ["CRAFT", "CONSISTENCY", "PRICE_LEVEL", "AMBIANCE"],
+        "disable": [],
+        "add_weight": {},
+        "remove_weight": [],
+    },
+    "HEALTHCARE": {
+        "enable": ["CRAFT", "PRICE_LEVEL", "AMBIANCE"],
+        "disable": [],
+        "add_weight": {},
+        "remove_weight": [],
+    },
+    "RETAIL_SHOPPING": {
+        "enable": ["CRAFT", "PRICE_LEVEL", "AMBIANCE"],
+        "disable": [],
+        "add_weight": {},
+        "remove_weight": [],
+    },
+    "HOSPITALITY_TRAVEL": {
+        "enable": ["CRAFT", "CONSISTENCY", "PRICE_LEVEL"],
+        "disable": [],
+        "add_weight": {},
+        "remove_weight": [],
+    },
+    "PERSONAL_SERVICES": {
+        "enable": ["PRICE_LEVEL", "SPEED", "FRICTION"],
+        "disable": [],
+        "add_weight": {},
+        "remove_weight": [],
+    },
+}
+
+
+def fix_config(sector_code: str, fixes: dict) -> "list[str] | None":
+    """Apply fixes to a sector config.
+
+    The config file is rewritten (and its version set to "1.1") only when at
+    least one fix actually changed something; otherwise the file is untouched.
+
+    Args:
+        sector_code: Sector whose <sector_code lowercased>_config.json is edited
+        fixes: Dict with optional keys "enable" and "disable" (lists of
+            primitives), "add_weight" ({primitive: weight}) and "remove_weight"
+            (list of primitives)
+
+    Returns:
+        Human-readable descriptions of the changes made (empty when nothing
+        needed changing), or None when the config file does not exist
+    """
+    config_path = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
+
+    if not config_path.exists():
+        print(f"  ⚠️  Config not found: {config_path}")
+        return None
+
+    with open(config_path) as f:
+        config = json.load(f)
+
+    enabled = set(config.get("enabled", []))
+    disabled = set(config.get("disabled", []))
+    weights = config.get("weights", {})
+
+    changes = []
+
+    # Apply enables (move from disabled to enabled)
+    for prim in fixes.get("enable", []):
+        if prim in disabled:
+            disabled.remove(prim)
+            enabled.add(prim)
+            changes.append(f"✓ Enabled {prim}")
+        elif prim not in enabled:
+            enabled.add(prim)
+            changes.append(f"✓ Added {prim} to enabled")
+
+    # Apply disables (move from enabled to disabled)
+    for prim in fixes.get("disable", []):
+        if prim in enabled:
+            enabled.remove(prim)
+            disabled.add(prim)
+            changes.append(f"✗ Disabled {prim}")
+
+    # Add weights (existing weights are never overwritten)
+    for prim, weight in fixes.get("add_weight", {}).items():
+        if prim not in weights:
+            weights[prim] = weight
+            changes.append(f"⚖️ Added weight {prim}: {weight}x")
+
+    # Remove weights
+    for prim in fixes.get("remove_weight", []):
+        if prim in weights:
+            del weights[prim]
+            changes.append(f"⚖️ Removed weight for {prim}")
+
+    # Nothing changed: do not rewrite the file or touch its version, so a
+    # re-run is a true no-op and the caller's "No changes applied" is accurate.
+    if not changes:
+        return changes
+
+    # Update config
+    config["enabled"] = sorted(enabled)
+    config["disabled"] = sorted(disabled)
+    config["weights"] = dict(sorted(weights.items()))
+    # NOTE(review): the version is set to a fixed value rather than incremented,
+    # so a config already past 1.1 would be moved back - confirm this is intended.
+    config["config_version"] = "1.1"  # Bump version
+
+    # Save
+    with open(config_path, "w") as f:
+        json.dump(config, f, indent=2)
+        f.write("\n")
+
+    return changes
+
+
+def main():
+    """Apply every entry of FIXES to its sector config and print a per-sector report."""
+    rule = "=" * 60
+    print(rule)
+    print("L1 CONFIG FIXER - Applying validation-based fixes")
+    print(rule)
+
+    total_changes = 0
+
+    for sector, sector_fixes in FIXES.items():
+        print(f"\n📁 {sector}")
+        applied = fix_config(sector, sector_fixes)
+        # None (config missing) and an empty list are both reported as "no changes"
+        if not applied:
+            print("   No changes applied")
+            continue
+        for description in applied:
+            print(f"   {description}")
+        total_changes += len(applied)
+
+    print(f"\n{rule}")
+    print(f"Total changes applied: {total_changes}")
+    print("Config version bumped to 1.1")
+    print(rule)
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/reviewiq-pipeline/scripts/fix_l1_configs_v2.py
+++ b/packages/reviewiq-pipeline/scripts/fix_l1_configs_v2.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python3
+"""
+Guarded L1 Config Fixer - V2 (Threshold-based, Sector-scoped)
+
+Only applies fixes when:
+1. Evidence is from sector-scoped validation
+2. Frequency exceeds threshold (default 3%)
+3. Changes are logged with version bump
+
+Usage:
+    python fix_l1_configs_v2.py --apply         # Apply fixes from validation
+    python fix_l1_configs_v2.py --dry-run       # Show what would change
+    python fix_l1_configs_v2.py --revert SECTOR # Revert to previous version (not yet implemented; prints a notice)
+"""
+
+import argparse
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+# Directory holding the per-sector L1 configs; the changelog lives alongside them.
+CONFIGS_DIR = Path(__file__).parent.parent / "data" / "primitive_configs" / "l1"
+CHANGELOG_FILE = CONFIGS_DIR / "CHANGELOG.json"
+
+# Minimum threshold for auto-enabling (% of sector spans)
+ENABLE_THRESHOLD_PCT = 3.0
+
+# Fixes derived from sector-scoped validation (validate_l1_configs_v2.py output)
+# These are the ONLY fixes that should be applied
+# Entry shapes, as unpacked by apply_fixes():
+#   "enable":        (primitive, frequency_pct, reason) - skipped below ENABLE_THRESHOLD_PCT
+#   "add_weight":    (primitive, weight, reason)
+#   "remove_weight": (primitive, reason)
+#   "evidence":      free-text description recorded in the changelog
+SECTOR_SCOPED_FIXES = {
+    "ENTERTAINMENT": {
+        "evidence": "2,320 spans from Go Karts + Soho Club",
+        "enable": [
+            ("TASTE", 4.3, "Entertainment venues have concessions/food service"),
+        ],
+        "add_weight": [
+            ("CRAFT", 1.3, "13.4% frequency but unweighted"),
+        ],
+        "remove_weight": [],
+    },
+    "FOOD_DINING": {
+        "evidence": "61 spans from Fika cafe",
+        "enable": [
+            ("COMFORT", 9.8, "Seating/atmosphere comfort matters for cafes"),
+        ],
+        "add_weight": [
+            ("AVAILABILITY", 1.2, "16.4% frequency but unweighted"),
+        ],
+        "remove_weight": [
+            # Note: Small sample size (61 spans) - these may be false negatives
+            # Keep weights but flag for review with more data
+        ],
+    },
+    "AUTOMOTIVE": {
+        "evidence": "1,201 spans from ClickRent car rental",
+        "enable": [],  # Nothing exceeds 3% threshold
+        "add_weight": [],
+        "remove_weight": [
+            # CONDITION, HONESTY, PROMISES, RECOVERY all have 0 appearances
+            # However, may be specific to rental vs repair - keep for now
+        ],
+    },
+}
+
+
+def load_changelog() -> list[dict]:
+    """Return the parsed contents of the changelog file, or [] when it does not exist."""
+    if not CHANGELOG_FILE.exists():
+        return []
+    with open(CHANGELOG_FILE) as handle:
+        entries = json.load(handle)
+    return entries
+
+
+def save_changelog(entries: list[dict]) -> None:
+    """Write the changelog entries as indented JSON, creating the directory if needed."""
+    target_dir = CHANGELOG_FILE.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    with open(CHANGELOG_FILE, "w") as handle:
+        json.dump(entries, handle, indent=2)
+        # Trailing newline after the JSON document
+        handle.write("\n")
+
+
+def load_config(sector_code: str) -> dict[str, Any] | None:
+    """Return the parsed <sector_code lowercased>_config.json, or None when the file is missing."""
+    file_name = f"{sector_code.lower()}_config.json"
+    config_path = CONFIGS_DIR / file_name
+    if config_path.exists():
+        with open(config_path) as handle:
+            return json.load(handle)
+    return None
+
+
+def save_config(sector_code: str, config: dict[str, Any]) -> None:
+    """Write the config as indented JSON to <sector_code lowercased>_config.json."""
+    file_name = f"{sector_code.lower()}_config.json"
+    with open(CONFIGS_DIR / file_name, "w") as handle:
+        json.dump(config, handle, indent=2)
+        # Trailing newline after the JSON document
+        handle.write("\n")
+
+
+def apply_fixes(sector_code: str, fixes: dict, dry_run: bool = False) -> list[str]:
+    """Apply fixes to a sector config.
+
+    The config is saved, its minor version bumped and a changelog entry written
+    only when at least one fix actually modified it. Below-threshold SKIP
+    notices are reported but never cause a write on their own.
+
+    Args:
+        sector_code: Sector whose config is loaded and (unless dry_run) saved
+        fixes: Dict with "evidence" text and lists "enable"
+            [(primitive, pct, reason)], "add_weight" [(primitive, weight, reason)]
+            and "remove_weight" [(primitive, reason)]
+        dry_run: When True the changes are computed and described, not saved
+
+    Returns:
+        Report lines: one per change or skip, a "No changes needed" line when
+        nothing was modified, or a version line after a real save. A single
+        error line is returned when the config file is missing.
+    """
+    config = load_config(sector_code)
+    if not config:
+        return [f"❌ Config not found for {sector_code}"]
+
+    enabled = set(config.get("enabled", []))
+    disabled = set(config.get("disabled", []))
+    weights = config.get("weights", {})
+
+    changes = []
+    modified = False  # True once a fix has really altered the config
+    evidence = fixes.get("evidence", "unknown")
+
+    # Enable primitives
+    for prim, pct, reason in fixes.get("enable", []):
+        if pct < ENABLE_THRESHOLD_PCT:
+            # Informational only: a skip does not count as a modification
+            changes.append(f"⚠️  SKIP {prim}: {pct:.1f}% below {ENABLE_THRESHOLD_PCT}% threshold")
+            continue
+
+        if prim in disabled:
+            disabled.remove(prim)
+            enabled.add(prim)
+            changes.append(f"✓ ENABLE {prim}: {pct:.1f}% in sector data ({reason})")
+            modified = True
+        elif prim not in enabled:
+            enabled.add(prim)
+            changes.append(f"✓ ADD {prim}: {pct:.1f}% in sector data ({reason})")
+            modified = True
+
+    # Add weights (existing weights are never overwritten)
+    for prim, weight, reason in fixes.get("add_weight", []):
+        if prim not in weights:
+            weights[prim] = weight
+            changes.append(f"⚖️  WEIGHT {prim}: {weight}x ({reason})")
+            modified = True
+
+    # Remove weights
+    for prim, reason in fixes.get("remove_weight", []):
+        if prim in weights:
+            del weights[prim]
+            changes.append(f"⚖️  UNWEIGHT {prim}: ({reason})")
+            modified = True
+
+    if not modified:
+        # Keep any SKIP notices visible, but do not save or bump the version.
+        return changes + ["✓ No changes needed"]
+
+    if not dry_run:
+        # Bump version
+        old_version = str(config.get("config_version", "1.0"))
+        new_version = _next_minor_version(old_version)
+        # One timestamp so the config and its changelog entry agree
+        now = datetime.now(timezone.utc).isoformat()
+
+        config["enabled"] = sorted(enabled)
+        config["disabled"] = sorted(disabled)
+        config["weights"] = dict(sorted(weights.items()))
+        config["config_version"] = new_version
+        config["config_updated_at"] = now
+
+        save_config(sector_code, config)
+
+        # Log to changelog
+        changelog = load_changelog()
+        changelog.append({
+            "sector": sector_code,
+            "version": new_version,
+            "previous_version": old_version,
+            "timestamp": now,
+            "evidence": evidence,
+            "changes": changes,
+        })
+        save_changelog(changelog)
+
+        changes.append(f"📝 Version: {old_version} → {new_version}")
+
+    return changes
+
+
+def _next_minor_version(version: str) -> str:
+    """Return "<major>.<minor + 1>"; a missing or non-numeric minor part counts as 0."""
+    parts = version.split(".")
+    major = parts[0]
+    minor = int(parts[1]) if len(parts) > 1 and parts[1].isdigit() else 0
+    return f"{major}.{minor + 1}"
+
+
+def revert_config(sector_code: str, to_version: str | None = None) -> list[str]:
+    """Report on a requested revert.
+
+    No revert is performed: the function only checks whether the changelog has
+    entries for the sector and returns a one-line message. ``to_version`` is
+    accepted but not used.
+    """
+    history = list(filter(lambda entry: entry["sector"] == sector_code, load_changelog()))
+    if len(history) == 0:
+        return [f"❌ No changelog entries for {sector_code}"]
+
+    # TODO: Implement actual revert by storing full config snapshots
+    return ["⚠️  Revert not yet implemented - manual restore required"]
+
+
+def main():
+    """CLI dispatcher: show the changelog, report on a revert, apply or dry-run the fixes, or print help."""
+    parser = argparse.ArgumentParser(description="Guarded L1 config fixer")
+    parser.add_argument("--apply", action="store_true", help="Apply sector-scoped fixes")
+    parser.add_argument("--dry-run", action="store_true", help="Show what would change")
+    parser.add_argument("--revert", metavar="SECTOR", help="Revert sector to previous version")
+    parser.add_argument("--sector", help="Apply to specific sector only")
+    parser.add_argument("--show-changelog", action="store_true", help="Show changelog")
+
+    args = parser.parse_args()
+
+    # Option precedence: --show-changelog, then --revert, then --apply/--dry-run
+    if args.show_changelog:
+        print(json.dumps(load_changelog(), indent=2))
+        return
+
+    if args.revert:
+        for line in revert_config(args.revert.upper()):
+            print(line)
+        return
+
+    if not (args.apply or args.dry_run):
+        parser.print_help()
+        return
+
+    rule = "=" * 60
+    mode_label = "DRY RUN" if args.dry_run else "APPLYING FIXES"
+    print(rule)
+    print(f"L1 CONFIG FIXER V2 - {mode_label}")
+    print(f"Threshold: {ENABLE_THRESHOLD_PCT}%")
+    print(rule)
+
+    sectors = [args.sector.upper()] if args.sector else SECTOR_SCOPED_FIXES.keys()
+
+    for sector in sectors:
+        sector_fixes = SECTOR_SCOPED_FIXES.get(sector)
+        if sector_fixes is None:
+            print(f"\n⚠️  {sector}: No sector-scoped fixes defined")
+            continue
+
+        print(f"\n📁 {sector}")
+        print(f"   Evidence: {sector_fixes['evidence']}")
+
+        # When both flags are given, --dry-run wins: nothing is written
+        for line in apply_fixes(sector, sector_fixes, dry_run=args.dry_run):
+            print(f"   {line}")
+
+    print("\n" + rule)
+    if args.dry_run:
+        print("DRY RUN - No changes applied")
+    else:
+        print("Fixes applied - see CHANGELOG.json for history")
+    print(rule)
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/reviewiq-pipeline/scripts/generate_sector_briefs.py
+++ b/packages/reviewiq-pipeline/scripts/generate_sector_briefs.py
@@ -0,0 +1,372 @@
+#!/usr/bin/env python3
+"""
+Wave 0: Sector Brief Generator
+
+Generates alignment context briefs for each sector.
+These briefs inform Wave 1 and Wave 2 primitive config generation.
+
+Usage:
+    python generate_sector_briefs.py                       # Generate all sectors
+    python generate_sector_briefs.py --sector FOOD_DINING  # Generate one sector
+    python generate_sector_briefs.py --dry-run             # Show what would be generated
+    python generate_sector_briefs.py --validate            # Validate existing briefs
+"""
+
+import argparse
+import json
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+try:
+    from openai import OpenAI
+except ImportError:
+    print("ERROR: openai package required. Install with: pip install openai")
+    sys.exit(1)
+
+
+PROMPT_TEMPLATE = '''You are an expert in customer experience analysis across industries.
+
+Your task: Generate a **sector brief** for the "{sector_name}" sector.
+
+This brief will be used to align classification agents with industry-specific context.
+It describes what customers care about — NOT how to classify, NOT what primitives to use.
+
+## Sector Information
+
+- **Code**: {sector_code}
+- **Name**: {sector_name}
+- **Description**: {description}
+- **Sample Business Types**: {business_types}
+
+## Output Requirements
+
+Generate a JSON object with this exact structure:
+
+```json
+{{
+  "sector_code": "{sector_code}",
+  "sector_name": "{sector_name}",
+  "generated_at": "<ISO timestamp>",
+  "version": "1.0",
+
+  "what_customers_judge": {{
+    "description": "The primary dimensions customers evaluate in this sector",
+    "items": [
+      {{
+        "aspect": "string (2-5 words)",
+        "importance": "critical | high | moderate",
+        "why_it_matters": "string (1 sentence)"
+      }}
+    ]
+  }},
+
+  "critical_pain_points": {{
+    "description": "What damages reputation most severely",
+    "items": [
+      {{
+        "pain_point": "string (2-5 words)",
+        "typical_language": ["phrases customers actually use in reviews"],
+        "reputation_impact": "severe | significant | moderate"
+      }}
+    ]
+  }},
+
+  "common_praise": {{
+    "description": "What earns customer loyalty and positive reviews",
+    "items": [
+      {{
+        "praise_area": "string (2-5 words)",
+        "typical_language": ["phrases customers actually use in reviews"],
+        "loyalty_impact": "high | moderate"
+      }}
+    ]
+  }},
+
+  "industry_terminology": {{
+    "description": "Domain-specific vocabulary",
+    "staff_terms": ["terms for staff roles in this sector"],
+    "product_terms": ["terms for products/services"],
+    "process_terms": ["terms for processes/interactions"],
+    "quality_terms": ["positive quality descriptors"],
+    "problem_terms": ["negative quality descriptors"]
+  }},
+
+  "mode_specific_concerns": {{
+    "description": "Different service modes have different priorities",
+    "modes": [
+      {{
+        "mode": "string (e.g., 'In-person', 'Online', 'Phone')",
+        "primary_concerns": ["top concerns for this mode"],
+        "unique_pain_points": ["pain points specific to this mode"]
+      }}
+    ]
+  }},
+
+  "what_is_actionable": {{
+    "description": "Feedback businesses can act on",
+    "actionable_examples": [
+      {{
+        "feedback_type": "string",
+        "example": "string (realistic review excerpt)",
+        "action_owner": "role/team that can fix it"
+      }}
+    ],
+    "not_actionable_examples": [
+      {{
+        "feedback_type": "string",
+        "example": "string (realistic review excerpt)",
+        "why_not_actionable": "string"
+      }}
+    ]
+  }},
+
+  "sector_specific_signals": {{
+    "description": "Signals with sector-specific meaning",
+    "examples": [
+      {{
+        "signal": "string (word or phrase)",
+        "meaning_in_this_sector": "string",
+        "contrast_with": "how it differs in other sectors"
+      }}
+    ]
+  }}
+}}
+```
+
+## Critical Rules
+
+1. **Use realistic review language** in `typical_language` arrays - actual phrases customers write
+2. **Include 4-8 items** per array (not too few, not excessive)
+3. **Be sector-specific** - don't use generic phrases that apply to all businesses
+4. **Include appropriate modes** - only modes that actually exist in this sector
+5. **NO primitive codes, priorities, weights, or solutions**
+6. **Focus on WHAT customers care about**, not HOW to classify it
+
+Return ONLY the JSON object, no markdown formatting or explanation.'''
+
+
+def load_sectors(data_path: Path) -> list[dict]:
+    """Load sector definitions from a JSON file.
+
+    Args:
+        data_path: Path to a JSON document whose top-level object has a
+            "sectors" key.
+
+    Returns:
+        The value stored under the "sectors" key.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If the file is not valid JSON.
+        KeyError: If the document has no "sectors" key.
+    """
+    # Read as UTF-8 explicitly: JSON is UTF-8 by specification, while open()
+    # otherwise falls back to the locale encoding (e.g. cp1252 on Windows).
+    with open(data_path, encoding="utf-8") as f:
+        data = json.load(f)
+    return data["sectors"]
+
+
+def generate_sector_brief(client: OpenAI, sector: dict, model: str) -> dict:
+    """Generate a sector brief by prompting a chat model for a JSON object.
+
+    Args:
+        client: OpenAI client used to call ``chat.completions.create``.
+        sector: Sector definition; must contain "sector_code", "sector_name",
+            "description" and "sample_business_types" (an iterable of strings).
+        model: Model name passed through to the API.
+
+    Returns:
+        The parsed brief. "sector_code", "sector_name", "generated_at" and
+        "version" are always overwritten locally so they do not depend on
+        what the model echoed back.
+
+    Raises:
+        KeyError: If ``sector`` lacks one of the required keys.
+        ValueError: If the model returns no content or a non-object JSON
+            value (``json.JSONDecodeError`` is a ``ValueError`` subclass).
+    """
+    # Local import: the module header only imports ``datetime`` itself.
+    from datetime import timezone
+
+    prompt = PROMPT_TEMPLATE.format(
+        sector_code=sector["sector_code"],
+        sector_name=sector["sector_name"],
+        description=sector["description"],
+        business_types=", ".join(sector["sample_business_types"])
+    )
+
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {
+                "role": "system",
+                "content": "You are an expert customer experience analyst. Return only valid JSON, no markdown."
+            },
+            {"role": "user", "content": prompt}
+        ],
+        temperature=0.3,
+        max_tokens=4000,
+        response_format={"type": "json_object"}
+    )
+
+    # ``content`` can be None (e.g. a refusal); fail with a readable message
+    # instead of an AttributeError on ``None.strip()``.
+    content = response.choices[0].message.content
+    if content is None:
+        raise ValueError("Model returned no content for sector brief")
+
+    # Parse JSON
+    brief = json.loads(content.strip())
+    if not isinstance(brief, dict):
+        raise ValueError(
+            f"Expected a JSON object for sector brief, got {type(brief).__name__}"
+        )
+
+    # Ensure required fields
+    brief["sector_code"] = sector["sector_code"]
+    brief["sector_name"] = sector["sector_name"]
+    # datetime.utcnow() is deprecated; build the same naive-UTC "...Z" string
+    # from an aware timestamp.
+    brief["generated_at"] = (
+        datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
+    )
+    brief["version"] = "1.0"
+
+    return brief
+
+
+def validate_brief(brief: dict) -> list[str]:
+    """Validate a sector brief and return a list of human-readable issues.
+
+    Checks performed:
+      * every required top-level section is present;
+      * "what_customers_judge" has 3-10 items, "critical_pain_points" and
+        "common_praise" have at least 3 items;
+      * the serialized brief does not contain terms that belong to primitive
+        configuration rather than to a brief (substring match, lower-cased).
+
+    A section may be either an object with an "items" list or a bare list;
+    any other shape is counted as having zero items instead of raising.
+
+    Args:
+        brief: Parsed brief (must be JSON-serializable).
+
+    Returns:
+        Issue strings; an empty list means no problems were found.
+    """
+    issues = []
+
+    required_keys = [
+        "what_customers_judge",
+        "critical_pain_points",
+        "common_praise",
+        "industry_terminology",
+        "mode_specific_concerns",
+        "what_is_actionable",
+        "sector_specific_signals"
+    ]
+
+    for key in required_keys:
+        if key not in brief:
+            issues.append(f"Missing required key: {key}")
+
+    def _items(section) -> list:
+        """Return the item list of a section, tolerating a bare-list section."""
+        if isinstance(section, dict):
+            section = section.get("items", [])
+        return section if isinstance(section, list) else []
+
+    # Check array lengths: (section, minimum, maximum or None for "no cap")
+    length_rules = [
+        ("what_customers_judge", 3, 10),
+        ("critical_pain_points", 3, None),
+        ("common_praise", 3, None),
+    ]
+    for key, minimum, maximum in length_rules:
+        if key not in brief:
+            continue
+        count = len(_items(brief[key]))
+        if count < minimum:
+            issues.append(f"{key} has only {count} items (need {minimum}+)")
+        if maximum is not None and count > maximum:
+            issues.append(f"{key} has {count} items (max {maximum})")
+
+    # Check for forbidden content. "solution" is intentionally not listed:
+    # it legitimately appears in review language, and the previous code
+    # listed it only to skip it again.
+    text = json.dumps(brief).lower()
+    forbidden = ["priority", "weight", "primitive", "enabled", "disabled"]
+    for word in forbidden:
+        if word in text:
+            issues.append(f"Contains potentially forbidden term: {word}")
+
+    return issues
+
+
+def save_brief(brief: dict, output_dir: Path) -> Path:
+    """Write a brief as indented JSON and return the path that was written.
+
+    The directory (including missing parents) is created first. The file is
+    named ``<sector_code lower-cased>_brief.json``.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+    target = output_dir / (brief["sector_code"].lower() + "_brief.json")
+
+    serialized = json.dumps(brief, indent=2)
+    with open(target, "w") as handle:
+        handle.write(serialized)
+
+    return target
+
+
+def validate_existing_briefs(output_dir: Path) -> None:
+    """Validate every ``*_brief.json`` file in ``output_dir`` and print a report.
+
+    Each file gets one status line (✓ or ✗) followed by its issues. A file
+    that cannot be read or parsed is reported as an issue for that file, so
+    a single corrupt brief no longer aborts the whole run.
+
+    Args:
+        output_dir: Directory containing the brief files.
+    """
+    if not output_dir.exists():
+        print(f"Output directory does not exist: {output_dir}")
+        return
+
+    files = sorted(output_dir.glob("*_brief.json"))
+    if not files:
+        print("No brief files found")
+        return
+
+    print(f"Validating {len(files)} brief files...\n")
+
+    all_valid = True
+    for filepath in files:
+        try:
+            with open(filepath, encoding="utf-8") as f:
+                brief = json.load(f)
+        except (OSError, ValueError) as exc:
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+            issues = [f"Could not load file: {exc}"]
+        else:
+            issues = validate_brief(brief)
+
+        status = "✓" if not issues else "✗"
+        print(f"{status} {filepath.name}")
+
+        if issues:
+            all_valid = False
+            for issue in issues:
+                print(f"    - {issue}")
+
+    print()
+    if all_valid:
+        print("All briefs valid!")
+    else:
+        print("Some briefs have issues.")
+
+
+def main():
+    """Command-line entry point: generate, dry-run, or validate sector briefs.
+
+    Modes, chosen by flags parsed from ``sys.argv``:
+      * ``--validate``: validate the brief files in the output directory, then return.
+      * ``--dry-run``: list the sectors that would be generated, then return.
+      * default: generate one brief per sector via the OpenAI API, print any
+        validation warnings, and save each brief (warnings do not block saving).
+
+    Exits with status 1 when the requested sector is unknown, when
+    OPENAI_API_KEY is not set, or when at least one sector failed.
+    """
+    parser = argparse.ArgumentParser(description="Generate sector briefs for Wave 0")
+    parser.add_argument("--sector", help="Generate only this sector code")
+    parser.add_argument("--dry-run", action="store_true", help="Show what would be generated")
+    parser.add_argument("--validate", action="store_true", help="Validate existing briefs")
+    parser.add_argument("--output-dir", default="data/sector_briefs", help="Output directory")
+    parser.add_argument("--model", default="gpt-4o", help="OpenAI model to use")
+    args = parser.parse_args()
+
+    # Paths (resolved relative to the package directory, i.e. the parent of scripts/)
+    script_dir = Path(__file__).parent
+    package_dir = script_dir.parent
+    data_path = package_dir / "data" / "sectors.json"
+    output_dir = package_dir / args.output_dir
+
+    # Validate mode
+    if args.validate:
+        validate_existing_briefs(output_dir)
+        return
+
+    # Load sectors
+    sectors = load_sectors(data_path)
+    print(f"Loaded {len(sectors)} sectors")
+
+    # Filter to single sector if specified
+    if args.sector:
+        # Compare case-insensitively so "--sector food_dining" selects
+        # FOOD_DINING; exact matches behave as before.
+        wanted = args.sector.upper()
+        sectors = [s for s in sectors if s["sector_code"].upper() == wanted]
+        if not sectors:
+            print(f"ERROR: Sector '{args.sector}' not found")
+            sys.exit(1)
+
+    if args.dry_run:
+        print("\n[DRY RUN] Would generate briefs for:")
+        for sector in sectors:
+            print(f"  - {sector['sector_code']}: {sector['sector_name']}")
+        print(f"\nOutput directory: {output_dir}")
+        return
+
+    # Check API key
+    api_key = os.environ.get("OPENAI_API_KEY")
+    if not api_key:
+        print("ERROR: OPENAI_API_KEY environment variable required")
+        sys.exit(1)
+
+    # Initialize client
+    client = OpenAI(api_key=api_key)
+    print(f"Using model: {args.model}")
+
+    # Generate briefs
+    results = {"success": [], "failed": []}
+
+    for i, sector in enumerate(sectors, 1):
+        print(f"\n[{i}/{len(sectors)}] Generating brief for: {sector['sector_name']}")
+
+        try:
+            brief = generate_sector_brief(client, sector, args.model)
+
+            # Validate (issues are reported as warnings; the brief is still saved)
+            issues = validate_brief(brief)
+            if issues:
+                print("  Warnings:")
+                for issue in issues:
+                    print(f"    - {issue}")
+
+            # Save
+            output_path = save_brief(brief, output_dir)
+            print(f"  ✓ Saved to: {output_path}")
+            results["success"].append(sector["sector_code"])
+
+        except Exception as e:
+            # Best-effort batch: record the failure and continue with the
+            # remaining sectors; the exit status reflects it below.
+            print(f"  ✗ FAILED: {e}")
+            results["failed"].append(sector["sector_code"])
+
+    # Summary
+    print(f"\n{'='*60}")
+    print("SUMMARY")
+    print(f"{'='*60}")
+    print(f"Success: {len(results['success'])}")
+    print(f"Failed:  {len(results['failed'])}")
+
+    if results["failed"]:
+        print(f"\nFailed sectors: {', '.join(results['failed'])}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/reviewiq-pipeline/scripts/llm_classifier.py
+++ b/packages/reviewiq-pipeline/scripts/llm_classifier.py
@@ -0,0 +1,523 @@
+"""
+LLM Classifier - Real classification using the OpenAI Chat Completions API (structured outputs).
+
+Uses JSON Schema to enforce strict output format.
+Validates primitives against enabled set.
+Stores raw response for audit.
+Supports multilingual reviews with language detection.
+"""
+
+import hashlib
+import json
+import os
+import re
+import time
+from typing import Any
+
+from openai import OpenAI
+
+# Language detection - try langdetect, fall back to heuristics
+try:
+    from langdetect import detect as langdetect_detect, LangDetectException
+    LANGDETECT_AVAILABLE = True
+except ImportError:
+    LANGDETECT_AVAILABLE = False
+    LangDetectException = Exception  # Placeholder
+
+
+def detect_language(text: str) -> tuple[str, float]:
+    """
+    Detect the language of a text.
+
+    Uses langdetect when it is importable; if it is missing or raises
+    LangDetectException, falls back to a heuristic based on Unicode script
+    ranges and, for Latin script, on common-word counts for es/en/de/fr.
+
+    Args:
+        text: The text to inspect. Empty or shorter than 3 non-blank
+            characters yields ("unknown", 0.0).
+
+    Returns:
+        (language_code, confidence). The confidence is an estimate: for
+        langdetect it is derived from text length only (capped at 0.95);
+        for the heuristics it is a fixed or score-based value (at most 0.75).
+    """
+    if not text or len(text.strip()) < 3:
+        return "unknown", 0.0
+
+    text = text.strip()
+
+    # Try langdetect first (most accurate)
+    if LANGDETECT_AVAILABLE:
+        try:
+            lang = langdetect_detect(text)
+            # langdetect.detect() returns no confidence; estimate from text length
+            confidence = min(0.95, 0.5 + len(text) / 200)
+            return lang, confidence
+        except LangDetectException:
+            pass
+
+    # Fallback: Simple heuristic detection based on character ranges
+    # This is less accurate but works without dependencies
+
+    # Count characters in different scripts
+    latin = sum(1 for c in text if '\u0041' <= c <= '\u024F')  # Latin extended
+    cyrillic = sum(1 for c in text if '\u0400' <= c <= '\u04FF')  # Cyrillic
+    cjk = sum(1 for c in text if '\u4E00' <= c <= '\u9FFF')  # CJK Unified
+    japanese = sum(1 for c in text if '\u3040' <= c <= '\u30FF')  # Hiragana + Katakana
+    korean = sum(1 for c in text if '\uAC00' <= c <= '\uD7AF')  # Hangul
+    arabic = sum(1 for c in text if '\u0600' <= c <= '\u06FF')  # Arabic
+
+    # At least 3 here: shorter stripped text returned early above.
+    total = len(text)
+
+    # Determine primary script.
+    # Kana must be tested BEFORE the CJK block: Japanese kanji live in the
+    # same CJK Unified range as Chinese, so kanji-heavy Japanese would
+    # otherwise be reported as "zh". Any kana marks the text as Japanese,
+    # with kanji counted towards the Japanese share.
+    if japanese > 0 and (japanese + cjk) / total > 0.2:
+        return "ja", 0.6  # Japanese
+    if cjk / total > 0.3:
+        return "zh", 0.6  # Chinese
+    if korean / total > 0.3:
+        return "ko", 0.6  # Korean
+    if cyrillic / total > 0.3:
+        return "ru", 0.5  # Russian (could be other Cyrillic)
+    if arabic / total > 0.3:
+        return "ar", 0.5  # Arabic
+
+    if latin / total > 0.5:
+        # Latin script - try to distinguish languages by common words
+        text_lower = text.lower()
+
+        def hits(words: list[str]) -> int:
+            """Count how many of the given words occur as whole words."""
+            return sum(1 for w in words if re.search(rf'\b{w}\b', text_lower))
+
+        # Spanish indicators (expanded for better detection)
+        es_words = ['el', 'la', 'los', 'las', 'de', 'que', 'es', 'en', 'un', 'una',
+                    'muy', 'pero', 'con', 'está', 'están', 'para', 'por', 'como',
+                    'excelente', 'recomendado', 'servicio', 'bueno', 'malo', 'bien',
+                    'todo', 'nada', 'más', 'sin', 'nunca', 'siempre', 'también']
+        es_score = hits(es_words)
+
+        # Spanish-specific patterns (accents, ñ, inverted punctuation)
+        if 'ñ' in text_lower or '¿' in text or '¡' in text:
+            es_score += 3
+        if any(c in text_lower for c in 'áéíóúü'):
+            es_score += 1
+
+        # English indicators
+        en_words = ['the', 'and', 'is', 'are', 'was', 'were', 'this', 'that',
+                    'with', 'for', 'but', 'not', 'very', 'great', 'good',
+                    'service', 'place', 'food', 'staff', 'friendly', 'amazing',
+                    'would', 'recommend', 'will', 'definitely', 'really']
+        en_score = hits(en_words)
+
+        # German indicators
+        de_words = ['der', 'die', 'das', 'und', 'ist', 'sind', 'war', 'sehr',
+                    'mit', 'für', 'aber', 'nicht', 'ein', 'eine', 'wir', 'ich',
+                    'auch', 'gut', 'schlecht', 'toll', 'super']
+        de_score = hits(de_words)
+        # German umlauts
+        if any(c in text_lower for c in 'äöüß'):
+            de_score += 2
+
+        # French indicators
+        fr_words = ['le', 'la', 'les', 'est', 'sont', 'très', 'mais', 'avec',
+                    'pour', 'pas', 'un', 'une', 'et', 'nous', 'vous', 'bien',
+                    'bon', 'mauvais', 'excellent', 'super', "c'est", "j'ai"]
+        fr_score = hits(fr_words)
+        # French accents and patterns
+        if any(c in text_lower for c in 'àâçèêëîïôùûÿœæ'):
+            fr_score += 2
+
+        # Ties resolve to the first key in this dict (es, en, de, fr).
+        scores = {'es': es_score, 'en': en_score, 'de': de_score, 'fr': fr_score}
+        best_lang = max(scores, key=scores.get)
+        best_score = scores[best_lang]
+
+        if best_score >= 1:  # Lowered threshold
+            confidence = min(0.75, 0.3 + best_score * 0.08)
+            return best_lang, confidence
+
+        # Default to English for Latin script
+        return "en", 0.3
+
+    return "unknown", 0.1
+
+# Lazy client initialization
+_client = None
+
+
+def get_client() -> OpenAI:
+    """Return the shared OpenAI client, creating it on the first call.
+
+    The client is cached in the module-level ``_client`` so later calls
+    reuse it.
+
+    Raises:
+        RuntimeError: If no client exists yet and OPENAI_API_KEY is unset
+            or empty.
+    """
+    global _client
+    if _client is not None:
+        return _client
+
+    key = os.environ.get("OPENAI_API_KEY")
+    if key:
+        _client = OpenAI(api_key=key)
+        return _client
+
+    raise RuntimeError(
+        "OPENAI_API_KEY environment variable not set. "
+        "Set it or use --dry-run / mock classifier."
+    )
+
+# Default model
+DEFAULT_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")
+
+# Meta primitives - always available
+META_PRIMITIVES = frozenset([
+    "HONESTY", "ETHICS", "PROMISES",
+    "ACKNOWLEDGMENT", "RESPONSE_QUALITY", "RECOVERY",
+    "RETURN_INTENT", "RECOMMEND", "RECOGNITION",
+    "UNMAPPED",
+])
+
+# JSON Schema for structured output
+SPAN_SCHEMA = {
+    "name": "review_classification",
+    "strict": True,
+    "schema": {
+        "type": "object",
+        "additionalProperties": False,
+        "properties": {
+            "spans": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "additionalProperties": False,
+                    "properties": {
+                        "primitive": {"type": "string"},
+                        "valence": {"type": "string", "enum": ["positive", "negative", "mixed", "neutral"]},
+                        "intensity": {"type": "integer", "minimum": 1, "maximum": 5},
+                        "evidence": {"type": "string"},
+                        "start_char": {"type": ["integer", "null"]},
+                        "end_char": {"type": ["integer", "null"]},
+                        "confidence": {"type": "number", "minimum": 0.0, "maximum": 1.0},
+                        "details": {"type": "null"}
+                    },
+                    "required": ["primitive", "valence", "intensity", "evidence", "confidence", "start_char", "end_char", "details"]
+                }
+            },
+            "unmapped": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "additionalProperties": False,
+                    "properties": {
+                        "label": {"type": "string"},
+                        "evidence": {"type": "string"},
+                        "confidence": {"type": "number", "minimum": 0.0, "maximum": 1.0}
+                    },
+                    "required": ["label", "evidence", "confidence"]
+                }
+            }
+        },
+        "required": ["spans", "unmapped"]
+    }
+}
+
+# System prompt
+SYSTEM_PROMPT = """You are a review classification system that extracts semantic spans and maps them to primitives.
+
+## RULES (MUST FOLLOW)
+
+1. Use ONLY primitives from the enabled_primitives list provided. Do NOT invent new primitives.
+
+2. Meta primitives are always available: HONESTY, ETHICS, PROMISES, ACKNOWLEDGMENT, RESPONSE_QUALITY, RECOVERY, RETURN_INTENT, RECOMMEND, RECOGNITION, UNMAPPED
+
+3. If content doesn't fit any enabled primitive, use UNMAPPED or put it in the unmapped array with a descriptive label.
+
+4. Output MUST match the JSON schema exactly. No extra keys.
+
+5. Evidence must be a SHORT EXACT QUOTE from the review text (in original language).
+
+6. Extract 1-5 spans per review. Prefer fewer, larger spans over many small ones.
+
+7. If unsure about classification, lower the confidence score.
+
+## VALENCE
+- positive: praise, satisfaction, recommendation
+- negative: complaint, dissatisfaction, warning
+- mixed: both positive and negative in same span
+- neutral: factual observation, no sentiment
+
+## INTENSITY (1-5)
+- 1: mild ("okay", "fine")
+- 2: moderate ("good", "bad")
+- 3: strong ("great", "terrible")
+- 4: very strong ("amazing", "awful")
+- 5: extreme ("best ever", "worst nightmare")
+
+## CONFIDENCE
+- 0.9+: Very certain the primitive fits
+- 0.7-0.9: Confident
+- 0.5-0.7: Moderate confidence
+- <0.5: Low confidence (consider UNMAPPED)
+
+Output valid JSON only. No markdown, no explanations."""
+
+
+def compute_review_hash(text: str, config_version: str) -> str:
+    """Return a 16-hex-character cache key for a review under a config version.
+
+    The key is the first 16 characters of the SHA-256 hex digest of
+    ``"<config_version>:<text>"`` encoded as UTF-8.
+    """
+    material = "{}:{}".format(config_version, text).encode("utf-8")
+    full_digest = hashlib.sha256(material).hexdigest()
+    return full_digest[0:16]
+
+
+def build_user_payload(
+    review_text: str,
+    rating: int | None,
+    config: dict[str, Any],
+    language: str = "auto",
+) -> dict[str, Any]:
+    """Build the user message payload for the LLM.
+
+    Args:
+        review_text: Review text, passed through unchanged.
+        rating: Star rating, or None when unavailable.
+        config: Resolved config. Keys read here: "enabled_primitives",
+            "primitives", "brief", "business_id", "sector_code",
+            "config_version" and "weights" (all optional).
+        language: Language code placed in the payload's review section.
+
+    Returns:
+        A dict with the business info, the sorted enabled primitives
+        (including meta primitives), their short definitions, the weights,
+        a trimmed sector brief, and the review itself.
+    """
+    # Extract only what the model needs
+    enabled = set(config.get("enabled_primitives", []))
+    enabled.update(META_PRIMITIVES)
+    # Sort once so both the primitive list and the definitions dict have a
+    # stable order (set iteration order varies between runs).
+    enabled_sorted = sorted(enabled)
+
+    # Build primitive definitions (minimal)
+    primitives_dict = config.get("primitives", {})
+    primitive_defs = {}
+    for prim in enabled_sorted:
+        if prim in primitives_dict:
+            info = primitives_dict[prim]
+            primitive_defs[prim] = info.get("def", info.get("name", prim))
+        elif prim in META_PRIMITIVES:
+            primitive_defs[prim] = f"Meta primitive: {prim.replace('_', ' ').lower()}"
+
+    def _labels(section: Any, keys: tuple[str, ...], limit: int) -> list:
+        """Pull up to ``limit`` short labels from a brief section.
+
+        The section is either {"items": [...]} or a bare list. For dict
+        items the first key of ``keys`` that is present wins; anything else
+        is stringified.
+        """
+        entries = section.get("items", []) if isinstance(section, dict) else section
+        labels = []
+        for entry in entries[:limit]:
+            if isinstance(entry, dict):
+                labels.append(next((entry[k] for k in keys if k in entry), str(entry)))
+            else:
+                labels.append(str(entry))
+        return labels
+
+    # Extract brief signals (keep it short)
+    brief = config.get("brief", {})
+    brief_summary = {}
+    if brief.get("what_customers_judge"):
+        brief_summary["key_judgment_areas"] = _labels(
+            brief["what_customers_judge"], ("aspect", "area"), 5
+        )
+    if brief.get("critical_pain_points"):
+        # Generated briefs name this field "pain_point"; reading only "pain"
+        # fell through to str(item) and sent the whole dict repr to the
+        # model. "pain" is kept as a fallback for older briefs.
+        brief_summary["critical_pains"] = _labels(
+            brief["critical_pain_points"], ("pain_point", "pain"), 3
+        )
+
+    return {
+        "business": {
+            "name": config.get("business_id"),
+            "sector": config.get("sector_code"),
+            "config_version": config.get("config_version"),
+        },
+        "enabled_primitives": enabled_sorted,
+        "primitive_definitions": primitive_defs,
+        "weights": config.get("weights", {}),
+        "sector_brief": brief_summary,
+        "review": {
+            "text": review_text,
+            "rating": rating,
+            "language": language,
+        },
+    }
+
+
+def validate_response(
+    response: dict[str, Any],
+    enabled_primitives: set[str],
+) -> tuple[dict[str, Any], list[str]]:
+    """
+    Check each span's primitive against the allowed set.
+
+    A span whose primitive is neither enabled nor a meta primitive is
+    relabelled "UNMAPPED" (the span dict is modified in place) and a warning
+    is recorded. The "unmapped" list is passed through untouched.
+
+    Returns (validated_response, warnings).
+    """
+    permitted = enabled_primitives | META_PRIMITIVES
+    notes: list[str] = []
+    checked: list = []
+
+    for entry in response.get("spans", []):
+        name = entry.get("primitive")
+        if name in permitted:
+            checked.append(entry)
+            continue
+        notes.append(f"Invalid primitive '{name}' → UNMAPPED (original: {name})")
+        entry["primitive"] = "UNMAPPED"
+        checked.append(entry)
+
+    result = {"spans": checked, "unmapped": response.get("unmapped", [])}
+    return result, notes
+
+
+def classify_review(
+    review_text: str,
+    rating: int | None,
+    config: dict[str, Any],
+    language: str = "auto",
+    model: str | None = None,
+    max_retries: int = 3,
+) -> dict[str, Any]:
+    """
+    Classify a single review using OpenAI.
+
+    Never raises for API or parsing failures: after retries are exhausted
+    (or on a non-retryable error) a single low-confidence UNMAPPED span is
+    returned, with the error recorded in "warnings" and "raw_response".
+    A missing API key still raises RuntimeError from get_client().
+
+    Args:
+        review_text: The review text to classify
+        rating: Star rating (1-5) if available
+        config: Resolved config from ConfigResolver
+        language: Language hint (default: auto-detect)
+        model: Model to use (default: DEFAULT_MODEL)
+        max_retries: Max attempts; only rate-limit (429) and server
+            (500/502/503) errors are retried
+
+    Returns:
+        {
+            "spans": [...],
+            "unmapped": [...],
+            "model": str,
+            "raw_response": str,
+            "review_hash": str,
+            "warnings": [...],
+            "tokens": {"prompt": int, "completion": int},
+            "detected_language": str,
+            "language_confidence": float,
+        }
+    """
+    model = model or DEFAULT_MODEL
+
+    # Detect language if auto; a caller-supplied language is trusted fully.
+    if language == "auto":
+        detected_lang, lang_confidence = detect_language(review_text)
+    else:
+        detected_lang = language
+        lang_confidence = 1.0  # User-specified
+
+    # Build payload with detected language
+    payload = build_user_payload(review_text, rating, config, detected_lang)
+    user_content = json.dumps(payload, ensure_ascii=False, indent=None)
+
+    # Compute hash for caching
+    review_hash = compute_review_hash(review_text, config.get("config_version", "1.0"))
+
+    # Call OpenAI with retries
+    last_error = None
+    client = get_client()
+    for attempt in range(max_retries):
+        try:
+            response = client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": SYSTEM_PROMPT},
+                    {"role": "user", "content": user_content},
+                ],
+                response_format={
+                    "type": "json_schema",
+                    "json_schema": SPAN_SCHEMA,
+                },
+                temperature=0.1,  # Low temperature for consistency
+                max_tokens=2000,
+            )
+
+            # Parse response
+            raw_text = response.choices[0].message.content
+            if raw_text is None:
+                # e.g. a refusal: report it clearly instead of letting
+                # json.loads(None) raise an opaque TypeError.
+                raise ValueError("Model returned no content")
+            parsed = json.loads(raw_text)
+
+            # Validate primitives
+            enabled = set(config.get("enabled_primitives", []))
+            validated, warnings = validate_response(parsed, enabled)
+
+            return {
+                "spans": validated["spans"],
+                "unmapped": validated["unmapped"],
+                "model": model,
+                "raw_response": raw_text,
+                "review_hash": review_hash,
+                "warnings": warnings,
+                "tokens": {
+                    "prompt": response.usage.prompt_tokens if response.usage else 0,
+                    "completion": response.usage.completion_tokens if response.usage else 0,
+                },
+                "detected_language": detected_lang,
+                "language_confidence": lang_confidence,
+            }
+
+        except json.JSONDecodeError as e:
+            last_error = f"JSON parse error: {e}"
+            # Don't retry parse errors - return the fallback below
+            break
+
+        except Exception as e:
+            last_error = str(e)
+
+            # Prefer the HTTP status carried by the exception when it has
+            # one; fall back to inspecting the message only when it does not.
+            status = getattr(e, "status_code", None)
+            if status is not None:
+                rate_limited = status == 429
+                server_error = status in (500, 502, 503)
+            else:
+                rate_limited = "rate_limit" in last_error.lower() or "429" in last_error
+                server_error = any(code in last_error for code in ("500", "502", "503"))
+
+            if not (rate_limited or server_error):
+                # Don't retry other errors
+                break
+            if attempt + 1 >= max_retries:
+                # Out of attempts: don't sleep just to give up afterwards.
+                break
+
+            # Exponential backoff for rate limits, fixed 1s pause for server errors
+            time.sleep(2 ** attempt if rate_limited else 1)
+
+    # Fallback response on error
+    return {
+        "spans": [{
+            "primitive": "UNMAPPED",
+            "valence": "neutral",
+            "intensity": 1,
+            "evidence": review_text[:100] if review_text else "",
+            "start_char": 0,
+            "end_char": min(100, len(review_text)) if review_text else 0,
+            "confidence": 0.1,
+            "details": {"error": last_error},
+        }],
+        "unmapped": [],
+        "model": model,
+        "raw_response": json.dumps({"error": last_error}),
+        "review_hash": review_hash,
+        "warnings": [f"Classification failed: {last_error}"],
+        "tokens": {"prompt": 0, "completion": 0},
+        "detected_language": detected_lang,
+        "language_confidence": lang_confidence,
+    }
+
+
+async def classify_review_async(
+    review_text: str,
+    rating: int | None,
+    config: dict[str, Any],
+    language: str = "auto",
+    model: str | None = None,
+) -> dict[str, Any]:
+    """Async wrapper for classify_review.
+
+    Runs the blocking classify_review call in the event loop's default
+    executor so the loop is not blocked while the request is in flight.
+    Arguments and the returned dict are those of classify_review
+    (max_retries keeps its default).
+    """
+    import asyncio
+
+    # get_running_loop() is the documented way to obtain the loop from
+    # inside a coroutine.
+    loop = asyncio.get_running_loop()
+    return await loop.run_in_executor(
+        None,
+        lambda: classify_review(review_text, rating, config, language, model),
+    )
+
+
+# Batch classification (for later optimization)
+async def classify_batch(
+    reviews: list[dict[str, Any]],
+    config: dict[str, Any],
+    model: str | None = None,
+    max_concurrent: int = 5,
+) -> list[dict[str, Any]]:
+    """
+    Classify several reviews, running at most ``max_concurrent`` at a time.
+
+    Args:
+        reviews: List of {"text": str, "rating": int, "language": str};
+            missing keys default to "", None and "auto" respectively
+        config: Resolved config shared by all reviews
+        model: Model to use
+        max_concurrent: Max concurrent requests
+
+    Returns:
+        Classification results, in the same order as ``reviews``
+    """
+    import asyncio
+
+    gate = asyncio.Semaphore(max_concurrent)
+
+    async def _guarded(item: dict) -> dict:
+        # Hold one semaphore slot for the duration of a single classification.
+        await gate.acquire()
+        try:
+            return await classify_review_async(
+                item.get("text", ""),
+                item.get("rating"),
+                config,
+                item.get("language", "auto"),
+                model,
+            )
+        finally:
+            gate.release()
+
+    pending = []
+    for item in reviews:
+        pending.append(_guarded(item))
+    return await asyncio.gather(*pending)
--- a/packages/reviewiq-pipeline/scripts/run_classification_v2.py
+++ b/packages/reviewiq-pipeline/scripts/run_classification_v2.py
--- a/packages/reviewiq-pipeline/scripts/validate_l1_configs.py
+++ b/packages/reviewiq-pipeline/scripts/validate_l1_configs.py
@@ -0,0 +1,457 @@
+#!/usr/bin/env python3
+"""
+Wave 1 L1 Config Validation Script
+
+Validates L1 primitive configs against real review data by analyzing:
+1. Coverage: % of spans mapped to enabled primitives
+2. Top primitives by frequency
+3. Disabled primitives appearing (potential misconfig)
+4. Weight effectiveness
+
+Usage:
+    python validate_l1_configs.py --sector ENTERTAINMENT --job-url "gokarts"
+    python validate_l1_configs.py --sector AUTOMOTIVE --job-url "clickrent"
+    python validate_l1_configs.py --all
+"""
+
+import argparse
+import asyncio
+import json
+import os
+import sys
+from collections import Counter, defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import asyncpg
+
+# Paths: data files live in the "data" directory two levels up from this script file.
+DATA_DIR = Path(__file__).parent.parent / "data"
+CONFIGS_DIR = DATA_DIR / "primitive_configs" / "l1"  # holds "<sector>_config.json" files
+BRIEFS_DIR = DATA_DIR / "sector_briefs"  # holds "<sector>_brief.json" files
+
+# Primitive to URT domain mapping
+# URT domain letters: O=Offering, P=People, J=Journey, E=Environment, A=Access, V=Value, R=Relationship
+# Note: no primitive in this table is assigned to A or R. AVAILABILITY is filed
+# under J, and ACCESSIBILITY / DIGITAL_UX under E.
+PRIMITIVE_TO_DOMAIN = {
+    # Quality -> Offering (O)
+    "TASTE": "O", "CRAFT": "O", "FRESHNESS": "O", "TEMPERATURE": "O",
+    "EFFECTIVENESS": "O", "ACCURACY": "O", "CONDITION": "O", "CONSISTENCY": "O",
+    # Service -> People (P)
+    "MANNER": "P", "COMPETENCE": "P", "ATTENTIVENESS": "P", "COMMUNICATION": "P",
+    # Process -> Journey (J)
+    "SPEED": "J", "FRICTION": "J", "RELIABILITY": "J", "AVAILABILITY": "J",
+    # Environment -> Environment (E)
+    "CLEANLINESS": "E", "COMFORT": "E", "SAFETY": "E", "AMBIANCE": "E",
+    "ACCESSIBILITY": "E", "DIGITAL_UX": "E",
+    # Value -> Value (V)
+    "PRICE_LEVEL": "V", "PRICE_FAIRNESS": "V", "PRICE_TRANSPARENCY": "V", "VALUE_FOR_MONEY": "V",
+}
+
+# URT code to primitive mapping (simplified - maps URT codes to closest primitive)
+# The mapping is many-to-one. Access (A*) and Relationship (R*) codes have no
+# primitives of their own here and are folded into primitives that
+# PRIMITIVE_TO_DOMAIN files under other domains (e.g. "A1.01" -> AVAILABILITY,
+# "R3.01" -> MANNER). Codes absent from this table are treated as unmapped by
+# lookups that use .get().
+URT_TO_PRIMITIVE = {
+    # Offering codes
+    "O1.01": "CONSISTENCY", "O1.02": "CRAFT", "O1.03": "FRESHNESS",
+    "O1.04": "EFFECTIVENESS", "O1.05": "TASTE", "O1.06": "CONDITION",
+    "O2.01": "ACCURACY", "O2.02": "EFFECTIVENESS", "O2.03": "CRAFT",
+    "O3.01": "ACCURACY", "O3.02": "CONSISTENCY", "O3.03": "EFFECTIVENESS",
+    # People codes
+    "P1.01": "MANNER", "P1.02": "MANNER", "P1.03": "ATTENTIVENESS",
+    "P1.04": "COMMUNICATION", "P1.05": "ATTENTIVENESS",
+    "P2.01": "COMPETENCE", "P2.02": "COMPETENCE", "P2.03": "COMPETENCE",
+    "P3.01": "COMMUNICATION", "P3.02": "COMMUNICATION", "P3.03": "COMMUNICATION",
+    # Journey codes
+    "J1.01": "SPEED", "J1.02": "RELIABILITY", "J1.03": "FRICTION",
+    "J1.04": "SPEED", "J1.05": "RELIABILITY",
+    "J2.01": "RELIABILITY", "J2.02": "RELIABILITY", "J2.03": "FRICTION",
+    "J3.01": "FRICTION", "J3.02": "FRICTION", "J3.03": "FRICTION",
+    # Environment codes
+    "E1.01": "CLEANLINESS", "E1.02": "COMFORT", "E1.03": "AMBIANCE",
+    "E1.04": "AMBIANCE", "E1.05": "COMFORT",
+    "E2.01": "AMBIANCE", "E2.02": "COMFORT", "E2.03": "COMFORT",
+    "E2.04": "AMBIANCE", "E2.05": "DIGITAL_UX",
+    "E3.01": "SAFETY", "E3.02": "SAFETY", "E3.03": "ACCESSIBILITY",
+    "E4.01": "ACCESSIBILITY", "E4.02": "ACCESSIBILITY", "E4.03": "DIGITAL_UX",
+    # Access codes
+    "A1.01": "AVAILABILITY", "A1.02": "AVAILABILITY", "A1.03": "AVAILABILITY",
+    "A1.04": "ACCESSIBILITY", "A1.05": "ACCESSIBILITY",
+    "A2.01": "ACCESSIBILITY", "A2.02": "ACCESSIBILITY", "A2.03": "DIGITAL_UX",
+    "A3.01": "ACCESSIBILITY", "A3.02": "ACCESSIBILITY", "A3.03": "SPEED",
+    "A4.01": "ACCESSIBILITY", "A4.02": "ACCESSIBILITY", "A4.03": "AVAILABILITY",
+    # Value codes
+    "V1.01": "PRICE_LEVEL", "V1.02": "PRICE_FAIRNESS", "V1.03": "PRICE_TRANSPARENCY",
+    "V2.01": "PRICE_FAIRNESS", "V2.02": "PRICE_TRANSPARENCY", "V2.03": "VALUE_FOR_MONEY",
+    "V3.01": "VALUE_FOR_MONEY", "V3.02": "VALUE_FOR_MONEY", "V3.03": "PRICE_FAIRNESS",
+    "V4.01": "VALUE_FOR_MONEY", "V4.02": "VALUE_FOR_MONEY", "V4.03": "VALUE_FOR_MONEY",
+    # Relationship codes
+    "R1.01": "RELIABILITY", "R1.02": "RELIABILITY", "R1.03": "RELIABILITY",
+    "R2.01": "RELIABILITY", "R2.02": "CONSISTENCY", "R2.03": "RELIABILITY",
+    "R3.01": "MANNER", "R3.02": "MANNER", "R3.03": "COMMUNICATION",
+    "R4.01": "CONSISTENCY", "R4.02": "RELIABILITY", "R4.03": "CONSISTENCY",
+}
+
+
+@dataclass
+class ValidationResult:
+    """
+    Validation results for a sector.
+
+    Built by analyze_spans() and rendered by print_validation_report().
+    All fields are required and positional; there are no defaults.
+    """
+    sector_code: str  # copied from the config's "sector_code" key
+    job_count: int  # analyze_spans() fills in the placeholder value 1
+    review_count: int  # analyze_spans() fills in 0 (not tracked at span level)
+    span_count: int  # number of spans analyzed; the denominator for report percentages
+
+    # Coverage metrics
+    enabled_coverage: float  # fraction (0..1) of spans using enabled primitives
+    disabled_hits: dict[str, int]  # disabled primitives that appeared, with counts
+    unmapped_count: int  # spans that couldn't be mapped
+
+    # Distribution
+    primitive_counts: dict[str, int]  # all primitives by count
+    domain_distribution: dict[str, int]  # O, P, J, E, A, V, R
+    valence_distribution: dict[str, int]  # V+, V-, V0, V±
+
+    # Top codes
+    top_urt_codes: list[tuple[str, int]]  # (URT code, count), most frequent first
+
+    # Recommendations
+    recommendations: list[str]  # human-readable suggestion lines
+
+
+def load_l1_config(sector_code: str) -> dict[str, Any] | None:
+    """
+    Load L1 config for a sector.
+
+    Args:
+        sector_code: Sector code (e.g. "ENTERTAINMENT"); it is lower-cased to
+            form the file name "<sector>_config.json" under CONFIGS_DIR.
+
+    Returns:
+        The parsed JSON content, or None when the config file does not exist.
+
+    Raises:
+        json.JSONDecodeError: If the file exists but is not valid JSON.
+    """
+    config_file = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
+    try:
+        # Read as UTF-8 explicitly rather than relying on the platform locale.
+        # Opening directly (instead of exists() + open()) also closes the
+        # window in which the file could disappear between the two calls.
+        with open(config_file, encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        return None
+
+
+def load_sector_brief(sector_code: str) -> dict[str, Any] | None:
+    """
+    Load sector brief for a sector.
+
+    Args:
+        sector_code: Sector code (e.g. "ENTERTAINMENT"); it is lower-cased to
+            form the file name "<sector>_brief.json" under BRIEFS_DIR.
+
+    Returns:
+        The parsed JSON content, or None when the brief file does not exist.
+
+    Raises:
+        json.JSONDecodeError: If the file exists but is not valid JSON.
+    """
+    brief_file = BRIEFS_DIR / f"{sector_code.lower()}_brief.json"
+    try:
+        # Read as UTF-8 explicitly rather than relying on the platform locale.
+        # Opening directly also avoids an exists()/open() race.
+        with open(brief_file, encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        return None
+
+
+def map_urt_to_primitive(urt_code: str) -> str | None:
+    """Look up the primitive for a URT code; None when the code is not in URT_TO_PRIMITIVE."""
+    try:
+        return URT_TO_PRIMITIVE[urt_code]
+    except KeyError:
+        return None
+
+
+async def fetch_spans_for_jobs(pool: asyncpg.Pool, job_url_pattern: str) -> list[dict]:
+    """
+    Fetch spans for jobs whose URL contains the given pattern.
+
+    The pattern is lower-cased and wrapped in % wildcards, and compared against
+    LOWER(j.url), so the match is a case-insensitive substring test. Rows are
+    ordered by rs.created_at descending and returned as plain dicts.
+    """
+    query = """
+        SELECT
+            rs.urt_primary,
+            rs.valence,
+            rs.intensity,
+            rs.span_text,
+            j.url
+        FROM pipeline.review_spans rs
+        JOIN pipeline.reviews_raw rr ON rs.review_id = rr.review_id
+        JOIN public.jobs j ON rr.job_id = j.job_id
+        WHERE LOWER(j.url) LIKE $1
+        ORDER BY rs.created_at DESC
+    """
+    like_argument = f"%{job_url_pattern.lower()}%"
+    records = await pool.fetch(query, like_argument)
+    return list(map(dict, records))
+
+
+async def fetch_all_spans(pool: asyncpg.Pool) -> list[dict]:
+    """
+    Fetch every span in pipeline.review_spans.
+
+    Rows are ordered by created_at descending and returned as plain dicts with
+    the urt_primary, valence, intensity and span_text columns.
+    """
+    query = """
+        SELECT
+            urt_primary,
+            valence,
+            intensity,
+            span_text
+        FROM pipeline.review_spans
+        ORDER BY created_at DESC
+    """
+    records = await pool.fetch(query)
+    return list(map(dict, records))
+
+
+def analyze_spans(
+    spans: list[dict],
+    config: dict[str, Any],
+) -> ValidationResult:
+    """
+    Analyze spans against L1 config.
+
+    Every span's URT code is tallied, mapped to a primitive with
+    map_urt_to_primitive(), and checked against the config's "enabled" and
+    "disabled" lists. Recommendations are then derived from those tallies.
+
+    Args:
+        spans: Span rows; the "urt_primary" and "valence" keys are read.
+            A missing or None URT code is counted as "?" (and is unmapped);
+            a missing or None valence is counted as "V0".
+        config: L1 config. "sector_code" is required; "enabled", "disabled"
+            and "weights" are optional.
+
+    Returns:
+        A ValidationResult. job_count is the placeholder 1 and review_count
+        is 0 because neither is tracked at span level.
+
+    Raises:
+        KeyError: If config has no "sector_code" key.
+    """
+    sector_code = config["sector_code"]
+    enabled = set(config.get("enabled", []))
+    disabled = set(config.get("disabled", []))
+    weights = config.get("weights", {})
+
+    # Counters
+    primitive_counts: Counter = Counter()
+    domain_counts: Counter = Counter()
+    valence_counts: Counter = Counter()
+    urt_counts: Counter = Counter()
+    disabled_hits: Counter = Counter()
+    unmapped = 0
+    enabled_hits = 0
+
+    for span in spans:
+        # A row can carry an explicit None, which dict.get's default does not
+        # replace; normalise both fields so counting and indexing are safe.
+        urt_code = span.get("urt_primary") or "?"
+        valence = span.get("valence") or "V0"
+
+        urt_counts[urt_code] += 1
+        valence_counts[valence] += 1
+
+        # The domain is the URT code's leading letter. Taking it from the
+        # mapped primitive instead would move every A* and R* code into another
+        # domain, so the Access and Relationship rows could never be filled.
+        domain_counts[urt_code[0]] += 1
+
+        primitive = map_urt_to_primitive(urt_code)
+        if not primitive:
+            unmapped += 1
+            continue
+
+        primitive_counts[primitive] += 1
+        if primitive in enabled:
+            enabled_hits += 1
+        elif primitive in disabled:
+            disabled_hits[primitive] += 1
+
+    # Calculate coverage
+    total = len(spans)
+    enabled_coverage = enabled_hits / total if total > 0 else 0
+
+    # Generate recommendations
+    recommendations = []
+
+    # Disabled primitives that appeared frequently (count >= 10 implies total > 0)
+    for prim, count in disabled_hits.most_common(5):
+        if count >= 10:
+            pct = count / total * 100
+            recommendations.append(
+                f"ENABLE {prim}: Disabled but appeared {count} times ({pct:.1f}%)"
+            )
+
+    # Weighted, enabled primitives that never appeared. Iterate the dict itself
+    # (not a set of its keys) so the output order is stable between runs.
+    for prim in weights:
+        if primitive_counts[prim] == 0 and prim in enabled:
+            recommendations.append(
+                f"CHECK {prim}: Weighted ({weights[prim]}x) but no appearances"
+            )
+
+    # Frequently appearing (>= 10% of spans) enabled primitives without a weight
+    for prim, count in primitive_counts.most_common(10):
+        if prim in enabled and prim not in weights and count >= total * 0.1:
+            pct = count / total * 100
+            recommendations.append(
+                f"WEIGHT {prim}: High frequency ({count}, {pct:.1f}%) but not weighted"
+            )
+
+    return ValidationResult(
+        sector_code=sector_code,
+        job_count=1,  # Placeholder; not computed here
+        review_count=0,  # Not tracked at span level
+        span_count=total,
+        enabled_coverage=enabled_coverage,
+        disabled_hits=dict(disabled_hits),
+        unmapped_count=unmapped,
+        primitive_counts=dict(primitive_counts),
+        domain_distribution=dict(domain_counts),
+        valence_distribution=dict(valence_counts),
+        top_urt_codes=urt_counts.most_common(15),
+        recommendations=recommendations,
+    )
+
+
+def print_validation_report(result: ValidationResult, config: dict, brief: dict | None):
+    """
+    Print formatted validation report.
+
+    Sections, in order: overview, config summary, domain distribution, valence
+    distribution, top 12 primitives, top 10 URT codes, disabled-but-appearing
+    primitives, recommendations and, when a brief is supplied, up to five
+    judgment areas from it. Percentages are relative to result.span_count and
+    are shown as 0 when there are no spans.
+    """
+    has_spans = result.span_count > 0
+    rule = "=" * 70
+
+    def share(n):
+        # Percentage of all analyzed spans; 0 when nothing was analyzed.
+        return n / result.span_count * 100 if has_spans else 0
+
+    print("\n" + rule)
+    print(f"VALIDATION REPORT: {result.sector_code}")
+    print(rule)
+
+    # Overview
+    print(f"\n📊 OVERVIEW")
+    print(f"   Spans analyzed: {result.span_count:,}")
+    print(f"   Enabled coverage: {result.enabled_coverage:.1%}")
+    if has_spans:
+        print(f"   Unmapped spans: {result.unmapped_count} ({result.unmapped_count/result.span_count*100:.1f}%)")
+    else:
+        print("   No spans")
+
+    # Config summary
+    print(f"\n⚙️  CONFIG SUMMARY")
+    print(f"   Enabled: {len(config.get('enabled', []))} primitives")
+    print(f"   Disabled: {len(config.get('disabled', []))} primitives")
+    print(f"   Weighted: {len(config.get('weights', {}))} primitives")
+
+    # Domain distribution
+    print(f"\n📁 DOMAIN DISTRIBUTION")
+    domain_names = {"O": "Offering", "P": "People", "J": "Journey",
+                    "E": "Environment", "A": "Access", "V": "Value", "R": "Relationship"}
+    for domain in "OPJEVRA":
+        count = result.domain_distribution.get(domain, 0)
+        pct = share(count)
+        bar = "█" * int(pct / 2)  # one bar character per two percentage points
+        print(f"   {domain} {domain_names.get(domain, '?'):12} {count:4} ({pct:5.1f}%) {bar}")
+
+    # Valence distribution
+    print(f"\n😊 VALENCE DISTRIBUTION")
+    for val in ["V+", "V-", "V0", "V±"]:
+        count = result.valence_distribution.get(val, 0)
+        pct = share(count)
+        print(f"   {val}: {count:4} ({pct:5.1f}%)")
+
+    # Top primitives
+    print(f"\n🔝 TOP PRIMITIVES")
+    enabled_set = set(config.get("enabled", []))
+    weights = config.get("weights", {})
+    by_count = sorted(result.primitive_counts.items(), key=lambda kv: kv[1], reverse=True)
+    for prim, count in by_count[:12]:
+        pct = share(count)
+        status = "✗"
+        if prim in enabled_set:
+            status = "✓"
+        weight = ""
+        if prim in weights:
+            weight = f"({weights[prim]}x)"
+        print(f"   {status} {prim:20} {count:4} ({pct:5.1f}%) {weight}")
+
+    # Top URT codes
+    print(f"\n📋 TOP URT CODES")
+    for code, count in result.top_urt_codes[:10]:
+        pct = share(count)
+        mapped = URT_TO_PRIMITIVE.get(code, "UNMAPPED")
+        print(f"   {code}: {count:4} ({pct:5.1f}%) → {mapped}")
+
+    # Disabled but appearing
+    if result.disabled_hits:
+        print(f"\n⚠️  DISABLED BUT APPEARING")
+        hits_by_count = sorted(result.disabled_hits.items(), key=lambda kv: kv[1], reverse=True)
+        for prim, count in hits_by_count:
+            pct = share(count)
+            print(f"   {prim}: {count} ({pct:.1f}%)")
+
+    # Recommendations
+    if result.recommendations:
+        print(f"\n💡 RECOMMENDATIONS")
+        for rec in result.recommendations:
+            print(f"   • {rec}")
+
+    # Brief signals check (if available)
+    if brief:
+        print(f"\n📝 BRIEF SIGNALS CHECK")
+        what_customers_judge = brief.get("what_customers_judge", {})
+        # The brief section may be a dict with an "items" list, or a bare list.
+        if isinstance(what_customers_judge, dict):
+            items = what_customers_judge.get("items", [])
+        elif isinstance(what_customers_judge, list):
+            items = what_customers_judge
+        else:
+            items = []
+
+        print(f"   Key judgment areas from brief:")
+        for item in items[:5]:
+            if not isinstance(item, dict):
+                print(f"   • {item}")
+            else:
+                print(f"   • {item.get('area', item)}")
+
+    print("\n" + rule)
+
+
+async def run_validation(
+    sector_code: str,
+    job_url_pattern: str | None = None,
+    db_url: str | None = None,
+):
+    """
+    Run validation for a sector and print its report.
+
+    Args:
+        sector_code: Sector whose L1 config (and brief, if any) is loaded.
+        job_url_pattern: When given, only spans of jobs whose URL matches are
+            analyzed; otherwise all spans are fetched.
+        db_url: Database URL. Falls back to the DATABASE_URL environment
+            variable, then to a built-in localhost default.
+
+    Returns:
+        The ValidationResult, or None when the sector has no L1 config or the
+        URL pattern matched no spans.
+    """
+    config = load_l1_config(sector_code)
+    if not config:
+        print(f"❌ No L1 config found for {sector_code}")
+        return None
+
+    brief = load_sector_brief(sector_code)
+
+    if not db_url:
+        db_url = os.environ.get(
+            "DATABASE_URL",
+            "postgresql://scraper:scraper123@localhost:5437/scraper"
+        )
+
+    pool = await asyncpg.create_pool(db_url)
+    try:
+        if not job_url_pattern:
+            spans = await fetch_all_spans(pool)
+        else:
+            spans = await fetch_spans_for_jobs(pool, job_url_pattern)
+            if not spans:
+                print(f"⚠️  No spans found for jobs matching '{job_url_pattern}'")
+                return None
+
+        result = analyze_spans(spans, config)
+        print_validation_report(result, config, brief)
+        return result
+    finally:
+        # Always release the pool, including on the early return above.
+        await pool.close()
+
+
+async def run_all_validations(db_url: str | None = None):
+    """
+    Run validation for all sectors with available data.
+
+    Each (sector, job URL pattern) pair in the built-in table is validated via
+    run_validation(), which prints a full report; a short summary of every
+    pair that produced a result is printed afterwards.
+
+    Args:
+        db_url: Database URL passed through to run_validation().
+    """
+    # Known jobs and their sectors
+    jobs_by_sector = {
+        "ENTERTAINMENT": ["gokarts", "soho"],
+        "AUTOMOTIVE": ["clickrent"],
+        "PERSONAL_SERVICES": ["fleitas"],
+        "FOOD_DINING": ["fika"],
+    }
+
+    # Keyed by (sector, pattern). A joined "sector:pattern" string would have
+    # to be split again later, and that breaks as soon as a pattern itself
+    # contains ":" (for example a pattern starting with "https://").
+    results: dict[tuple[str, str], ValidationResult] = {}
+
+    for sector, job_patterns in jobs_by_sector.items():
+        print(f"\n{'='*70}")
+        print(f"Validating {sector}...")
+        print(f"{'='*70}")
+
+        for pattern in job_patterns:
+            result = await run_validation(sector, pattern, db_url)
+            if result:
+                results[(sector, pattern)] = result
+
+    # Summary
+    print("\n" + "=" * 70)
+    print("VALIDATION SUMMARY")
+    print("=" * 70)
+
+    for (sector, pattern), result in results.items():
+        print(f"\n{sector} ({pattern}):")
+        print(f"  Coverage: {result.enabled_coverage:.1%}")
+        print(f"  Spans: {result.span_count}")
+        if result.disabled_hits:
+            print(f"  ⚠️ Disabled hits: {sum(result.disabled_hits.values())}")
+        if result.recommendations:
+            print(f"  Recommendations: {len(result.recommendations)}")
+
+
+def main():
+    """
+    Command-line entry point.
+
+    --all runs every known validation; otherwise --sector (optionally narrowed
+    by --job-url) runs a single one. With neither, the help text is printed and
+    the process exits with status 1.
+    """
+    parser = argparse.ArgumentParser(description="Validate L1 primitive configs")
+    parser.add_argument("--sector", help="Sector code (e.g., ENTERTAINMENT)")
+    parser.add_argument("--job-url", help="Job URL pattern to filter (e.g., 'gokarts')")
+    parser.add_argument("--all", action="store_true", help="Run all validations")
+    parser.add_argument("--db-url", help="Database URL")
+
+    args = parser.parse_args()
+
+    # Nothing to do without a mode: show usage and signal failure.
+    if not (args.all or args.sector):
+        parser.print_help()
+        sys.exit(1)
+
+    # --all takes precedence over --sector when both are given.
+    if args.all:
+        job = run_all_validations(args.db_url)
+    else:
+        job = run_validation(args.sector, args.job_url, args.db_url)
+    asyncio.run(job)
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/reviewiq-pipeline/scripts/validate_l1_configs_v2.py
+++ b/packages/reviewiq-pipeline/scripts/validate_l1_configs_v2.py
@@ -0,0 +1,421 @@
+#!/usr/bin/env python3
+"""
+Wave 1 L1 Config Validation Script - V2 (Sector-Scoped)
+
+Validates L1 primitive configs against SECTOR-SPECIFIC review data.
+Only validates sectors where we have real business data.
+
+Key improvement over v1: spans are filtered by business → sector mapping,
+ensuring "TASTE in HEALTHCARE" noise doesn't pollute results.
+
+Usage:
+    python validate_l1_configs_v2.py --sector ENTERTAINMENT
+    python validate_l1_configs_v2.py --sector AUTOMOTIVE
+    python validate_l1_configs_v2.py --all
+    python validate_l1_configs_v2.py --report  # Summary only
+"""
+
+import argparse
+import asyncio
+import json
+import os
+from collections import Counter
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+import asyncpg
+
+# Paths: data files live in the "data" directory two levels up from this script file.
+DATA_DIR = Path(__file__).parent.parent / "data"
+CONFIGS_DIR = DATA_DIR / "primitive_configs" / "l1"  # holds "<sector>_config.json" files
+BRIEFS_DIR = DATA_DIR / "sector_briefs"
+
+# Business → Sector mapping (ground truth)
+# Keys are the business identifiers used to select spans; values are sector codes.
+BUSINESS_TO_SECTOR = {
+    "Go Karts Mar Menor": "ENTERTAINMENT",
+    "ClickRent Gran Canaria | Alquiler de Coches y Furgonetas": "AUTOMOTIVE",
+    "Soho Club": "ENTERTAINMENT",
+    "Fika": "FOOD_DINING",
+}
+
+# Sectors with real data
+# Maintained by hand; currently identical to the set of values in
+# BUSINESS_TO_SECTOR, so keep the two in sync when adding a business.
+SECTORS_WITH_DATA = {"ENTERTAINMENT", "AUTOMOTIVE", "FOOD_DINING"}
+
+# URT code to primitive mapping
+# Many-to-one. Access (A*) codes reuse primitives that also serve other
+# domains (e.g. "A3.03" -> SPEED).
+URT_TO_PRIMITIVE = {
+    # Offering codes
+    "O1.01": "CONSISTENCY", "O1.02": "CRAFT", "O1.03": "FRESHNESS",
+    "O1.04": "EFFECTIVENESS", "O1.05": "TASTE", "O1.06": "CONDITION",
+    "O2.01": "ACCURACY", "O2.02": "EFFECTIVENESS", "O2.03": "CRAFT",
+    "O3.01": "ACCURACY", "O3.02": "CONSISTENCY", "O3.03": "EFFECTIVENESS",
+    # People codes
+    "P1.01": "MANNER", "P1.02": "MANNER", "P1.03": "ATTENTIVENESS",
+    "P1.04": "COMMUNICATION", "P1.05": "ATTENTIVENESS",
+    "P2.01": "COMPETENCE", "P2.02": "COMPETENCE", "P2.03": "COMPETENCE",
+    "P3.01": "COMMUNICATION", "P3.02": "COMMUNICATION", "P3.03": "COMMUNICATION",
+    # Journey codes
+    "J1.01": "SPEED", "J1.02": "RELIABILITY", "J1.03": "FRICTION",
+    "J1.04": "SPEED", "J1.05": "RELIABILITY",
+    "J2.01": "RELIABILITY", "J2.02": "RELIABILITY", "J2.03": "FRICTION",
+    "J3.01": "FRICTION", "J3.02": "FRICTION", "J3.03": "FRICTION",
+    # Environment codes
+    "E1.01": "CLEANLINESS", "E1.02": "COMFORT", "E1.03": "AMBIANCE",
+    "E1.04": "AMBIANCE", "E1.05": "COMFORT",
+    "E2.01": "AMBIANCE", "E2.02": "COMFORT", "E2.03": "COMFORT",
+    "E2.04": "AMBIANCE", "E2.05": "DIGITAL_UX",
+    "E3.01": "SAFETY", "E3.02": "SAFETY", "E3.03": "ACCESSIBILITY",
+    "E4.01": "ACCESSIBILITY", "E4.02": "ACCESSIBILITY", "E4.03": "DIGITAL_UX",
+    # Access codes
+    "A1.01": "AVAILABILITY", "A1.02": "AVAILABILITY", "A1.03": "AVAILABILITY",
+    "A1.04": "ACCESSIBILITY", "A1.05": "ACCESSIBILITY",
+    "A2.01": "ACCESSIBILITY", "A2.02": "ACCESSIBILITY", "A2.03": "DIGITAL_UX",
+    "A3.01": "ACCESSIBILITY", "A3.02": "ACCESSIBILITY", "A3.03": "SPEED",
+    "A4.01": "ACCESSIBILITY", "A4.02": "ACCESSIBILITY", "A4.03": "AVAILABILITY",
+    # Value codes
+    "V1.01": "PRICE_LEVEL", "V1.02": "PRICE_FAIRNESS", "V1.03": "PRICE_TRANSPARENCY",
+    "V2.01": "PRICE_FAIRNESS", "V2.02": "PRICE_TRANSPARENCY", "V2.03": "VALUE_FOR_MONEY",
+    "V3.01": "VALUE_FOR_MONEY", "V3.02": "VALUE_FOR_MONEY", "V3.03": "PRICE_FAIRNESS",
+    "V4.01": "VALUE_FOR_MONEY", "V4.02": "VALUE_FOR_MONEY", "V4.03": "VALUE_FOR_MONEY",
+    # Relationship codes (map to meta - these should stay unmapped)
+    # An explicit None makes .get() return None, exactly as for a missing key.
+    "R1.01": None, "R1.02": None, "R1.03": None,
+    "R2.01": None, "R2.02": None, "R2.03": None,
+    "R3.01": None, "R3.02": None, "R3.03": None,
+    "R4.01": None, "R4.02": None, "R4.03": None,
+}
+
+# Minimum threshold for "enable" recommendations (% of sector spans)
+# Expressed in percentage points (3.0 means 3%), not as a 0..1 fraction.
+ENABLE_THRESHOLD_PCT = 3.0  # Only recommend enable if >= 3% of sector spans
+
+
+@dataclass
+class SectorValidation:
+    """
+    Validation result for a single sector.
+
+    Built by analyze_sector_spans() and rendered by print_sector_report().
+    Only the first four fields are required; the rest default to empty values.
+    """
+    sector_code: str  # copied from the config's "sector_code" key
+    businesses: list[str]  # businesses whose spans were analyzed
+    span_count: int  # number of spans analyzed; the denominator for percentages
+
+    # Coverage
+    enabled_coverage: float  # fraction (0..1) of spans mapped to enabled primitives
+    disabled_hits: dict[str, int] = field(default_factory=dict)  # disabled primitive -> count
+    unmapped_count: int = 0  # spans whose URT code has no primitive
+
+    # Distribution
+    primitive_counts: dict[str, int] = field(default_factory=dict)
+    domain_distribution: dict[str, int] = field(default_factory=dict)  # keyed by URT domain letter
+    valence_distribution: dict[str, int] = field(default_factory=dict)
+    top_urt_codes: list[tuple[str, int]] = field(default_factory=list)  # (code, count), most frequent first
+
+    # Recommendations (threshold-gated)
+    recommended_enables: list[tuple[str, float]] = field(default_factory=list)  # (primitive, pct)
+    # NOTE(review): analyze_sector_spans() never fills recommended_disables,
+    # so it keeps its empty default — confirm whether that is intended.
+    recommended_disables: list[tuple[str, float]] = field(default_factory=list)
+    weight_issues: list[str] = field(default_factory=list)  # human-readable findings
+
+    # Metadata
+    validated_at: str = ""  # ISO-8601 timestamp string
+    config_version: str = ""  # from the config's "config_version" key
+
+
+def load_l1_config(sector_code: str) -> dict[str, Any] | None:
+    """
+    Load L1 config for a sector.
+
+    Args:
+        sector_code: Sector code (e.g. "ENTERTAINMENT"); it is lower-cased to
+            form the file name "<sector>_config.json" under CONFIGS_DIR.
+
+    Returns:
+        The parsed JSON content, or None when the config file does not exist.
+
+    Raises:
+        json.JSONDecodeError: If the file exists but is not valid JSON.
+    """
+    config_file = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
+    try:
+        # Read as UTF-8 explicitly rather than relying on the platform locale.
+        # Opening directly (instead of exists() + open()) also closes the
+        # window in which the file could disappear between the two calls.
+        with open(config_file, encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        return None
+
+
+def get_businesses_for_sector(sector_code: str) -> list[str]:
+    """Return the businesses that BUSINESS_TO_SECTOR assigns to the given sector, in table order."""
+    matching: list[str] = []
+    for name, assigned_sector in BUSINESS_TO_SECTOR.items():
+        if assigned_sector == sector_code:
+            matching.append(name)
+    return matching
+
+
+async def fetch_spans_for_businesses(pool: asyncpg.Pool, businesses: list[str]) -> list[dict]:
+    """
+    Fetch spans for specific businesses only.
+
+    Selects rows of pipeline.review_spans whose business_id is one of the given
+    values, ordered by created_at descending, and returns them as plain dicts.
+    An empty business list returns [] without touching the database.
+    """
+    if businesses:
+        query = """
+        SELECT
+            business_id,
+            urt_primary,
+            valence,
+            intensity,
+            span_text
+        FROM pipeline.review_spans
+        WHERE business_id = ANY($1)
+        ORDER BY created_at DESC
+    """
+        records = await pool.fetch(query, businesses)
+        return list(map(dict, records))
+
+    return []
+
+
+def analyze_sector_spans(
+    spans: list[dict],
+    config: dict[str, Any],
+    businesses: list[str],
+) -> SectorValidation:
+    """
+    Analyze spans for a specific sector.
+
+    Every span's URT code is tallied (code, valence, domain letter), mapped to
+    a primitive through URT_TO_PRIMITIVE, and checked against the config's
+    "enabled" and "disabled" lists.
+
+    Args:
+        spans: Span rows; the "urt_primary" and "valence" keys are read.
+            A missing or None URT code is counted as "?" (and is unmapped);
+            a missing or None valence is counted as "V0".
+        config: L1 config. "sector_code" is required; "enabled", "disabled",
+            "weights" and "config_version" (default "1.0") are optional.
+        businesses: Stored on the result unchanged.
+
+    Returns:
+        A SectorValidation. recommended_enables holds disabled primitives at
+        or above ENABLE_THRESHOLD_PCT percent of spans; validated_at is the
+        current UTC time as a naive ISO-8601 string.
+
+    Raises:
+        KeyError: If config has no "sector_code" key.
+    """
+    from datetime import timezone
+
+    sector_code = config["sector_code"]
+    enabled = set(config.get("enabled", []))
+    disabled = set(config.get("disabled", []))
+    weights = config.get("weights", {})
+    config_version = config.get("config_version", "1.0")
+
+    # Counters
+    primitive_counts: Counter = Counter()
+    domain_counts: Counter = Counter()
+    valence_counts: Counter = Counter()
+    urt_counts: Counter = Counter()
+    disabled_hits: Counter = Counter()
+    unmapped = 0
+    enabled_hits = 0
+
+    for span in spans:
+        # A row can carry an explicit None, which dict.get's default does not
+        # replace; normalise both fields so counting and indexing are safe.
+        urt_code = span.get("urt_primary") or "?"
+        valence = span.get("valence") or "V0"
+
+        urt_counts[urt_code] += 1
+        valence_counts[valence] += 1
+        domain_counts[urt_code[0]] += 1  # domain = leading letter of the code
+
+        primitive = URT_TO_PRIMITIVE.get(urt_code)
+        if not primitive:
+            unmapped += 1
+            continue
+
+        primitive_counts[primitive] += 1
+        if primitive in enabled:
+            enabled_hits += 1
+        elif primitive in disabled:
+            disabled_hits[primitive] += 1
+
+    total = len(spans)
+    enabled_coverage = enabled_hits / total if total > 0 else 0
+
+    # Threshold-gated recommendations
+    recommended_enables = []
+    for prim, count in disabled_hits.most_common():
+        pct = count / total * 100 if total > 0 else 0
+        if pct >= ENABLE_THRESHOLD_PCT:
+            recommended_enables.append((prim, pct))
+
+    # Weight issues: weighted and enabled, yet never seen
+    weight_issues = []
+    for prim in weights:
+        if primitive_counts[prim] == 0 and prim in enabled:
+            weight_issues.append(f"{prim} weighted ({weights[prim]}x) but 0 appearances")
+
+    # High-frequency unweighted (top 5 primitives at >= 10% of spans)
+    for prim, count in primitive_counts.most_common(5):
+        pct = count / total * 100 if total > 0 else 0
+        if prim in enabled and prim not in weights and pct >= 10:
+            weight_issues.append(f"{prim} high freq ({pct:.1f}%) but unweighted")
+
+    # datetime.utcnow() is deprecated. Take an aware UTC "now" and drop the
+    # tzinfo so the ISO string keeps the exact format it had before.
+    validated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
+
+    return SectorValidation(
+        sector_code=sector_code,
+        businesses=businesses,
+        span_count=total,
+        enabled_coverage=enabled_coverage,
+        disabled_hits=dict(disabled_hits),
+        unmapped_count=unmapped,
+        primitive_counts=dict(primitive_counts),
+        domain_distribution=dict(domain_counts),
+        valence_distribution=dict(valence_counts),
+        top_urt_codes=urt_counts.most_common(15),
+        recommended_enables=recommended_enables,
+        weight_issues=weight_issues,
+        validated_at=validated_at,
+        config_version=config_version,
+    )
+
+
+def print_sector_report(result: SectorValidation, config: dict):
+    """
+    Print detailed validation report for a sector.
+
+    Sections, in order: data source, coverage, domain distribution, top 12
+    primitives, threshold-gated enable recommendations, disabled primitives
+    below the threshold (top 5), weight issues and the validation timestamp.
+    Percentages are relative to result.span_count.
+    """
+    has_spans = result.span_count > 0
+    rule = "=" * 70
+
+    def share(n):
+        # Percentage of all analyzed spans; 0 when nothing was analyzed.
+        return n / result.span_count * 100 if has_spans else 0
+
+    print("\n" + rule)
+    print(f"SECTOR-SCOPED VALIDATION: {result.sector_code}")
+    print(rule)
+
+    print(f"\n📊 DATA SOURCE")
+    print(f"   Businesses: {', '.join(result.businesses)}")
+    print(f"   Total spans: {result.span_count:,}")
+    print(f"   Config version: {result.config_version}")
+
+    print(f"\n📈 COVERAGE")
+    print(f"   Enabled coverage: {result.enabled_coverage:.1%}")
+    if has_spans:
+        print(f"   Unmapped (R-domain): {result.unmapped_count} ({result.unmapped_count/result.span_count*100:.1f}%)")
+    else:
+        # With no spans an empty line is printed in place of the unmapped figure.
+        print("")
+
+    # Domain distribution
+    print(f"\n📁 DOMAIN DISTRIBUTION")
+    domain_names = {"O": "Offering", "P": "People", "J": "Journey",
+                    "E": "Environment", "A": "Access", "V": "Value", "R": "Relationship"}
+    for domain in "OPJEVRA":
+        count = result.domain_distribution.get(domain, 0)
+        pct = share(count)
+        bar = "█" * int(pct / 2)  # one bar character per two percentage points
+        print(f"   {domain} {domain_names.get(domain, '?'):12} {count:4} ({pct:5.1f}%) {bar}")
+
+    # Top primitives
+    print(f"\n🔝 TOP PRIMITIVES (sector-scoped)")
+    enabled_set = set(config.get("enabled", []))
+    disabled_set = set(config.get("disabled", []))
+    weights = config.get("weights", {})
+
+    by_count = sorted(result.primitive_counts.items(), key=lambda kv: kv[1], reverse=True)
+    for prim, count in by_count[:12]:
+        pct = share(count)
+        # ✓ enabled, ✗ disabled, ? listed in neither
+        status = "✓" if prim in enabled_set else ("✗" if prim in disabled_set else "?")
+        weight = ""
+        if prim in weights:
+            weight = f"({weights[prim]}x)"
+        print(f"   {status} {prim:20} {count:4} ({pct:5.1f}%) {weight}")
+
+    # Threshold-gated recommendations
+    if not result.recommended_enables:
+        print(f"\n✅ No primitives exceed {ENABLE_THRESHOLD_PCT}% threshold for enabling")
+    else:
+        print(f"\n⚠️  RECOMMENDED ENABLES (≥{ENABLE_THRESHOLD_PCT}% threshold)")
+        for prim, pct in result.recommended_enables:
+            count = result.disabled_hits.get(prim, 0)
+            print(f"   → ENABLE {prim}: {count} spans ({pct:.1f}%)")
+
+    # Low-frequency disabled (info only)
+    low_freq_disabled = []
+    for p, c in result.disabled_hits.items():
+        if c / result.span_count * 100 < ENABLE_THRESHOLD_PCT:
+            low_freq_disabled.append((p, c))
+    if low_freq_disabled:
+        print(f"\n📋 DISABLED BUT APPEARING (below threshold - no action)")
+        ranked = sorted(low_freq_disabled, key=lambda pc: pc[1], reverse=True)
+        for prim, count in ranked[:5]:
+            pct = count / result.span_count * 100
+            print(f"   {prim}: {count} ({pct:.1f}%)")
+
+    # Weight issues
+    if result.weight_issues:
+        print(f"\n⚖️  WEIGHT ISSUES")
+        for issue in result.weight_issues:
+            print(f"   • {issue}")
+
+    print(f"\n⏱️  Validated at: {result.validated_at}")
+    print(rule)
+
+
+async def validate_sector(
+    sector_code: str,
+    db_url: str | None = None,
+    verbose: bool = True,
+) -> SectorValidation | None:
+    """
+    Validate a single sector with sector-scoped data.
+
+    Args:
+        sector_code: Sector to validate; must be in SECTORS_WITH_DATA.
+        db_url: Database URL. Falls back to the DATABASE_URL environment
+            variable, then to a built-in localhost default.
+        verbose: When true, print skip reasons and the full sector report.
+
+    Returns:
+        The SectorValidation, or None when the sector has no data, no L1
+        config, no mapped businesses, or no spans in the database.
+    """
+
+    def note(message: str) -> None:
+        # Skip reasons are only shown in verbose mode.
+        if verbose:
+            print(message)
+
+    if sector_code not in SECTORS_WITH_DATA:
+        note(f"⚠️  {sector_code}: No real business data available for validation")
+        return None
+
+    config = load_l1_config(sector_code)
+    if not config:
+        note(f"❌ No L1 config found for {sector_code}")
+        return None
+
+    businesses = get_businesses_for_sector(sector_code)
+    if not businesses:
+        note(f"⚠️  {sector_code}: No businesses mapped")
+        return None
+
+    if not db_url:
+        db_url = os.environ.get(
+            "DATABASE_URL",
+            "postgresql://scraper:scraper123@localhost:5437/scraper"
+        )
+
+    pool = await asyncpg.create_pool(db_url)
+    try:
+        spans = await fetch_spans_for_businesses(pool, businesses)
+        if not spans:
+            note(f"⚠️  {sector_code}: No spans found for businesses")
+            return None
+
+        result = analyze_sector_spans(spans, config, businesses)
+        if verbose:
+            print_sector_report(result, config)
+        return result
+    finally:
+        # Always release the pool, including on the early return above.
+        await pool.close()
+
+
+async def validate_all_sectors(db_url: str | None = None) -> dict[str, SectorValidation]:
+    """
+    Validate all sectors with available data.
+
+    Each sector in SECTORS_WITH_DATA is validated verbosely (its full report
+    is printed), followed by a summary table.
+
+    Args:
+        db_url: Database URL passed through to validate_sector().
+
+    Returns:
+        Mapping of sector code to its SectorValidation, containing only the
+        sectors that produced a result.
+    """
+    # NOTE(review): 20 is presumably the total number of sectors in the
+    # taxonomy — confirm and move to a shared constant if so.
+    total_sectors = 20
+
+    results = {}
+
+    # SECTORS_WITH_DATA is a set of strings, whose iteration order can differ
+    # from run to run; sort it so reports and the summary are reproducible.
+    for sector in sorted(SECTORS_WITH_DATA):
+        result = await validate_sector(sector, db_url, verbose=True)
+        if result:
+            results[sector] = result
+
+    # Print summary
+    print("\n" + "=" * 70)
+    print("VALIDATION SUMMARY")
+    print("=" * 70)
+    print(f"\n{'Sector':<20} {'Spans':>8} {'Coverage':>10} {'Enables':>10}")
+    print("-" * 50)
+
+    for sector, result in results.items():
+        enables = len(result.recommended_enables)
+        enables_str = f"{enables} recs" if enables > 0 else "✓ OK"
+        print(f"{sector:<20} {result.span_count:>8,} {result.enabled_coverage:>9.1%} {enables_str:>10}")
+
+    print("-" * 50)
+    print(f"Sectors validated: {len(results)}/{len(SECTORS_WITH_DATA)}")
+    print(f"Sectors without data: {total_sectors - len(SECTORS_WITH_DATA)}")
+
+    return results
+
+
+async def generate_summary_report(db_url: str | None = None) -> dict:
+    """
+    Generate a JSON summary report for all sectors.
+
+    Each sector in SECTORS_WITH_DATA is validated quietly (nothing is printed)
+    and reduced to a small dict of headline figures.
+
+    Args:
+        db_url: Database URL passed through to validate_sector().
+
+    Returns:
+        Mapping of sector code to a dict with span_count, enabled_coverage
+        (rounded to 3 decimals), recommended_enables, weight_issues,
+        config_version and validated_at. Sectors without a result are omitted.
+    """
+    results = {}
+
+    # Sort the set so the key order of the emitted report is reproducible.
+    for sector in sorted(SECTORS_WITH_DATA):
+        result = await validate_sector(sector, db_url, verbose=False)
+        if result:
+            results[sector] = {
+                "span_count": result.span_count,
+                "enabled_coverage": round(result.enabled_coverage, 3),
+                "recommended_enables": result.recommended_enables,
+                "weight_issues": result.weight_issues,
+                "config_version": result.config_version,
+                "validated_at": result.validated_at,
+            }
+
+    return results
+
+
+def main():
+    """
+    Command-line entry point.
+
+    Modes are checked in this order: --report (print a JSON summary), --all
+    (validate every sector with data), --sector (validate one sector, code
+    upper-cased). With none of them, the help text and the list of sectors
+    with real data are printed.
+    """
+    parser = argparse.ArgumentParser(description="Sector-scoped L1 config validation")
+    parser.add_argument("--sector", help="Validate specific sector")
+    parser.add_argument("--all", action="store_true", help="Validate all sectors with data")
+    parser.add_argument("--report", action="store_true", help="Generate JSON summary report")
+    parser.add_argument("--db-url", help="Database URL")
+
+    args = parser.parse_args()
+
+    if args.report:
+        results = asyncio.run(generate_summary_report(args.db_url))
+        print(json.dumps(results, indent=2))
+        return
+
+    if args.all:
+        asyncio.run(validate_all_sectors(args.db_url))
+        return
+
+    if args.sector:
+        asyncio.run(validate_sector(args.sector.upper(), args.db_url))
+        return
+
+    # No mode selected: show usage plus the sectors that can be validated.
+    parser.print_help()
+    print("\n\nSectors with real data:", ", ".join(sorted(SECTORS_WITH_DATA)))
+
+
+if __name__ == "__main__":
+    main()
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/classification_pipeline.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/classification_pipeline.py
@@ -0,0 +1,733 @@
+"""
+Classification Pipeline - LLM-powered primitives classification.
+
+Classifies reviews using the primitives taxonomy (MANNER, SPEED, VALUE_FOR_MONEY, etc.)
+and stores results in detected_spans_v2.
+
+Stages:
+- fetch: Find reviews without classification
+- classify: LLM-powered span extraction with primitives
+- save: Store results to detected_spans_v2
+
+Usage:
+    pipeline = ClassificationPipeline()
+    await pipeline.initialize()
+    result = await pipeline.process({"business_id": "Go Karts Mar Menor", "limit": 100})
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import re
+import time
+import unicodedata
+import uuid
+from collections import Counter
+from datetime import datetime
+from typing import TYPE_CHECKING, Any
+
+from pipeline_core import (
+    BasePipeline,
+    DashboardConfig,
+    DashboardSection,
+    PipelineMetadata,
+    PipelineResult as BasePipelineResult,
+    StageResult,
+    WidgetConfig,
+)
+
+from reviewiq_pipeline.config import Config
+from reviewiq_pipeline.db.connection import DatabasePool
+from reviewiq_pipeline.services.llm_client import LLMClient, LLMClientBase
+
+if TYPE_CHECKING:
+    import asyncpg
+
+logger = logging.getLogger(__name__)
+
+# Stage names
+STAGE_NAMES = ["fetch", "classify", "save"]
+
+# Primitives taxonomy - maps primitive to domain
+PRIMITIVES_BY_DOMAIN = {
+    "O": ["TASTE", "CRAFT", "FRESHNESS", "TEMPERATURE", "EFFECTIVENESS", "ACCURACY", "CONDITION", "CONSISTENCY"],
+    "P": ["MANNER", "COMPETENCE", "ATTENTIVENESS", "COMMUNICATION"],
+    "J": ["SPEED", "FRICTION", "RELIABILITY", "AVAILABILITY"],
+    "E": ["CLEANLINESS", "COMFORT", "SAFETY", "AMBIANCE", "ACCESSIBILITY", "DIGITAL_UX"],
+    "V": ["PRICE_LEVEL", "PRICE_FAIRNESS", "PRICE_TRANSPARENCY", "VALUE_FOR_MONEY"],
+    "meta": ["HONESTY", "ETHICS", "PROMISES", "ACKNOWLEDGMENT", "RESPONSE_QUALITY", "RECOVERY",
+             "RETURN_INTENT", "RECOMMEND", "RECOGNITION", "UNMAPPED", "NON_INFORMATIVE"],
+}
+
+# Flatten the taxonomy: ALL_PRIMITIVES lists every primitive, PRIMITIVE_TO_DOMAIN maps each back to its domain key
+ALL_PRIMITIVES = []
+PRIMITIVE_TO_DOMAIN = {}
+for domain, primitives in PRIMITIVES_BY_DOMAIN.items():
+    for p in primitives:
+        ALL_PRIMITIVES.append(p)
+        PRIMITIVE_TO_DOMAIN[p] = domain
+
+# Classification prompt
+CLASSIFICATION_PROMPT = """You are a review classifier using primitive-based analysis.
+
+## TASK
+Extract semantic spans from customer reviews and classify each span to exactly ONE primitive.
+
+## PRIMITIVES (use ONLY these)
+### OUTPUT (O) - Product/Service Quality
+- TASTE: Flavor quality (food/beverage)
+- CRAFT: Skill of execution, craftsmanship
+- FRESHNESS: How fresh/new the product is
+- TEMPERATURE: Serving temperature
+- EFFECTIVENESS: Does it work/achieve purpose
+- ACCURACY: Correct execution of order
+- CONDITION: State at delivery
+- CONSISTENCY: Same quality each time
+
+### PEOPLE (P) - Staff Interactions
+- MANNER: Friendliness and warmth
+- COMPETENCE: Knowledge and skill
+- ATTENTIVENESS: Being present and responsive
+- COMMUNICATION: Clarity and updates
+
+### JOURNEY (J) - Process and Timing
+- SPEED: How fast things happen
+- FRICTION: Ease of process
+- RELIABILITY: Dependable service
+- AVAILABILITY: Access to service/staff
+
+### ENVIRONMENT (E) - Physical/Digital Space
+- CLEANLINESS: Hygiene and tidiness
+- COMFORT: Physical ease
+- SAFETY: Physical safety
+- AMBIANCE: Overall mood/atmosphere
+- ACCESSIBILITY: Ease of access
+- DIGITAL_UX: Digital experience
+
+### VALUE (V) - Cost and Worth
+- PRICE_LEVEL: Absolute cost
+- PRICE_FAIRNESS: Fair for what you get
+- PRICE_TRANSPARENCY: Clear about costs
+- VALUE_FOR_MONEY: Overall value assessment
+
+### META - Trust and Sentiment
+- HONESTY: Truthfulness
+- ETHICS: Moral conduct
+- PROMISES: Keeping commitments
+- ACKNOWLEDGMENT: Recognizing issues
+- RESPONSE_QUALITY: How business responds
+- RECOVERY: Making amends
+- RETURN_INTENT: Would come back
+- RECOMMEND: Would suggest to others
+- RECOGNITION: Customer acknowledgment
+- UNMAPPED: Cannot classify (use sparingly)
+- NON_INFORMATIVE: No actionable content
+
+## RULES
+1. Extract 1-5 spans per review (prefer fewer, larger spans about same topic)
+2. Each span gets exactly ONE primitive (most specific match)
+3. Valence: + (positive), - (negative), 0 (neutral), ± (mixed)
+4. Intensity: 1 (low), 2 (moderate), 3 (high/extreme)
+5. Detail: 1 (vague), 2 (some detail), 3 (specific/actionable)
+6. Confidence: 0.0 to 1.0
+
+## OUTPUT FORMAT (JSON only)
+{
+  "spans": [
+    {
+      "text": "exact text from review",
+      "start": 0,
+      "end": 25,
+      "primitive": "MANNER",
+      "valence": "+",
+      "intensity": 2,
+      "detail": 2,
+      "confidence": 0.85,
+      "entity": null,
+      "entity_type": null
+    }
+  ]
+}
+
+Return valid JSON only, no markdown."""
+
+# Non-informative detection: text that is only punctuation, only emoji/symbols, or a bare "translated by google" banner
+PURE_JUNK_RE = re.compile(
+    r'^[\s\.\!\?\,\-\_\~\*\#\@]+$'
+    r'|^[\U0001F300-\U0001F9FF\U0001FA00-\U0001FAFF\U00002600-\U000027BF\s\.\!\?]+$'
+    r'|^(translated by google|traducido por google)[\.\s]*$',
+    re.IGNORECASE
+)
+
+
+def is_non_informative(text: str) -> tuple[bool, str]:
+    """Decide whether a review can skip the LLM; returns (flag, reason), reason is "" when informative."""
+    stripped = text.strip() if text else ""
+    if not stripped:
+        return True, "empty"
+    if PURE_JUNK_RE.match(stripped):
+        return True, "junk_pattern"
+
+    # Tally Unicode letters (category L*) and numbers (category N*) in one pass.
+    letters = numbers = 0
+    for ch in stripped:
+        major = unicodedata.category(ch)[0]
+        if major == "L":
+            letters += 1
+        elif major == "N":
+            numbers += 1
+    if letters == 0 and numbers == 0:
+        return True, "no_content"
+
+    # The same token repeated three or more times (case-insensitive) with under 20 letters in total.
+    words = stripped.split()
+    if len(words) >= 3 and letters < 20 and len({w.lower() for w in words}) == 1:
+        return True, "pure_repetition"
+    return False, ""
+
+
+def compute_review_hash(text: str) -> str:
+    """Return the first 16 hex chars of SHA-256 over the stripped, lowercased text (dedup key)."""
+    canonical = text.strip().lower().encode()
+    return hashlib.sha256(canonical).hexdigest()[:16]
+
+
+class ClassificationPipeline(BasePipeline):
+    """
+    Classification Pipeline - LLM-powered primitives classification.
+
+    Processes reviews through LLM to extract semantic spans and classify
+    them using the primitives taxonomy.
+    """
+
+    def __init__(self, config: Config | None = None):
+        """Keep the given configuration (or a default Config()); no connection is opened here."""
+        self._config = config if config else Config()
+        self._initialized = False
+        self._llm: LLMClientBase | None = None
+        self._db: DatabasePool | None = None
+
+    @property
+    def config(self) -> Config:
+        """Configuration object held by this pipeline instance."""
+        return self._config
+
+    @property
+    def metadata(self) -> PipelineMetadata:
+        """Static descriptor of this pipeline: id, display name, description, version and stages."""
+        descriptor = dict(
+            id="classification",
+            name="Primitives Classification Pipeline",
+            description="LLM-powered span extraction and primitives classification. Processes reviews and stores results in detected_spans_v2.",
+            version="1.0.0", input_type="BusinessInput",
+            stages=STAGE_NAMES,
+        )
+        return PipelineMetadata(**descriptor)
+
+    async def initialize(self) -> None:
+        """Open the database pool and create the LLM client; does nothing once initialized."""
+        if not self._initialized:
+            logger.info("Initializing Classification pipeline...")
+
+            # Database pool first, then the LLM client loaded with the classification prompt.
+            pool = DatabasePool(self._config)
+            self._db = pool
+            await pool.initialize()
+
+            client = LLMClient.create(self._config)
+            self._llm = client
+            client.set_prompt(CLASSIFICATION_PROMPT)
+            self._initialized = True
+            logger.info("Classification pipeline initialized")
+
+    async def close(self) -> None:
+        """Close the LLM client, then the database pool, and mark the pipeline uninitialized."""
+        llm = self._llm
+        if llm:
+            await llm.close()
+            self._llm = None
+        db = self._db
+        if db:
+            await db.close()
+            self._db = None
+        self._initialized = False
+        logger.info("Classification pipeline closed")
+
+    async def process(
+        self,
+        input_data: dict[str, Any],
+        stages: list[str] | None = None,
+    ) -> BasePipelineResult:
+        """
+        Process reviews through classification.
+
+        Args:
+            input_data: Must contain business_id OR job_id. Optional: limit, batch_size
+            stages: List of stage names to run (default: all)
+
+        Returns:
+            BasePipelineResult with classification stats; success is True only when
+            every stage that was attempted reported success.
+        """
+        await self.initialize()
+
+        stages = stages or STAGE_NAMES
+        stages_run: list[str] = []
+        stage_results: dict[str, Any] = {}
+
+        business_id = input_data.get("business_id")
+        job_id = input_data.get("job_id")
+        limit = input_data.get("limit", 100)
+        batch_size = input_data.get("batch_size", 10)
+
+        # Resolve business_id from job_id
+        if not business_id and job_id:
+            try:
+                async with self._db.pool.acquire() as conn:
+                    row = await conn.fetchrow(
+                        "SELECT business_name FROM jobs WHERE job_id = $1",
+                        uuid.UUID(job_id) if isinstance(job_id, str) else job_id,
+                    )
+                    if row and row["business_name"]:
+                        business_id = row["business_name"]
+                        logger.info(f"Resolved business_id '{business_id}' from job_id")
+            except Exception as e:
+                logger.warning(f"Failed to resolve business_id: {e}")
+
+        if not business_id:
+            return BasePipelineResult(
+                pipeline_id="classification",
+                stages_run=[],
+                stage_results={},
+                success=False,
+                error="business_id is required (provide business_id or job_id)",
+            )
+
+        # Generate run_id for this execution
+        run_id = uuid.uuid4()
+        context = {
+            "business_id": business_id,
+            "job_id": job_id,
+            "limit": limit,
+            "batch_size": batch_size,
+            "run_id": run_id,
+            "reviews": [],
+            "classified": [],
+        }
+
+        try:
+            # Stage: Fetch unclassified reviews
+            if "fetch" in stages:
+                start = time.time()
+                logger.info(f"Fetching unclassified reviews for {business_id}")
+
+                try:
+                    reviews = await self._fetch_unclassified(business_id, limit)
+                    context["reviews"] = reviews
+                    duration_ms = int((time.time() - start) * 1000)
+                    stages_run.append("fetch")
+                    stage_results["fetch"] = StageResult(
+                        stage="fetch",
+                        success=True,
+                        data={"reviews_found": len(reviews)},
+                        error=None,
+                        duration_ms=duration_ms,
+                    )
+                    logger.info(f"Found {len(reviews)} unclassified reviews")
+                except Exception as e:
+                    logger.exception("Fetch failed")
+                    return BasePipelineResult(
+                        pipeline_id="classification",
+                        stages_run=stages_run,
+                        stage_results=stage_results,
+                        success=False,
+                        error=f"Fetch failed: {e}",
+                    )
+
+            # Stage: Classify reviews
+            if "classify" in stages and context["reviews"]:
+                start = time.time()
+                logger.info(f"Classifying {len(context['reviews'])} reviews")
+
+                try:
+                    classified = await self._classify_reviews(context["reviews"], business_id, batch_size)
+                    context["classified"] = classified
+                    duration_ms = int((time.time() - start) * 1000)
+                    stages_run.append("classify")
+
+                    total_spans = sum(len(c.get("spans", [])) for c in classified)
+                    stage_results["classify"] = StageResult(
+                        stage="classify",
+                        success=True,
+                        data={
+                            "reviews_classified": len(classified),
+                            "total_spans": total_spans,
+                            "llm_cost_usd": self._llm.total_cost_usd if self._llm else 0,
+                        },
+                        error=None,
+                        duration_ms=duration_ms,
+                    )
+                    logger.info(f"Classified {len(classified)} reviews, {total_spans} spans")
+                except Exception as e:
+                    logger.exception("Classification failed")
+                    stage_results["classify"] = StageResult(
+                        stage="classify",
+                        success=False,
+                        data={},
+                        error=str(e),
+                        duration_ms=int((time.time() - start) * 1000),
+                    )
+
+            # Stage: Save results
+            if "save" in stages and context["classified"]:
+                start = time.time()
+                logger.info(f"Saving {len(context['classified'])} classifications")
+
+                try:
+                    saved_count = await self._save_classifications(
+                        context["classified"], business_id, job_id, run_id
+                    )
+                    duration_ms = int((time.time() - start) * 1000)
+                    stages_run.append("save")
+                    stage_results["save"] = StageResult(
+                        stage="save",
+                        success=True,
+                        data={"spans_saved": saved_count},
+                        error=None,
+                        duration_ms=duration_ms,
+                    )
+                    logger.info(f"Saved {saved_count} spans to detected_spans_v2")
+                except Exception as e:
+                    logger.exception("Save failed")
+                    stage_results["save"] = StageResult(
+                        stage="save",
+                        success=False,
+                        data={},
+                        error=str(e),
+                        duration_ms=int((time.time() - start) * 1000),
+                    )
+
+            # Failed stages are recorded in stage_results but never appended to stages_run,
+            # so judge success over stage_results (a StageResult may be a dict or an object).
+            outcomes = [
+                r.get("success", False) if isinstance(r, dict) else getattr(r, "success", False)
+                for r in stage_results.values()
+            ]
+            return BasePipelineResult(
+                pipeline_id="classification",
+                stages_run=stages_run,
+                stage_results=stage_results,
+                success=all(outcomes),
+            )
+
+        except Exception as e:
+            logger.exception("Pipeline failed")
+            return BasePipelineResult(
+                pipeline_id="classification",
+                stages_run=stages_run,
+                stage_results=stage_results,
+                success=False,
+                error=str(e),
+            )
+
+    async def _fetch_unclassified(
+        self,
+        business_id: str,
+        limit: int,
+    ) -> list[dict[str, Any]]:
+        """Return up to `limit` reviews of the business, newest first, that have no detected spans yet.
+
+        Each item carries review_id, business_id, text, rating (3 when NULL or 0) and review_time.
+        """
+        # Anti-join: keep reviews_latest rows without a matching (review_id, business_id) span row.
+        query = """
+                SELECT
+                    r.review_id,
+                    r.business_id,
+                    r.text AS review_text,
+                    r.rating,
+                    r.review_time
+                FROM pipeline.reviews_latest r
+                LEFT JOIN (
+                    SELECT DISTINCT review_id, business_id
+                    FROM pipeline.detected_spans_v2
+                ) s ON s.review_id = r.review_id AND s.business_id = r.business_id
+                WHERE r.business_id = $1
+                    AND s.review_id IS NULL
+                    AND r.text IS NOT NULL
+                    AND LENGTH(r.text) > 0
+                ORDER BY r.review_time DESC
+                LIMIT $2
+                """
+        async with self._db.pool.acquire() as conn:
+            records = await conn.fetch(query, business_id, limit)
+
+        pending: list[dict[str, Any]] = []
+        for rec in records:
+            pending.append({
+                "review_id": rec["review_id"],
+                "business_id": rec["business_id"],
+                "text": rec["review_text"],
+                "rating": rec["rating"] or 3,
+                "review_time": rec["review_time"],
+            })
+        return pending
+
+    async def _classify_reviews(
+        self,
+        reviews: list[dict[str, Any]],
+        business_id: str,
+        batch_size: int,
+    ) -> list[dict[str, Any]]:
+        """
+        Classify reviews one by one and return one result dict per review.
+
+        Non-informative reviews skip the LLM and receive a single NON_INFORMATIVE
+        span; a failed LLM call degrades to a single UNMAPPED span (mode "llm_error").
+        Primitives outside ALL_PRIMITIVES are rewritten to UNMAPPED, with the rejected
+        label kept in the span's "unmapped_keywords".
+
+        Args:
+            reviews: Dicts carrying "review_id" plus optional "text" and "rating".
+            business_id: Business identifier stamped on every result.
+            batch_size: Accepted for interface compatibility; not used here.
+
+        Returns:
+            Result dicts with review_id, business_id, text, rating, spans and
+            review_hash, plus "model" for reviews the LLM classified.
+        """
+
+        def whole_text_span(text: str, primitive: str, confidence: float, mode: str) -> dict[str, Any]:
+            """Build the single span that covers an entire review."""
+            return {
+                "text": text,
+                "start": 0,
+                "end": len(text),
+                "primitive": primitive,
+                "valence": "0",
+                "intensity": 1,
+                "detail": 1,
+                "confidence": confidence,
+                "entity": None,
+                "entity_type": None,
+                "mode": mode,
+            }
+
+        results = []
+
+        for review in reviews:
+            text = review.get("text") or ""  # a None text is treated as empty
+            rating = review.get("rating", 3)
+            result: dict[str, Any] = {
+                "review_id": review["review_id"],
+                "business_id": business_id,
+                "text": text,
+                "rating": rating,
+                "spans": [],
+                "review_hash": compute_review_hash(text),
+            }
+            results.append(result)
+
+            # Check for non-informative
+            is_junk, reason = is_non_informative(text)
+            if is_junk:
+                result["spans"] = [whole_text_span(text, "NON_INFORMATIVE", 1.0, reason)]
+                continue
+
+            # Classify with LLM
+            try:
+                # NOTE(review): only the review text is sent; the rating is not
+                # forwarded to the LLM. Confirm whether classify() should see it.
+                response, metadata = await self._llm.classify(text)
+                spans = response.get("spans") or []
+
+                # Validate primitives
+                for span in spans:
+                    label = span.get("primitive")
+                    if label not in ALL_PRIMITIVES:
+                        # Capture the rejected label before overwriting it; doing it
+                        # the other way round always stored ["UNMAPPED"].
+                        span["unmapped_keywords"] = ["unknown" if label is None else str(label)]
+                        span["primitive"] = "UNMAPPED"
+
+                model = metadata.get("model")
+                result["spans"] = spans
+                result["model"] = model
+            except Exception as e:
+                logger.warning(f"LLM classification failed for review {review['review_id']}: {e}")
+                # Fallback to UNMAPPED
+                result["spans"] = [whole_text_span(text, "UNMAPPED", 0.0, "llm_error")]
+
+        return results
+
+    async def _save_classifications(
+        self,
+        classifications: list[dict[str, Any]],
+        business_id: str,
+        job_id: str | None,
+        run_id: uuid.UUID,
+    ) -> int:
+        """
+        Insert one detected_spans_v2 row per span and return how many inserts succeeded.
+
+        Args:
+            classifications: Result dicts, each with "review_id" and a "spans" list.
+            business_id: Written to every row; also keys the GBP category path lookup.
+            job_id: Optional job reference (str or uuid.UUID); a malformed string raises ValueError.
+            run_id: Identifier of this pipeline execution, written to every row.
+
+        Returns:
+            Number of spans inserted; a span whose insert fails is logged and skipped.
+        """
+        saved_count = 0
+        config_version = f"primitives_v1_{datetime.utcnow().strftime('%Y%m%d')}"
+        # uuid.UUID() rejects UUID instances, which process() may pass through unchanged.
+        job_uuid = job_id if isinstance(job_id, uuid.UUID) else (uuid.UUID(job_id) if job_id else None)
+
+        async with self._db.pool.acquire() as conn:
+            # Get GBP path for business
+            gbp_row = await conn.fetchrow(
+                """
+                SELECT gbp_category_path
+                FROM jobs
+                WHERE business_name = $1
+                    AND gbp_category_path IS NOT NULL
+                ORDER BY created_at DESC
+                LIMIT 1
+                """,
+                business_id,
+            )
+            gbp_path = str(gbp_row["gbp_category_path"]) if gbp_row and gbp_row["gbp_category_path"] else "unknown"
+            sector_code = gbp_path.split(".", 1)[0]  # first path label; the whole path when it has no dot
+
+            for classification in classifications:
+                review_id = classification["review_id"]
+                review_hash = classification.get("review_hash")
+                model = classification.get("model")
+
+                for span in classification.get("spans", []):
+                    try:
+                        await conn.execute(
+                            """
+                            INSERT INTO pipeline.detected_spans_v2 (
+                                job_id, business_id, review_id, gbp_path, sector_code,
+                                config_version, primitive, valence, intensity, detail, mode,
+                                confidence, span_text, span_start, span_end,
+                                unmapped_keywords, entity, entity_type,
+                                model, review_hash, run_id, created_at
+                            ) VALUES (
+                                $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11,
+                                $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, NOW()
+                            )
+                            """,
+                            # Arguments are grouped to mirror the column list above ($1..$21).
+                            job_uuid, business_id, review_id, gbp_path, sector_code,
+                            config_version, span.get("primitive", "UNMAPPED"), span.get("valence", "0"),
+                            span.get("intensity", 1), span.get("detail", 1), span.get("mode"),
+                            span.get("confidence", 0.5), span.get("text", ""), span.get("start", 0), span.get("end", 0),
+                            span.get("unmapped_keywords"), span.get("entity"), span.get("entity_type"),
+                            model, review_hash, run_id,
+                        )
+                        saved_count += 1
+                    except Exception as e:
+                        logger.warning(f"Failed to save span: {e}")
+
+        return saved_count
+
+    def get_dashboard_config(self) -> DashboardConfig:
+        """Describe the dashboard: a single "stats" section holding three stat cards in one row."""
+        # Each card is 3 columns wide and 1 row high, placed left to right at x = 0, 3, 6.
+        reviews_card = WidgetConfig(
+            id="reviews_classified",
+            type="stat_card",
+            title="Reviews Classified",
+            grid={"x": 0, "y": 0, "w": 3, "h": 1},
+            config={"value_key": "reviews_classified"},
+        )
+        spans_card = WidgetConfig(
+            id="total_spans",
+            type="stat_card",
+            title="Total Spans",
+            grid={"x": 3, "y": 0, "w": 3, "h": 1},
+            config={"value_key": "total_spans"},
+        )
+        cost_card = WidgetConfig(
+            id="llm_cost",
+            type="stat_card",
+            title="LLM Cost",
+            grid={"x": 6, "y": 0, "w": 3, "h": 1},
+            config={"value_key": "llm_cost_usd", "format": "${value:.4f}"},
+        )
+        stats_section = DashboardSection(
+            id="stats",
+            title="Classification Stats",
+            widgets=[reviews_card, spans_card, cost_card],
+        )
+
+        return DashboardConfig(
+            pipeline_id="classification",
+            title="Classification Pipeline",
+            description="Monitor classification progress and quality",
+            sections=[stats_section],
+            default_time_range="7d",
+            refresh_interval=60,
+        )
+
+    async def get_widget_data(
+        self,
+        widget_id: str,
+        params: dict[str, Any],
+    ) -> dict[str, Any]:
+        """
+        Return per-business totals for the stat cards.
+
+        widget_id is not consulted: every widget receives the same three-key payload.
+        """
+        await self.initialize()
+
+        target = params.get("business_id")
+        if not target:
+            return {"error": "business_id required"}
+
+        totals_sql = """
+                SELECT
+                    COUNT(DISTINCT review_id) as reviews_classified,
+                    COUNT(*) as total_spans
+                FROM pipeline.detected_spans_v2
+                WHERE business_id = $1
+                """
+        async with self._db.pool.acquire() as conn:
+            totals = await conn.fetchrow(totals_sql, target)
+
+        payload = {key: totals[key] or 0 for key in ("reviews_classified", "total_spans")}
+        payload["llm_cost_usd"] = 0  # Would need to track this
+        return payload
+
+    async def health_check(self) -> dict[str, Any]:
+        """Probe the database with SELECT 1 and report the configured LLM; only a DB failure is unhealthy."""
+        await self.initialize()
+        report: dict[str, Any] = {}
+
+        # Database probe: any exception marks the pipeline unhealthy.
+        try:
+            async with self._db.pool.acquire() as conn:
+                await conn.fetchval("SELECT 1")
+        except Exception as db_error:
+            report["database"] = str(db_error)
+            db_ok = False
+        else:
+            report["database"] = "ok"
+            db_ok = True
+
+        # The LLM entry is informational; it never changes the healthy flag.
+        try:
+            if self._llm:
+                llm_status = f"{self._config.llm_provider}/{self._config.llm_model}"
+            else:
+                llm_status = "not_initialized"
+        except Exception as llm_error:
+            llm_status = str(llm_error)
+        report["llm"] = llm_status
+        return {"healthy": db_ok, "checks": report}
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/config.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/config.py
@@ -76,6 +76,51 @@ class Config(BaseSettings):
    batch_size: int = Field(default=50, ge=1, le=500)
    trust_score_floor: float = Field(default=0.2, ge=0.0, le=1.0)

+    # Batched Classification
+    classification_batch_size: int = Field(
+        default=0,
+        ge=0,
+        le=200,
+        description="Number of reviews per LLM call. 0 = auto-calculate based on context window",
+    )
+    classification_max_concurrent: int = Field(
+        default=0,
+        ge=0,
+        description="Maximum concurrent batch requests. 0 = unlimited (run all batches in parallel)",
+    )
+    classification_target_utilization: float = Field(
+        default=0.70,
+        ge=0.3,
+        le=0.85,
+        description="Target context window utilization. Optimal: 0.60-0.75. Above 0.85 causes ~23% quality degradation.",
+    )
+    use_prompt_caching: bool = Field(
+        default=True,
+        description="Enable prompt caching for cost reduction (OpenAI/Anthropic)",
+    )
+
+    # Smart Review Router (cost optimization)
+    router_enabled: bool = Field(
+        default=False,
+        description="Enable smart review routing to skip/route trivial reviews",
+    )
+    router_skip_enabled: bool = Field(
+        default=True,
+        description="Allow SKIP tier (no LLM, assign generic code)",
+    )
+    router_cheap_model_enabled: bool = Field(
+        default=True,
+        description="Allow CHEAP tier (use Haiku instead of Sonnet)",
+    )
+    router_cheap_model: str = Field(
+        default="claude-3-5-haiku-20241022",
+        description="Model to use for CHEAP tier routing",
+    )
+    router_conservative: bool = Field(
+        default=True,
+        description="Use conservative routing (fewer false negatives)",
+    )
+
    # Migrations
    migrations_path: str = Field(
        default="",
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/contracts.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/contracts.py
@@ -7,6 +7,7 @@ enabling independent development and validation of each stage.

 from __future__ import annotations

+from datetime import date
 from typing import Any, Literal, TypedDict


@@ -181,13 +182,14 @@ class ReviewToClassify(TypedDict):
    review_time: str


-class ClassificationConfig(TypedDict):
+class ClassificationConfig(TypedDict, total=False):
    """Configuration for LLM classification."""

    model: str
    taxonomy_version: str
    profile: ProfileType
    max_spans_per_review: int
+    job_id: str | None  # Optional job_id for tracking


 class Stage2Input(TypedDict):
@@ -329,6 +331,7 @@ class Stage3Input(TypedDict):
    """Input to Stage 3 issue routing."""

    spans: list[SpanToRoute]
+    job_id: str | None  # Optional job_id for linking issues to pipeline executions


 class RoutedSpan(TypedDict):
@@ -379,7 +382,7 @@ class FactRecord(TypedDict, total=False):
    # Keys
    business_id: str
    place_id: str
-    period_date: str
+    period_date: date
    bucket_type: str
    subject_type: SubjectType
    subject_id: str
@@ -574,7 +577,7 @@ class FactTimeseries(TypedDict, total=False):
    id: int
    business_id: str
    place_id: str
-    period_date: str
+    period_date: date
    bucket_type: BucketType
    subject_type: SubjectType
    subject_id: str
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/db/migrations/006_add_job_id_to_issues.sql
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/db/migrations/006_add_job_id_to_issues.sql
@@ -0,0 +1,10 @@
+-- Migration: 006_add_job_id_to_issues.sql
+-- Purpose: Add job_id column to issues table for tracking pipeline execution context
+
+-- Add job_id column to issues table
+ALTER TABLE pipeline.issues ADD COLUMN IF NOT EXISTS job_id UUID;
+
+-- Create index for filtering by job_id
+CREATE INDEX IF NOT EXISTS idx_issues_job_id ON pipeline.issues(job_id);
+
+COMMENT ON COLUMN pipeline.issues.job_id IS 'References the scraper job that triggered the pipeline execution';
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/db/migrations/013_implement_urt_ltree.sql
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/db/migrations/013_implement_urt_ltree.sql
@@ -0,0 +1,352 @@
+-- Migration: Implement URT taxonomy with PostgreSQL ltree
+-- Benefits:
+--   1. Hierarchical queries (find all codes under a domain/category)
+--   2. Index-assisted ancestor/descendant lookups (no recursive CTEs required)
+--   3. Pattern matching on paths (e.g., 'O.*' for all Offering codes)
+--   4. Efficient GiST indexing for tree operations
+--   5. Aggregations at any level of hierarchy
+
+-- Enable ltree extension
+CREATE EXTENSION IF NOT EXISTS ltree;
+
+-- ============================================================================
+-- NEW UNIFIED TAXONOMY TABLE
+-- ============================================================================
+
+CREATE TABLE IF NOT EXISTS pipeline.urt_taxonomy (
+    id SERIAL PRIMARY KEY,
+
+    -- ltree path: Domain.Category.Subcode (e.g., 'O.O1.O1_01'); one row per node, so paths are unique
+    path ltree NOT NULL UNIQUE,
+
+    -- Human-readable code (e.g., 'O1.01'); unique across domains, categories and subcodes
+    code VARCHAR(10) NOT NULL UNIQUE,
+
+    -- Node type for filtering
+    node_type VARCHAR(20) NOT NULL CHECK (node_type IN ('domain', 'category', 'subcode')),
+
+    -- Hierarchy level derived from the path depth (1=domain, 2=category, 3=subcode)
+    level INT GENERATED ALWAYS AS (nlevel(path)) STORED,
+
+    -- Names and definitions
+    name VARCHAR(100) NOT NULL,
+    definition TEXT,
+
+    -- Examples (for subcodes)
+    positive_example TEXT,
+    negative_example TEXT,
+
+    -- Actionability (for subcodes)
+    solution TEXT,
+    solution_complexity VARCHAR(10) DEFAULT 'medium',
+    marketing_angle TEXT,
+
+    -- Owner routing
+    default_owner VARCHAR(50),
+
+    -- Metadata (NOTE(review): nothing in this migration refreshes updated_at after insert)
+    is_active BOOLEAN DEFAULT TRUE,
+    created_at TIMESTAMP DEFAULT NOW(),
+    updated_at TIMESTAMP DEFAULT NOW()
+);
+
+-- ============================================================================
+-- INDEXES FOR LTREE OPERATIONS
+-- ============================================================================
+
+-- GiST index for ltree operations (ancestor, descendant, pattern matching); IF NOT EXISTS keeps re-runs safe
+CREATE INDEX IF NOT EXISTS idx_urt_taxonomy_path_gist ON pipeline.urt_taxonomy USING GIST (path);
+
+-- B-tree index for exact path lookups and sorting (overlaps the index PostgreSQL builds for UNIQUE(path))
+CREATE INDEX IF NOT EXISTS idx_urt_taxonomy_path_btree ON pipeline.urt_taxonomy USING BTREE (path);
+
+-- Index for code lookups (most common operation; overlaps the index PostgreSQL builds for UNIQUE(code))
+CREATE INDEX IF NOT EXISTS idx_urt_taxonomy_code ON pipeline.urt_taxonomy (code);
+
+-- Index for node type filtering
+CREATE INDEX IF NOT EXISTS idx_urt_taxonomy_node_type ON pipeline.urt_taxonomy (node_type);
+
+-- ============================================================================
+-- MIGRATE EXISTING DATA
+-- ============================================================================
+
+-- Insert domains (level 1): the single-letter domain code is also the whole ltree path
+INSERT INTO pipeline.urt_taxonomy (path, code, node_type, name, definition, default_owner)
+SELECT
+    code::ltree as path,
+    code,
+    'domain',
+    name,
+    -- Domain definitions from spec (a code not listed below gets a NULL definition)
+    CASE code
+        WHEN 'O' THEN 'Does the core product/service deliver?'
+        WHEN 'P' THEN 'How do personnel behave and perform?'
+        WHEN 'J' THEN 'Is the process smooth and timely?'
+        WHEN 'E' THEN 'Is the space functional and pleasant?'
+        WHEN 'A' THEN 'Can everyone participate fully?'
+        WHEN 'V' THEN 'Is the exchange fair and transparent?'
+        WHEN 'R' THEN 'Is trust built and maintained?'
+    END,
+    CASE code  -- default owner per domain (NULL for a code not listed)
+        WHEN 'O' THEN 'Product/Operations'
+        WHEN 'P' THEN 'HR/Training'
+        WHEN 'J' THEN 'Operations/Process'
+        WHEN 'E' THEN 'Facilities/IT'
+        WHEN 'A' THEN 'Compliance/Design'
+        WHEN 'V' THEN 'Finance/Pricing'
+        WHEN 'R' THEN 'Leadership/CX'
+    END
+FROM pipeline.urt_domains
+ON CONFLICT (code) DO NOTHING;
+
+-- Insert categories (level 2): path is '<domain>.<category>'
+INSERT INTO pipeline.urt_taxonomy (path, code, node_type, name, definition)
+SELECT
+    (domain_code || '.' || code)::ltree as path,
+    code,
+    'category',
+    name,
+    NULL -- Categories don't have definitions in current schema
+FROM pipeline.urt_categories
+ON CONFLICT (code) DO NOTHING;
+
+-- Insert subcodes (level 3): '.' separates ltree labels, so the dot inside the code becomes '_' in the path
+INSERT INTO pipeline.urt_taxonomy (path, code, node_type, name, definition, positive_example, negative_example, solution, solution_complexity, marketing_angle)
+SELECT
+    (domain_code || '.' || category_code || '.' || replace(code, '.', '_'))::ltree as path,
+    code,
+    'subcode',
+    name,
+    definition,
+    positive_example,
+    negative_example,
+    solution,
+    solution_complexity,
+    marketing_angle
+FROM pipeline.urt_subcodes
+ON CONFLICT (code) DO NOTHING;
+
+-- ============================================================================
+-- HELPER FUNCTIONS
+-- ============================================================================
+
+-- Get all ancestors of a code, root first (e.g., O1.01 -> [O, O1]); an unknown code yields no rows
+CREATE OR REPLACE FUNCTION pipeline.urt_ancestors(p_code VARCHAR)
+RETURNS TABLE(code VARCHAR, name VARCHAR, node_type VARCHAR, level INT) AS $$
+BEGIN  -- the RETURNS TABLE columns are PL/pgSQL variables, so every column below is alias-qualified (a bare "code" is ambiguous)
+    RETURN QUERY
+    SELECT t.code, t.name, t.node_type, t.level
+    FROM pipeline.urt_taxonomy t
+    WHERE t.path @> (SELECT src.path FROM pipeline.urt_taxonomy src WHERE src.code = p_code)
+    AND t.code != p_code  -- a path is its own ancestor under @>, so drop the node itself
+    ORDER BY t.level;
+END;
+$$ LANGUAGE plpgsql STABLE;
+
+-- Get all descendants of a code in path order (e.g., O -> all O* codes); an unknown code yields no rows
+CREATE OR REPLACE FUNCTION pipeline.urt_descendants(p_code VARCHAR)
+RETURNS TABLE(code VARCHAR, name VARCHAR, node_type VARCHAR, level INT) AS $$
+BEGIN  -- the RETURNS TABLE columns are PL/pgSQL variables, so every column below is alias-qualified (a bare "code" is ambiguous)
+    RETURN QUERY
+    SELECT t.code, t.name, t.node_type, t.level
+    FROM pipeline.urt_taxonomy t
+    WHERE t.path <@ (SELECT src.path FROM pipeline.urt_taxonomy src WHERE src.code = p_code)
+    AND t.code != p_code  -- a path is its own descendant under <@, so drop the node itself
+    ORDER BY t.path;
+END;
+$$ LANGUAGE plpgsql STABLE;
+
+-- Get siblings: the other nodes whose parent path equals this code's parent path
+CREATE OR REPLACE FUNCTION pipeline.urt_siblings(p_code VARCHAR)
+RETURNS TABLE(code VARCHAR, name VARCHAR, level INT) AS $$
+DECLARE
+    v_parent ltree;  -- path of p_code minus its last label; stays NULL when p_code is unknown
+BEGIN  -- the RETURNS TABLE columns are PL/pgSQL variables, so every column below is alias-qualified (a bare "code" is ambiguous)
+    SELECT subpath(src.path, 0, nlevel(src.path) - 1) INTO v_parent
+    FROM pipeline.urt_taxonomy src WHERE src.code = p_code;
+
+    RETURN QUERY
+    SELECT t.code, t.name, t.level
+    FROM pipeline.urt_taxonomy t
+    WHERE subpath(t.path, 0, nlevel(t.path) - 1) = v_parent
+    AND t.code != p_code
+    ORDER BY t.path;
+END;
+$$ LANGUAGE plpgsql STABLE;
+
+-- Resolve the domain code that any taxonomy code falls under
+CREATE OR REPLACE FUNCTION pipeline.urt_domain(p_code VARCHAR)
+RETURNS VARCHAR AS $$
+    SELECT anc.code FROM pipeline.urt_taxonomy anc
+    WHERE anc.node_type = 'domain'
+    AND anc.path @> (SELECT src.path FROM pipeline.urt_taxonomy src WHERE src.code = p_code);
+$$ LANGUAGE SQL;
+
+-- Resolve the category code that a subcode belongs to
+CREATE OR REPLACE FUNCTION pipeline.urt_category(p_code VARCHAR)
+RETURNS VARCHAR AS $$
+    SELECT anc.code FROM pipeline.urt_taxonomy anc
+    WHERE anc.node_type = 'category'
+    AND anc.path @> (SELECT src.path FROM pipeline.urt_taxonomy src WHERE src.code = p_code);
+$$ LANGUAGE SQL;
+
+-- ============================================================================
+-- VIEW: FLATTENED TAXONOMY WITH HIERARCHY INFO
+-- ============================================================================
+
+CREATE OR REPLACE VIEW pipeline.v_urt_taxonomy AS
+SELECT
+    t.id,
+    t.path,
+    t.code,
+    t.node_type,
+    t.level,
+    t.name,
+    t.definition,
+    -- Parent path: the node's path minus its last label (NULL for level-1 rows)
+    CASE
+        WHEN t.level > 1 THEN subpath(t.path, 0, t.level - 1)::text
+        ELSE NULL
+    END as parent_path,
+    -- Domain info (for rollups): first path label, plus the name of the row stored at that path
+    subpath(t.path, 0, 1)::text as domain_code,
+    (SELECT name FROM pipeline.urt_taxonomy WHERE path = subpath(t.path, 0, 1)) as domain_name,
+    -- Category path: first two labels (NULL for level-1 rows; a level-2 row yields its own path)
+    CASE
+        WHEN t.level >= 2 THEN subpath(t.path, 0, 2)::text
+        ELSE NULL
+    END as category_path,
+    -- Full path as breadcrumb
+    t.path::text as full_path,
+    -- Actionability
+    t.solution,
+    t.default_owner,
+    t.is_active
+FROM pipeline.urt_taxonomy t
+ORDER BY t.path;
+
+-- ============================================================================
+-- UPDATE REVIEW_SPANS TO USE LTREE
+-- ============================================================================
+
+-- Add ltree column to review_spans for efficient hierarchy queries
+ALTER TABLE pipeline.review_spans
+ADD COLUMN IF NOT EXISTS urt_path ltree;
+
+-- Populate ltree paths from existing codes
+UPDATE pipeline.review_spans rs
+SET urt_path = t.path
+FROM pipeline.urt_taxonomy t
+WHERE rs.urt_primary = t.code
+AND rs.urt_path IS NULL;
+
+-- Create GiST index for hierarchy queries on spans
+CREATE INDEX IF NOT EXISTS idx_review_spans_urt_path_gist
+ON pipeline.review_spans USING GIST (urt_path);
+
+-- ============================================================================
+-- EXAMPLE QUERIES (for reference)
+-- ============================================================================
+
+-- These are example queries, not executed:
+/*
+
+-- 1. Find all subcodes under "People" domain
+SELECT code, name FROM pipeline.urt_taxonomy
+WHERE path <@ 'P' AND node_type = 'subcode';
+
+-- 2. Find all codes matching pattern (e.g., all Value subcodes)
+SELECT code, name FROM pipeline.urt_taxonomy
+WHERE path ~ 'V.*' AND node_type = 'subcode';
+
+-- 3. Aggregate span counts by domain
+SELECT
+    subpath(urt_path, 0, 1)::text as domain,
+    COUNT(*) as span_count
+FROM pipeline.review_spans
+WHERE urt_path IS NOT NULL
+GROUP BY subpath(urt_path, 0, 1)
+ORDER BY span_count DESC;
+
+-- 4. Aggregate by category within a domain
+SELECT
+    subpath(urt_path, 0, 2)::text as category,
+    COUNT(*) as span_count
+FROM pipeline.review_spans
+WHERE urt_path <@ 'O'  -- All Offering codes
+GROUP BY subpath(urt_path, 0, 2)
+ORDER BY span_count DESC;
+
+-- 5. Get ancestors of a specific code
+SELECT * FROM pipeline.urt_ancestors('O1.01');
+-- Returns: O (Offering), O1 (Function)
+
+-- 6. Get all descendants of a category
+SELECT * FROM pipeline.urt_descendants('O1');
+-- Returns: O1.01, O1.02, O1.03, O1.04, O1.05
+
+-- 7. Find the domain a code belongs to
+SELECT pipeline.urt_domain('P1.01');
+-- Returns: P (People)
+
+-- 8. Drill-down query: Domain -> Category -> Subcode
+WITH RECURSIVE tree AS (
+    SELECT path, code, name, level
+    FROM pipeline.urt_taxonomy
+    WHERE node_type = 'domain' AND code = 'O'
+
+    UNION ALL
+
+    SELECT t.path, t.code, t.name, t.level
+    FROM pipeline.urt_taxonomy t
+    JOIN tree ON t.path <@ tree.path AND nlevel(t.path) = nlevel(tree.path) + 1
+)
+SELECT * FROM tree ORDER BY path;
+
+*/
+
+-- ============================================================================
+-- TRIGGER: Auto-update urt_path on review_spans
+-- ============================================================================
+
+CREATE OR REPLACE FUNCTION pipeline.set_urt_path()
+RETURNS TRIGGER AS $$
+BEGIN  -- copy the taxonomy path of the row's urt_primary code into urt_path (NULL when the code has no taxonomy row)
+    SELECT tax.path INTO NEW.urt_path FROM pipeline.urt_taxonomy tax WHERE tax.code = NEW.urt_primary;
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+DROP TRIGGER IF EXISTS trg_set_urt_path ON pipeline.review_spans;
+CREATE TRIGGER trg_set_urt_path
+    BEFORE INSERT OR UPDATE OF urt_primary ON pipeline.review_spans
+    FOR EACH ROW
+    EXECUTE FUNCTION pipeline.set_urt_path();
+
+-- ============================================================================
+-- MATERIALIZED VIEW: Pre-computed hierarchy rollups
+-- ============================================================================
+
+CREATE MATERIALIZED VIEW IF NOT EXISTS pipeline.mv_urt_domain_stats AS
+SELECT
+    subpath(rs.urt_path, 0, 1)::text as domain_code,  -- first path label = domain
+    t.name as domain_name,
+    rs.valence,
+    COUNT(*) as span_count,
+    COUNT(DISTINCT rs.review_id) as review_count,
+    AVG(CASE rs.intensity  -- I1..I3 map to 1..3; any other value becomes NULL and is ignored by AVG
+        WHEN 'I1' THEN 1
+        WHEN 'I2' THEN 2
+        WHEN 'I3' THEN 3
+    END) as avg_intensity
+FROM pipeline.review_spans rs
+JOIN pipeline.urt_taxonomy t ON subpath(rs.urt_path, 0, 1) = t.path
+WHERE rs.urt_path IS NOT NULL
+GROUP BY subpath(rs.urt_path, 0, 1), t.name, rs.valence;
+-- Named and IF NOT EXISTS so a re-run does not stack duplicate indexes; REFRESH ... CONCURRENTLY needs a unique index
+CREATE UNIQUE INDEX IF NOT EXISTS idx_mv_urt_domain_stats_domain_valence ON pipeline.mv_urt_domain_stats (domain_code, valence);
+
+-- Refresh command (run periodically):
+-- REFRESH MATERIALIZED VIEW CONCURRENTLY pipeline.mv_urt_domain_stats;
+
+COMMENT ON TABLE pipeline.urt_taxonomy IS 'Unified URT taxonomy using ltree for hierarchical queries. Replaces urt_domains, urt_categories, urt_subcodes.';
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/db/repositories.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/db/repositories.py
@@ -70,16 +70,18 @@ class ReviewRepository:
        self,
        review: NormalizedReview,
        raw_id: int,
+        job_id: str | None = None,
    ) -> int:
        """Insert an enriched review stub (pre-classification)."""
        query = """
            INSERT INTO pipeline.reviews_enriched (
                source, review_id, review_version, is_latest, raw_id,
                business_id, place_id, text, text_normalized, rating, review_time,
-                language, taxonomy_version
-            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
+                language, taxonomy_version, job_id
+            ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14::uuid)
            ON CONFLICT (source, review_id, review_version) DO UPDATE SET
-                is_latest = EXCLUDED.is_latest
+                is_latest = EXCLUDED.is_latest,
+                job_id = COALESCE(EXCLUDED.job_id, pipeline.reviews_enriched.job_id)
            RETURNING id
        """
        enriched_id = await self.db.fetchval(
@@ -97,6 +99,7 @@ class ReviewRepository:
            review["review_time"],
            review["text_language"],
            "v5.1",  # taxonomy_version - will be updated by Stage 2
+            job_id,
        )
        return enriched_id

@@ -213,6 +216,7 @@ class SpanRepository:
        batch_id: str,
        model_version: str,
        taxonomy_version: str,
+        job_id: str | None = None,
    ) -> None:
        """Insert a span into the database."""
        query = """
@@ -224,15 +228,17 @@ class SpanRepository:
                entity, entity_type, entity_normalized,
                relation_type, related_span_id, causal_chain,
                is_primary, is_active, review_time,
-                confidence, usn, taxonomy_version, model_version, ingest_batch_id
+                confidence, usn, taxonomy_version, model_version, ingest_batch_id,
+                job_id
            ) VALUES (
                $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
                $11, $12, $13, $14, $15, $16, $17, $18, $19, $20,
                $21, $22, $23, $24, $25, $26, $27, $28, $29, $30,
-                $31, $32, $33, $34
+                $31, $32, $33, $34, $35::uuid
            )
            ON CONFLICT (span_id) DO UPDATE SET
-                is_active = EXCLUDED.is_active
+                is_active = EXCLUDED.is_active,
+                job_id = COALESCE(EXCLUDED.job_id, pipeline.review_spans.job_id)
        """
        # Build related_span_id from index if needed
        related_span_id = None
@@ -276,6 +282,7 @@ class SpanRepository:
            taxonomy_version,
            model_version,
            batch_id,
+            job_id,
        )

    async def get_unrouted_negative_spans(
@@ -312,6 +319,24 @@ class SpanRepository:
        row = await self.db.fetchrow(query, span_id)
        return dict(row) if row else None

+    async def deactivate_spans_for_job(self, job_id: str) -> int:
+        """Mark every active span of a job as inactive (run before reclassification).
+
+        Returns how many spans were deactivated.
+        """
+        status = await self.db.execute(
+            """
+            UPDATE pipeline.review_spans
+            SET is_active = FALSE
+            WHERE job_id = $1::uuid AND is_active = TRUE
+            """,
+            job_id,
+        )
+        # The status string looks like "UPDATE 42"; anything else is reported as zero rows
+        if not status or not status.startswith("UPDATE "):
+            return 0
+        return int(status.split()[1])
+

 class IssueRepository:
    """Repository for issue data operations."""
@@ -329,6 +354,7 @@ class IssueRepository:
        entity: str | None,
        entity_normalized: str | None,
        taxonomy_version: str,
+        job_id: str | None = None,
    ) -> bool:
        """Create or update an issue. Returns True if newly created."""
        # First check if exists
@@ -363,8 +389,8 @@ class IssueRepository:
                INSERT INTO pipeline.issues (
                    issue_id, business_id, place_id, primary_subcode, domain,
                    state, priority_score, confidence_score, span_count, max_intensity,
-                    entity, entity_normalized, taxonomy_version
-                ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
+                    entity, entity_normalized, taxonomy_version, job_id
+                ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14::uuid)
                """,
                issue_id,
                business_id,
@@ -379,6 +405,7 @@ class IssueRepository:
                entity,
                entity_normalized,
                taxonomy_version,
+                job_id,
            )
            return True

@@ -448,6 +475,41 @@ class IssueRepository:
            span_id,
        )

+    async def delete_issues_for_job(self, job_id: str) -> int:
+        """Delete all issues for a job (used before reclassification).
+
+        Also deletes the issue_spans and issue_events rows whose issue_id belongs to those issues.
+        Returns the number of issues deleted (0 when the status string is not of the form "DELETE n").
+        """
+        # Child rows go first: both sub-selects look the issue_ids up in pipeline.issues, so the issues must still exist
+        await self.db.execute(
+            """
+            DELETE FROM pipeline.issue_spans
+            WHERE issue_id IN (
+                SELECT issue_id FROM pipeline.issues WHERE job_id = $1::uuid
+            )
+            """,
+            job_id,
+        )
+        await self.db.execute(
+            """
+            DELETE FROM pipeline.issue_events
+            WHERE issue_id IN (
+                SELECT issue_id FROM pipeline.issues WHERE job_id = $1::uuid
+            )
+            """,
+            job_id,
+        )
+        # Then delete issues. NOTE(review): these are three separate execute() calls; confirm callers wrap them in one transaction
+        result = await self.db.execute(
+            "DELETE FROM pipeline.issues WHERE job_id = $1::uuid",
+            job_id,
+        )
+        # Extract count from result string like "DELETE 42"
+        if result and result.startswith("DELETE "):
+            return int(result.split()[1])
+        return 0
+

 class FactRepository:
    """Repository for fact time series operations."""
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/reputation_pipeline.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/reputation_pipeline.py
@@ -0,0 +1,764 @@
+"""
+Reputation Pipeline - Primitives-based classification and reputation analytics.
+
+This pipeline uses the new primitives taxonomy (MANNER, SPEED, VALUE_FOR_MONEY, etc.)
+instead of the legacy URT codes. It powers the Reputation Report product.
+
+Stages:
+- classify: LLM-powered span extraction with primitives (stored in detected_spans_v2)
+- report: Generate reputation report JSON
+
+Usage:
+    pipeline = ReputationPipeline()
+    await pipeline.initialize()
+    result = await pipeline.process({"business_id": "Go Karts Mar Menor", "days": 365})
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import time
+import uuid
+from datetime import datetime, timedelta
+from typing import TYPE_CHECKING, Any
+
+from pipeline_core import (
+    BasePipeline,
+    DashboardConfig,
+    DashboardSection,
+    PipelineMetadata,
+    PipelineResult as BasePipelineResult,
+    StageResult,
+    WidgetConfig,
+)
+
+from reviewiq_pipeline.config import Config
+from reviewiq_pipeline.db.connection import DatabasePool
+
+if TYPE_CHECKING:
+    import asyncpg
+
+logger = logging.getLogger(__name__)
+
+# Stage names
+STAGE_NAMES = ["classify", "report"]
+
+# Domain mapping for primitives
+DOMAIN_MAP = {
+    # Output/Product (O)
+    "TASTE": "O", "CRAFT": "O", "FRESHNESS": "O", "TEMPERATURE": "O",
+    "EFFECTIVENESS": "O", "ACCURACY": "O", "CONDITION": "O", "CONSISTENCY": "O",
+    # People/Service (P)
+    "MANNER": "P", "COMPETENCE": "P", "ATTENTIVENESS": "P", "COMMUNICATION": "P",
+    # Journey/Process (J)
+    "SPEED": "J", "FRICTION": "J", "RELIABILITY": "J", "AVAILABILITY": "J",
+    # Environment (E)
+    "CLEANLINESS": "E", "COMFORT": "E", "SAFETY": "E", "AMBIANCE": "E",
+    "ACCESSIBILITY": "E", "DIGITAL_UX": "E",
+    # Value (V)
+    "PRICE_LEVEL": "V", "PRICE_FAIRNESS": "V", "PRICE_TRANSPARENCY": "V",
+    "VALUE_FOR_MONEY": "V",
+    # Meta
+    "HONESTY": "meta", "ETHICS": "meta", "PROMISES": "meta",
+    "ACKNOWLEDGMENT": "meta", "RESPONSE_QUALITY": "meta", "RECOVERY": "meta",
+    "RETURN_INTENT": "meta", "RECOMMEND": "meta", "RECOGNITION": "meta",
+    "UNMAPPED": "meta", "NON_INFORMATIVE": "meta",
+}
+
+DOMAIN_NAMES = {
+    "O": "Output/Product",
+    "P": "People/Service",
+    "J": "Journey/Process",
+    "E": "Environment",
+    "V": "Value",
+    "meta": "Meta",
+}
+
+
+class ReputationPipeline(BasePipeline):
+    """
+    Reputation Pipeline - Primitives-based classification and analytics.
+
+    Uses the new primitives taxonomy (37 primitives across 5 domains + meta)
+    for more actionable, business-friendly insights.
+    """
+
+    def __init__(self, config: Config | None = None):
+        """Remember the configuration; the database pool is only created by initialize()."""
+        self._config = config if config else Config()
+        self._initialized = False
+        self._db: DatabasePool | None = None
+
+    @property
+    def config(self) -> Config:
+        """Return the Config this pipeline was built with (a default Config() if none was passed)."""
+        return self._config
+
+    @property
+    def metadata(self) -> PipelineMetadata:
+        """Build the PipelineMetadata descriptor for this pipeline (id "reputation", stages from STAGE_NAMES)."""
+        return PipelineMetadata(
+            id="reputation",
+            name="Reputation Analytics Pipeline",
+            description="Primitives-based classification and reputation scoring. Generates business-facing analytics reports.",
+            version="2.0.0",
+            stages=STAGE_NAMES,
+            input_type="BusinessInput",
+        )
+
+    async def initialize(self) -> None:
+        """Create and open the database pool; calling this again after success does nothing."""
+        if not self._initialized:
+            logger.info("Initializing Reputation pipeline...")
+
+            # Keep the pool on self before opening it, then open it
+            self._db = DatabasePool(self._config)
+            await self._db.initialize()
+
+            # Only flag success once the pool opened without raising
+            self._initialized = True
+            logger.info("Reputation pipeline initialized")
+
+    async def close(self) -> None:
+        """Close the database pool if one is open and reset the initialized flag."""
+        if self._db:
+            await self._db.close()
+            self._db = None
+
+        self._initialized = False
+        logger.info("Reputation pipeline closed")
+
+    async def process(
+        self,
+        input_data: dict[str, Any],
+        stages: list[str] | None = None,
+    ) -> BasePipelineResult:
+        """
+        Process input data through the pipeline.
+
+        Args:
+            input_data: Must contain business_id OR job_id. Optional: days (default 365), start/end (ISO-8601 strings overriding the window)
+            stages: List of stage names to run (default: all)
+
+        Returns:
+            BasePipelineResult with stage outputs; malformed days/start/end values and stage errors come back as success=False
+        """
+        await self.initialize()
+
+        stages = stages or STAGE_NAMES
+        stages_run: list[str] = []
+        stage_results: dict[str, StageResult] = {}
+
+        business_id = input_data.get("business_id")
+        job_id = input_data.get("job_id")
+
+        # Resolve business_id from job_id if not provided directly (best effort: failures only log a warning)
+        if not business_id and job_id:
+            try:
+                async with self._db.pool.acquire() as conn:
+                    row = await conn.fetchrow(
+                        "SELECT business_name FROM jobs WHERE job_id = $1",
+                        uuid.UUID(job_id) if isinstance(job_id, str) else job_id,
+                    )
+                    if row and row["business_name"]:
+                        business_id = row["business_name"]
+                        logger.info(f"Resolved business_id '{business_id}' from job_id '{job_id}'")
+            except Exception as e:
+                logger.warning(f"Failed to resolve business_id from job_id: {e}")
+
+        if not business_id:
+            return BasePipelineResult(
+                pipeline_id="reputation",
+                stages_run=[],
+                stage_results={},
+                success=False,
+                error="business_id is required (provide business_id or job_id)",
+            )
+
+        try:
+            # Parse time window inside the try: a malformed days/start/end value becomes a failed result instead of an exception
+            days = input_data.get("days", 365)
+            end_date = datetime.utcnow()
+            start_date = end_date - timedelta(days=days)
+
+            if input_data.get("start"):
+                start_date = datetime.fromisoformat(input_data["start"])
+            if input_data.get("end"):
+                end_date = datetime.fromisoformat(input_data["end"])
+
+            # Stage: Classify (uses existing spans from detected_spans_v2)
+            if "classify" in stages:
+                start = time.time()
+                logger.info(f"Running Classification check for {business_id}")
+
+                try:
+                    classify_result = await self._check_classification(
+                        business_id, start_date, end_date
+                    )
+                    duration_ms = int((time.time() - start) * 1000)
+                    stages_run.append("classify")
+                    stage_results["classify"] = StageResult(
+                        stage="classify",
+                        success=True,
+                        data=classify_result,
+                        error=None,
+                        duration_ms=duration_ms,
+                    )
+                except Exception as e:
+                    logger.exception("Classification check failed")
+                    stage_results["classify"] = StageResult(
+                        stage="classify",
+                        success=False,
+                        data={},
+                        error=str(e),
+                        duration_ms=int((time.time() - start) * 1000),
+                    )
+
+            # Stage: Report (generate reputation report); runs even if classify failed
+            if "report" in stages:
+                start = time.time()
+                logger.info(f"Generating Reputation Report for {business_id}")
+
+                try:
+                    report_result = await self._generate_report(
+                        business_id, start_date, end_date
+                    )
+                    duration_ms = int((time.time() - start) * 1000)
+                    stages_run.append("report")
+                    stage_results["report"] = StageResult(
+                        stage="report",
+                        success=True,
+                        data=report_result,
+                        error=None,
+                        duration_ms=duration_ms,
+                    )
+                except Exception as e:
+                    logger.exception("Report generation failed")
+                    stage_results["report"] = StageResult(
+                        stage="report",
+                        success=False,
+                        data={},
+                        error=str(e),
+                        duration_ms=int((time.time() - start) * 1000),
+                    )
+
+            return BasePipelineResult(
+                pipeline_id="reputation",
+                stages_run=stages_run,
+                stage_results=stage_results,
+                success=all(r["success"] for r in stage_results.values()),
+            )
+
+        except Exception as e:
+            logger.exception("Pipeline failed with unexpected error")
+            return BasePipelineResult(
+                pipeline_id="reputation",
+                stages_run=stages_run,
+                stage_results=stage_results,
+                success=False,
+                error=str(e),
+            )
+
+    async def _check_classification(
+        self,
+        business_id: str,
+        start_date: datetime,
+        end_date: datetime,
+    ) -> dict[str, Any]:
+        """Summarise span counts per valence and the unmapped share for a business in [start_date, end_date)."""
+        if not self._db:
+            return {"error": "Database not initialized"}
+
+        async with self._db.pool.acquire() as conn:
+            # One aggregate row: totals per valence plus UNMAPPED / NON_INFORMATIVE counts
+            stats = await conn.fetchrow(
+                """
+                SELECT
+                    COUNT(*) as total_spans,
+                    COUNT(*) FILTER (WHERE valence = '+') as positive,
+                    COUNT(*) FILTER (WHERE valence = '-') as negative,
+                    COUNT(*) FILTER (WHERE valence = '0') as neutral,
+                    COUNT(*) FILTER (WHERE valence = '±') as mixed,
+                    COUNT(*) FILTER (WHERE primitive = 'UNMAPPED') as unmapped,
+                    COUNT(*) FILTER (WHERE primitive = 'NON_INFORMATIVE') as non_informative,
+                    COUNT(DISTINCT s.review_id) as reviews_with_spans
+                FROM pipeline.detected_spans_v2 s
+                JOIN pipeline.review_facts_v1 f
+                    ON f.review_id = s.review_id AND f.business_id = s.business_id
+                WHERE s.business_id = $1
+                    AND f.review_time_utc >= $2
+                    AND f.review_time_utc < $3
+                """,
+                business_id,
+                start_date,
+                end_date,
+            )
+
+            span_total = stats["total_spans"] if stats else 0
+            if span_total == 0:
+                return {
+                    "status": "no_data",
+                    "message": "No classified spans found for this business/period",
+                    "total_spans": 0,
+                }
+
+            unmapped_fraction = stats["unmapped"] / span_total if span_total > 0 else 0
+            health = "ok" if unmapped_fraction < 0.10 else "needs_attention"
+            return {
+                "status": health,
+                "total_spans": span_total,
+                "reviews_with_spans": stats["reviews_with_spans"],
+                "positive_count": stats["positive"],
+                "negative_count": stats["negative"],
+                "neutral_count": stats["neutral"],
+                "mixed_count": stats["mixed"],
+                "unmapped_count": stats["unmapped"],
+                "non_informative_count": stats["non_informative"],
+                "unmapped_rate": round(unmapped_fraction * 100, 1),
+            }
+
+    async def _generate_report(
+        self,
+        business_id: str,
+        start_date: datetime,
+        end_date: datetime,
+    ) -> dict[str, Any]:
+        """Generate a reputation report summary."""
+        if not self._db:
+            return {"error": "Database not initialized"}
+
+        async with self._db.pool.acquire() as conn:
+            # Get overall scores
+            row = await conn.fetchrow(
+                """
+                WITH span_data AS (
+                    SELECT
+                        s.primitive,
+                        s.valence,
+                        s.confidence,
+                        s.intensity,
+                        CASE s.valence
+                            WHEN '+' THEN 1
+                            WHEN '-' THEN -1
+                            ELSE 0
+                        END as valence_num
+                    FROM pipeline.detected_spans_v2 s
+                    JOIN pipeline.review_facts_v1 f
+                        ON f.review_id = s.review_id AND f.business_id = s.business_id
+                    WHERE s.business_id = $1
+                        AND f.review_time_utc >= $2
+                        AND f.review_time_utc < $3
+                        AND s.primitive NOT IN ('UNMAPPED', 'NON_INFORMATIVE')
+                )
+                SELECT
+                    COUNT(*) as content_spans,
+                    ROUND(
+                        100.0 * SUM(valence_num * confidence * intensity) /
+                        NULLIF(SUM(confidence * intensity), 0),
+                        1
+                    ) as overall_score,
+                    ROUND(100.0 * COUNT(*) FILTER (WHERE valence = '+') / NULLIF(COUNT(*), 0), 1) as positive_share
+                FROM span_data
+                """,
+                business_id,
+                start_date,
+                end_date,
+            )
+
+            if not row or row["content_spans"] == 0:
+                return {
+                    "status": "no_data",
+                    "message": "No content spans found",
+                }
+
+            # Get domain breakdown
+            domain_rows = await conn.fetch(
+                """
+                SELECT
+                    s.primitive,
+                    COUNT(*) as count,
+                    ROUND(
+                        100.0 * SUM(
+                            CASE s.valence WHEN '+' THEN 1 WHEN '-' THEN -1 ELSE 0 END
+                            * s.confidence * s.intensity
+                        ) / NULLIF(SUM(s.confidence * s.intensity), 0),
+                        1
+                    ) as score
+                FROM pipeline.detected_spans_v2 s
+                JOIN pipeline.review_facts_v1 f
+                    ON f.review_id = s.review_id AND f.business_id = s.business_id
+                WHERE s.business_id = $1
+                    AND f.review_time_utc >= $2
+                    AND f.review_time_utc < $3
+                    AND s.primitive NOT IN ('UNMAPPED', 'NON_INFORMATIVE')
+                GROUP BY s.primitive
+                ORDER BY count DESC
+                """,
+                business_id,
+                start_date,
+                end_date,
+            )
+
+            # Aggregate by domain
+            domain_scores = {}
+            primitive_scores = {}
+            for r in domain_rows:
+                prim = r["primitive"]
+                domain = DOMAIN_MAP.get(prim, "meta")
+
+                primitive_scores[prim] = {
+                    "domain": domain,
+                    "score": float(r["score"]) if r["score"] else 0,
+                    "volume": r["count"],
+                }
+
+                if domain not in domain_scores:
+                    domain_scores[domain] = {"total_score": 0, "total_volume": 0}
+                domain_scores[domain]["total_score"] += (r["score"] or 0) * r["count"]
+                domain_scores[domain]["total_volume"] += r["count"]
+
+            # Calculate domain averages
+            domains = {}
+            for domain, data in domain_scores.items():
+                if data["total_volume"] > 0:
+                    domains[domain] = {
+                        "name": DOMAIN_NAMES.get(domain, domain),
+                        "score": round(data["total_score"] / data["total_volume"], 1),
+                        "volume": data["total_volume"],
+                    }
+
+            # Get top drivers
+            top_positive = await conn.fetch(
+                """
+                SELECT
+                    s.primitive,
+                    COUNT(*) as count,
+                    ROUND(100.0 * COUNT(*) / (
+                        SELECT COUNT(*) FROM pipeline.detected_spans_v2 s2
+                        JOIN pipeline.review_facts_v1 f2 ON f2.review_id = s2.review_id AND f2.business_id = s2.business_id
+                        WHERE s2.business_id = $1 AND s2.valence = '+'
+                            AND f2.review_time_utc >= $2 AND f2.review_time_utc < $3
+                    ), 1) as impact
+                FROM pipeline.detected_spans_v2 s
+                JOIN pipeline.review_facts_v1 f
+                    ON f.review_id = s.review_id AND f.business_id = s.business_id
+                WHERE s.business_id = $1 AND s.valence = '+'
+                    AND f.review_time_utc >= $2 AND f.review_time_utc < $3
+                    AND s.primitive NOT IN ('UNMAPPED', 'NON_INFORMATIVE')
+                GROUP BY s.primitive
+                ORDER BY count DESC
+                LIMIT 5
+                """,
+                business_id,
+                start_date,
+                end_date,
+            )
+
+            top_negative = await conn.fetch(
+                """
+                SELECT
+                    s.primitive,
+                    COUNT(*) as count,
+                    ROUND(100.0 * COUNT(*) / NULLIF((
+                        SELECT COUNT(*) FROM pipeline.detected_spans_v2 s2
+                        JOIN pipeline.review_facts_v1 f2 ON f2.review_id = s2.review_id AND f2.business_id = s2.business_id
+                        WHERE s2.business_id = $1 AND s2.valence = '-'
+                            AND f2.review_time_utc >= $2 AND f2.review_time_utc < $3
+                    ), 0), 1) as impact
+                FROM pipeline.detected_spans_v2 s
+                JOIN pipeline.review_facts_v1 f
+                    ON f.review_id = s.review_id AND f.business_id = s.business_id
+                WHERE s.business_id = $1 AND s.valence = '-'
+                    AND f.review_time_utc >= $2 AND f.review_time_utc < $3
+                    AND s.primitive NOT IN ('UNMAPPED', 'NON_INFORMATIVE')
+                GROUP BY s.primitive
+                ORDER BY count DESC
+                LIMIT 5
+                """,
+                business_id,
+                start_date,
+                end_date,
+            )
+
+            return {
+                "status": "ok",
+                "business_id": business_id,
+                "window": {
+                    "start": start_date.isoformat(),
+                    "end": end_date.isoformat(),
+                },
+                "scores": {
+                    "overall": float(row["overall_score"]) if row["overall_score"] else 0,
+                    "positive_share": float(row["positive_share"]) if row["positive_share"] else 0,
+                    "content_spans": row["content_spans"],
+                },
+                "domains": domains,
+                "primitives": primitive_scores,
+                "drivers": {
+                    "positives": [
+                        {"primitive": r["primitive"], "count": r["count"], "impact": float(r["impact"]) if r["impact"] else 0}
+                        for r in top_positive
+                    ],
+                    "negatives": [
+                        {"primitive": r["primitive"], "count": r["count"], "impact": float(r["impact"]) if r["impact"] else 0}
+                        for r in top_negative
+                    ],
+                },
+            }
+
+    def get_dashboard_config(self) -> DashboardConfig:
+        """Get the dashboard configuration for Reputation Pipeline."""
+        return DashboardConfig(
+            pipeline_id="reputation",
+            title="Reputation Analytics",
+            description="Primitives-based reputation scoring and business insights",
+            sections=[
+                DashboardSection(
+                    id="overview",
+                    title="Reputation Overview",
+                    description="Overall reputation score and key metrics",
+                    widgets=[
+                        WidgetConfig(
+                            id="reputation_score",
+                            type="stat_card",
+                            title="Reputation Score",
+                            grid={"x": 0, "y": 0, "w": 3, "h": 1},
+                            config={
+                                "value_key": "overall_score",
+                                "format": "{value:.0f}",
+                                "icon": "trending-up",
+                                "color": "blue",
+                            },
+                        ),
+                        WidgetConfig(
+                            id="positive_share",
+                            type="stat_card",
+                            title="Positive Share",
+                            grid={"x": 3, "y": 0, "w": 3, "h": 1},
+                            config={
+                                "value_key": "positive_share",
+                                "format": "{value:.1f}%",
+                                "icon": "thumbs-up",
+                                "color": "green",
+                            },
+                        ),
+                        WidgetConfig(
+                            id="content_spans",
+                            type="stat_card",
+                            title="Content Spans",
+                            grid={"x": 6, "y": 0, "w": 3, "h": 1},
+                            config={
+                                "value_key": "content_spans",
+                                "format": "{value:,}",
+                                "icon": "message-square",
+                                "color": "purple",
+                            },
+                        ),
+                        WidgetConfig(
+                            id="unmapped_rate",
+                            type="stat_card",
+                            title="Unmapped Rate",
+                            grid={"x": 9, "y": 0, "w": 3, "h": 1},
+                            config={
+                                "value_key": "unmapped_rate",
+                                "format": "{value:.1f}%",
+                                "icon": "alert-circle",
+                                "color": "orange",
+                            },
+                        ),
+                    ],
+                    collapsed=False,
+                ),
+                DashboardSection(
+                    id="domains",
+                    title="Domain Breakdown",
+                    description="Performance across experience domains",
+                    widgets=[
+                        WidgetConfig(
+                            id="domain_scores",
+                            type="bar_chart",
+                            title="Domain Scores",
+                            grid={"x": 0, "y": 0, "w": 6, "h": 2},
+                            config={
+                                "x_axis": {"key": "domain", "type": "category"},
+                                "y_axis": {"key": "score", "label": "Score"},
+                                "series": [{"key": "score", "name": "Score"}],
+                            },
+                        ),
+                        WidgetConfig(
+                            id="domain_volume",
+                            type="pie_chart",
+                            title="Mentions by Domain",
+                            grid={"x": 6, "y": 0, "w": 6, "h": 2},
+                            config={
+                                "value_key": "volume",
+                                "label_key": "name",
+                                "show_legend": True,
+                            },
+                        ),
+                    ],
+                    collapsed=False,
+                ),
+                DashboardSection(
+                    id="drivers",
+                    title="Key Drivers",
+                    description="Top positive and negative drivers",
+                    widgets=[
+                        WidgetConfig(
+                            id="positive_drivers",
+                            type="bar_chart",
+                            title="Top Strengths",
+                            grid={"x": 0, "y": 0, "w": 6, "h": 2},
+                            config={
+                                "x_axis": {"key": "primitive", "type": "category"},
+                                "y_axis": {"key": "impact", "label": "Impact %"},
+                                "series": [{"key": "impact", "name": "Impact", "color": "#22c55e"}],
+                            },
+                        ),
+                        WidgetConfig(
+                            id="negative_drivers",
+                            type="bar_chart",
+                            title="Top Weaknesses",
+                            grid={"x": 6, "y": 0, "w": 6, "h": 2},
+                            config={
+                                "x_axis": {"key": "primitive", "type": "category"},
+                                "y_axis": {"key": "impact", "label": "Impact %"},
+                                "series": [{"key": "impact", "name": "Impact", "color": "#ef4444"}],
+                            },
+                        ),
+                    ],
+                    collapsed=False,
+                ),
+                DashboardSection(
+                    id="primitives",
+                    title="Primitive Analysis",
+                    description="Detailed breakdown by primitive",
+                    widgets=[
+                        WidgetConfig(
+                            id="primitives_table",
+                            type="table",
+                            title="All Primitives",
+                            grid={"x": 0, "y": 0, "w": 12, "h": 3},
+                            config={
+                                "columns": [
+                                    {"key": "primitive", "header": "Primitive", "width": 150},
+                                    {"key": "domain", "header": "Domain", "width": 100},
+                                    {"key": "score", "header": "Score", "width": 80, "align": "right"},
+                                    {"key": "volume", "header": "Mentions", "width": 80, "align": "right"},
+                                ],
+                                "row_key": "primitive",
+                                "page_size": 15,
+                                "sortable": True,
+                            },
+                        ),
+                    ],
+                    collapsed=True,
+                ),
+            ],
+            default_time_range="365d",
+            refresh_interval=600,
+        )
+
+    async def get_widget_data(
+        self,
+        widget_id: str,
+        params: dict[str, Any],
+    ) -> dict[str, Any]:
+        """Get data for a specific dashboard widget."""
+        await self.initialize()
+
+        business_id = params.get("business_id")
+        if not business_id:
+            return {"error": "business_id required"}
+
+        days = 365
+        time_range = params.get("time_range", "365d")
+        if time_range.endswith("d"):
+            days = int(time_range[:-1])
+
+        end_date = datetime.utcnow()
+        start_date = end_date - timedelta(days=days)
+
+        # Get classification check data
+        classify_data = await self._check_classification(business_id, start_date, end_date)
+
+        # Get report data
+        report_data = await self._generate_report(business_id, start_date, end_date)
+
+        match widget_id:
+            # Overview stats
+            case "reputation_score":
+                return {"overall_score": report_data.get("scores", {}).get("overall", 0)}
+            case "positive_share":
+                return {"positive_share": report_data.get("scores", {}).get("positive_share", 0)}
+            case "content_spans":
+                return {"content_spans": report_data.get("scores", {}).get("content_spans", 0)}
+            case "unmapped_rate":
+                return {"unmapped_rate": classify_data.get("unmapped_rate", 0)}
+
+            # Domain charts
+            case "domain_scores":
+                domains = report_data.get("domains", {})
+                return {"data": [{"domain": k, **v} for k, v in domains.items()]}
+            case "domain_volume":
+                domains = report_data.get("domains", {})
+                return {"data": [{"name": v["name"], "volume": v["volume"]} for v in domains.values()]}
+
+            # Driver charts
+            case "positive_drivers":
+                return {"data": report_data.get("drivers", {}).get("positives", [])}
+            case "negative_drivers":
+                return {"data": report_data.get("drivers", {}).get("negatives", [])}
+
+            # Primitives table
+            case "primitives_table":
+                primitives = report_data.get("primitives", {})
+                return {
+                    "data": [
+                        {"primitive": k, **v}
+                        for k, v in primitives.items()
+                    ],
+                    "total": len(primitives),
+                }
+
+            case _:
+                logger.warning(f"Unknown widget: {widget_id}")
+                return {"error": f"Unknown widget: {widget_id}"}
+
+    async def health_check(self) -> dict[str, Any]:
+        """Check pipeline health."""
+        await self.initialize()
+
+        checks = {}
+        healthy = True
+
+        # Check database connection
+        try:
+            if self._db:
+                async with self._db.pool.acquire() as conn:
+                    await conn.fetchval("SELECT 1")
+                checks["database"] = "ok"
+            else:
+                checks["database"] = "not_initialized"
+                healthy = False
+        except Exception as e:
+            checks["database"] = str(e)
+            healthy = False
+
+        # Check spans table exists
+        try:
+            if self._db:
+                async with self._db.pool.acquire() as conn:
+                    count = await conn.fetchval(
+                        "SELECT COUNT(*) FROM pipeline.detected_spans_v2 LIMIT 1"
+                    )
+                checks["spans_table"] = "ok"
+        except Exception as e:
+            checks["spans_table"] = str(e)
+            healthy = False
+
+        return {
+            "healthy": healthy,
+            "checks": checks,
+        }
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/init.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/init.py
@@ -2,10 +2,22 @@

 from reviewiq_pipeline.services.embeddings import EmbeddingService
 from reviewiq_pipeline.services.llm_client import LLMClient
+from reviewiq_pipeline.services.review_router import (
+    ReviewRouter,
+    RouterConfig,
+    RoutingDecision,
+    RoutingTier,
+    create_router,
+)
 from reviewiq_pipeline.services.text_processor import TextProcessor

 __all__ = [
    "LLMClient",
    "EmbeddingService",
    "TextProcessor",
+    "ReviewRouter",
+    "RouterConfig",
+    "RoutingDecision",
+    "RoutingTier",
+    "create_router",
 ]
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/category_resolver.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/category_resolver.py
@@ -0,0 +1,392 @@
+"""
+Category Resolver Service
+
+Resolves business categories to the deepest node in the GBP taxonomy.
+Uses a multi-phase approach:
+1. Exact match from Google's category
+2. LLM matching when no exact match
+3. Hierarchical LLM classification when no Google category
+
+This is critical for the classification pipeline as it provides context
+for understanding and categorizing reviews.
+"""
+import asyncio
+import logging
+from dataclasses import dataclass
+from typing import Optional
+import asyncpg
+
+from .llm_client import LLMClient
+
+log = logging.getLogger(__name__)
+
+
+@dataclass
+class ResolvedCategory:
+    """Result of category resolution.
+
+    Describes the ``gbp_categories`` row a business was resolved to, together
+    with how the match was obtained and the fixed confidence assigned to that
+    resolution path.
+    """
+    category_id: int  # `id` column of the matched gbp_categories row
+    path: str  # ltree path as string (selected as path::text)
+    name: str  # `name` column of the matched row
+    level: int  # `level` column of the matched row
+    method: str  # 'exact' (also used for substring matches), 'llm', 'hierarchical'
+    confidence: float  # 0.0 - 1.0; resolver assigns 1.0 / 0.9 / 0.8 / 0.7 by path
+
+
+class CategoryResolver:
+    """
+    Resolves business categories to GBP taxonomy nodes.
+
+    Usage:
+        resolver = CategoryResolver(db_pool, llm_client)
+
+        # With Google category
+        result = await resolver.resolve("Toy store")
+        # -> ResolvedCategory(path="Retail.Stores.Toy_store", method="exact")
+
+        # Without Google category (infer from name)
+        result = await resolver.resolve(None, business_name="Pura Vida Hostel")
+        # -> ResolvedCategory(path="Travel_Hospitality.Hotels.Hostel", method="hierarchical")
+    """
+
+    def __init__(self, pool: asyncpg.Pool, llm_client: Optional[LLMClient] = None):
+        self.pool = pool
+        self.llm = llm_client
+        self._level1_cache: list[dict] = []
+        self._level2_cache: dict[str, list[dict]] = {}
+        self._level3_cache: dict[str, list[dict]] = {}
+
+    async def resolve(
+        self,
+        google_category: Optional[str] = None,
+        business_name: Optional[str] = None,
+        business_address: Optional[str] = None
+    ) -> Optional[ResolvedCategory]:
+        """
+        Resolve to the deepest taxonomy node.
+
+        Args:
+            google_category: Category from Google Maps (e.g., "Toy store")
+            business_name: Business name for inference if no Google category
+            business_address: Address for additional context
+
+        Returns:
+            ResolvedCategory or None if resolution failed
+        """
+        # Phase 1: Try exact match if we have Google category
+        if google_category:
+            result = await self._exact_match(google_category)
+            if result:
+                log.info(f"Exact match: '{google_category}' -> {result.path}")
+                return result
+
+            # Phase 2: LLM matching for Google category
+            if self.llm:
+                result = await self._llm_match(google_category)
+                if result:
+                    log.info(f"LLM match: '{google_category}' -> {result.path}")
+                    return result
+
+        # Phase 3: Hierarchical classification from business name
+        if business_name and self.llm:
+            result = await self._hierarchical_classify(
+                business_name=business_name,
+                business_address=business_address,
+                google_category=google_category  # May be None or unmatched
+            )
+            if result:
+                log.info(f"Hierarchical: '{business_name}' -> {result.path}")
+                return result
+
+        log.warning(f"Could not resolve category for: {google_category or business_name}")
+        return None
+
+    async def _exact_match(self, google_category: str) -> Optional[ResolvedCategory]:
+        """Try exact match against taxonomy."""
+        async with self.pool.acquire() as conn:
+            # Try exact match (case-insensitive)
+            row = await conn.fetchrow("""
+                SELECT id, name, path::text as path, level
+                FROM gbp_categories
+                WHERE LOWER(name) = LOWER($1) AND level = 3
+            """, google_category)
+
+            if row:
+                return ResolvedCategory(
+                    category_id=row['id'],
+                    path=row['path'],
+                    name=row['name'],
+                    level=row['level'],
+                    method='exact',
+                    confidence=1.0
+                )
+
+            # Try fuzzy match (contains)
+            row = await conn.fetchrow("""
+                SELECT id, name, path::text as path, level
+                FROM gbp_categories
+                WHERE LOWER(name) LIKE LOWER($1) AND level = 3
+                ORDER BY length(name) ASC
+                LIMIT 1
+            """, f"%{google_category}%")
+
+            if row:
+                return ResolvedCategory(
+                    category_id=row['id'],
+                    path=row['path'],
+                    name=row['name'],
+                    level=row['level'],
+                    method='exact',
+                    confidence=0.9
+                )
+
+        return None
+
+    async def _llm_match(self, google_category: str) -> Optional[ResolvedCategory]:
+        """Use LLM to match Google category to taxonomy."""
+        # Get candidate categories (level 3) that might match
+        async with self.pool.acquire() as conn:
+            # Get categories with similar words
+            words = google_category.lower().split()
+            conditions = " OR ".join([f"LOWER(name) LIKE '%{w}%'" for w in words if len(w) > 2])
+
+            if not conditions:
+                return None
+
+            candidates = await conn.fetch(f"""
+                SELECT id, name, path::text as path, level
+                FROM gbp_categories
+                WHERE ({conditions}) AND level = 3
+                ORDER BY name
+                LIMIT 20
+            """)
+
+            if not candidates:
+                # Get random sample for LLM to choose from
+                candidates = await conn.fetch("""
+                    SELECT id, name, path::text as path, level
+                    FROM gbp_categories
+                    WHERE level = 3
+                    ORDER BY RANDOM()
+                    LIMIT 50
+                """)
+
+        if not candidates:
+            return None
+
+        # Ask LLM to pick best match
+        candidate_list = "\n".join([f"- {c['name']} ({c['path']})" for c in candidates])
+
+        prompt = f"""Given the Google Maps business category "{google_category}", select the BEST matching category from this taxonomy list.
+
+Candidates:
+{candidate_list}
+
+Respond with ONLY the exact category name from the list, nothing else.
+If none match well, respond with "NONE"."""
+
+        response = await self.llm.complete(prompt, max_tokens=50)
+        selected_name = response.strip().strip('"').strip("'")
+
+        if selected_name == "NONE":
+            return None
+
+        # Find the selected category
+        for c in candidates:
+            if c['name'].lower() == selected_name.lower():
+                return ResolvedCategory(
+                    category_id=c['id'],
+                    path=c['path'],
+                    name=c['name'],
+                    level=c['level'],
+                    method='llm',
+                    confidence=0.8
+                )
+
+        return None
+
+    async def _hierarchical_classify(
+        self,
+        business_name: str,
+        business_address: Optional[str] = None,
+        google_category: Optional[str] = None
+    ) -> Optional[ResolvedCategory]:
+        """
+        Walk down the taxonomy tree using LLM at each level.
+
+        Level 1 (16 sectors) -> Level 2 (91 types) -> Level 3 (4034 categories)
+        """
+        context = f"Business: {business_name}"
+        if business_address:
+            context += f"\nAddress: {business_address}"
+        if google_category:
+            context += f"\nGoogle category hint: {google_category}"
+
+        # Level 1: Select sector
+        level1_categories = await self._get_level_categories(1)
+        sector = await self._llm_select_category(
+            context=context,
+            categories=level1_categories,
+            level_name="sector"
+        )
+
+        if not sector:
+            return None
+
+        # Level 2: Select business type within sector
+        level2_categories = await self._get_level_categories(2, parent_path=sector['path'])
+        business_type = await self._llm_select_category(
+            context=context,
+            categories=level2_categories,
+            level_name="business type",
+            parent=sector['name']
+        )
+
+        if not business_type:
+            return None
+
+        # Level 3: Select specific category
+        level3_categories = await self._get_level_categories(3, parent_path=business_type['path'])
+        specific = await self._llm_select_category(
+            context=context,
+            categories=level3_categories,
+            level_name="specific category",
+            parent=business_type['name']
+        )
+
+        if not specific:
+            return None
+
+        return ResolvedCategory(
+            category_id=specific['id'],
+            path=specific['path'],
+            name=specific['name'],
+            level=specific['level'],
+            method='hierarchical',
+            confidence=0.7
+        )
+
+    async def _get_level_categories(
+        self,
+        level: int,
+        parent_path: Optional[str] = None
+    ) -> list[dict]:
+        """Get categories at a specific level, optionally filtered by parent."""
+        cache_key = f"{level}:{parent_path or 'root'}"
+
+        # Check cache
+        if level == 1 and self._level1_cache:
+            return self._level1_cache
+        if level == 2 and parent_path in self._level2_cache:
+            return self._level2_cache[parent_path]
+        if level == 3 and parent_path in self._level3_cache:
+            return self._level3_cache[parent_path]
+
+        async with self.pool.acquire() as conn:
+            if parent_path:
+                rows = await conn.fetch("""
+                    SELECT id, name, path::text as path, level
+                    FROM gbp_categories
+                    WHERE level = $1 AND path <@ $2::ltree
+                    ORDER BY name
+                """, level, parent_path)
+            else:
+                rows = await conn.fetch("""
+                    SELECT id, name, path::text as path, level
+                    FROM gbp_categories
+                    WHERE level = $1
+                    ORDER BY name
+                """, level)
+
+            result = [dict(r) for r in rows]
+
+            # Cache results
+            if level == 1:
+                self._level1_cache = result
+            elif level == 2 and parent_path:
+                self._level2_cache[parent_path] = result
+            elif level == 3 and parent_path:
+                self._level3_cache[parent_path] = result
+
+            return result
+
+    async def _llm_select_category(
+        self,
+        context: str,
+        categories: list[dict],
+        level_name: str,
+        parent: Optional[str] = None
+    ) -> Optional[dict]:
+        """Ask LLM to select best category from list."""
+        if not categories:
+            return None
+
+        # If only one option, return it
+        if len(categories) == 1:
+            return categories[0]
+
+        category_list = "\n".join([f"- {c['name']}" for c in categories])
+
+        parent_context = f" within {parent}" if parent else ""
+
+        prompt = f"""{context}
+
+Select the most appropriate {level_name}{parent_context} for this business.
+
+Options:
+{category_list}
+
+Respond with ONLY the exact category name from the list, nothing else."""
+
+        response = await self.llm.complete(prompt, max_tokens=50)
+        selected_name = response.strip().strip('"').strip("'")
+
+        # Find the selected category
+        for c in categories:
+            if c['name'].lower() == selected_name.lower():
+                return c
+
+        # Fuzzy match if exact not found
+        for c in categories:
+            if selected_name.lower() in c['name'].lower() or c['name'].lower() in selected_name.lower():
+                return c
+
+        # Return first as fallback
+        log.warning(f"LLM selected '{selected_name}' not in list, using first option")
+        return categories[0] if categories else None
+
+
+async def resolve_job_category(
+    pool: asyncpg.Pool,
+    llm_client: LLMClient,
+    job_id: str,
+    google_category: Optional[str],
+    business_name: Optional[str],
+    business_address: Optional[str] = None
+) -> Optional[ResolvedCategory]:
+    """
+    Resolve and save category for a job.
+
+    This is the main entry point for pre-flight category resolution.
+    """
+    resolver = CategoryResolver(pool, llm_client)
+    result = await resolver.resolve(
+        google_category=google_category,
+        business_name=business_name,
+        business_address=business_address
+    )
+
+    if result:
+        # Save to database
+        async with pool.acquire() as conn:
+            await conn.execute("""
+                UPDATE jobs
+                SET gbp_category_id = $2,
+                    gbp_category_path = $3::ltree,
+                    category_resolution_method = $4,
+                    updated_at = NOW()
+                WHERE job_id = $1::uuid
+            """, job_id, result.category_id, result.path, result.method)
+
+        log.info(f"Job {job_id}: resolved category to {result.path} ({result.method})")
+
+    return result
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/classification_validator.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/classification_validator.py
@@ -0,0 +1,210 @@
+"""
+Classification validator for post-LLM validation.
+
+Catches common misclassification patterns based on keyword detection
+and suggests corrections before persisting to database.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Price/money indicators → should be V codes
+#
+# Note on the two amount patterns: €, $ and £ are not word characters, so a
+# `\b` placed on the symbol side only matches when a letter or digit touches
+# the symbol. With `\b` on both sides, "50€ per day" and "paid €50" were never
+# detected. The word boundary is therefore anchored on the digit side only.
+PRICE_PATTERNS = [
+    r'\b\d+\s*[€$£]',  # "50€", "100 $"
+    r'[€$£]\s*\d+\b',  # "€50", "$ 100"
+    r'\beur(o|os)?\b',
+    r'\bprice[sd]?\b',
+    r'\bcost[s]?\b',
+    r'\bfee[s]?\b',
+    r'\bcharge[sd]?\b',
+    r'\bdeposit[s]?\b',
+    r'\brefund[s]?\b',
+    r'\bcheap\b',
+    r'\bexpensive\b',
+    r'\baffordable\b',
+    r'\bpreis\b',  # German
+    r'\bprecio[s]?\b',  # Spanish
+    r'\bgünstig\b',  # German "cheap"
+    r'\bteuer\b',  # German "expensive"
+    r'\bbarato\b',  # Spanish "cheap"
+    r'\bcaro\b',  # Spanish "expensive"
+]
+
+# Staff behavior indicators → should be P codes
+# validate_classification tries these in list order with re.search
+# (case-insensitive) and reports the first pattern that matches.
+STAFF_PATTERNS = [
+    r'\bfriendly\b',
+    r'\brude\b',
+    r'\bhelpful\b',
+    r'\bpatient\b',
+    r'\bimpatient\b',
+    r'\bwelcoming\b',
+    r'\battentive\b',
+    r'\bprofessional\b',
+    r'\bunprofessional\b',
+    r'\bamable\b',  # Spanish "friendly"
+    r'\bsimpático\b',  # Spanish
+    r'\bmuy amable\b',  # Spanish "very friendly"; the plain "amable" entry above already matches this phrase first
+    r'\bnett\b',  # German "nice"
+    r'\bfreundlich\b',  # German "friendly"
+    r'\bunfreundlich\b',  # German "unfriendly"
+    r'\bgentil\b',  # French/Spanish
+]
+
+# Scam/ethics indicators → should be R codes
+# Matched case-insensitively by validate_classification, so the capitalised
+# German entries also match the lower-cased span text.
+ETHICS_PATTERNS = [
+    r'\bscam\b',
+    r'\bfraud\b',
+    r'\bcheat\b',
+    r'\bdishonest\b',
+    r'\blied\b',
+    r'\blie[s]?\b',
+    r'\bscammer[s]?\b',
+    r'\bsteal\b',
+    r'\bstole\b',
+    r'\brobber[y]?\b',
+    r'\bestafa\b',  # Spanish "scam"
+    r'\btramp[a]?\b',  # Spanish "trap/trick"; NOTE(review): also matches the English word "tramp"
+    r'\bengaño\b',  # Spanish "deception"
+    r'\bAbzocker\b',  # German "rip-off"
+    r'\bBetrug\b',  # German "fraud"
+    r'\barnaque\b',  # French "scam"
+    r'\bvoleur[s]?\b',  # French "thief"
+]
+
+# Wayfinding indicators → should be A1.04
+# (validate_classification skips this rule for spans already coded A1.04 or A4.01.)
+WAYFINDING_PATTERNS = [
+    r"\bcouldn'?t find\b",
+    r'\bhard to find\b',
+    r'\bdifficult to find\b',
+    r'\bconfusing\b.*\b(direction|location|shuttle)\b',
+    r'\blost\b',  # NOTE(review): matches any standalone "lost", e.g. "lost my luggage"
+    r'\bno signs?\b',
+    r'\bno señal\b',  # Spanish
+    r'\bkeine Schilder\b',  # German
+]
+
+
+def _first_matching_pattern(patterns: list[str], text: str) -> str | None:
+    """Return the first regex in `patterns` that matches `text` (case-insensitive), else None."""
+    for pattern in patterns:
+        if re.search(pattern, text, re.IGNORECASE):
+            return pattern
+    return None
+
+
+def _mentions_any(keywords: list[str], text: str) -> bool:
+    """Return True when any keyword regex occurs anywhere in `text` (case-insensitive)."""
+    return any(re.search(keyword, text, re.IGNORECASE) for keyword in keywords)
+
+
+def validate_classification(
+    span_text: str,
+    urt_code: str,
+    valence: str,
+) -> dict[str, Any] | None:
+    """
+    Validate a classification and suggest correction if needed.
+
+    Four keyword rules are applied in order (price → staff → ethics →
+    wayfinding); the first rule whose pattern list matches the span wins.
+    A rule is skipped when the span is already coded in that rule's target
+    domain (or, for wayfinding, already coded A1.04 / A4.01).
+
+    Args:
+        span_text: The span text. None or empty text never matches a rule.
+        urt_code: The assigned URT code; its first character is the domain.
+            A missing/empty code is treated as having no domain, so every
+            rule is eligible instead of raising.
+        valence: The assigned valence. Accepted for interface compatibility;
+            none of the rules consult it.
+
+    Returns:
+        Correction dict with keys 'suggested_urt', 'reason' and 'pattern'
+        (the regex that triggered the rule) if misclassified, None if OK
+    """
+    text_lower = (span_text or '').lower()
+    # Slicing instead of indexing: '' / None must not raise IndexError/TypeError.
+    domain = (urt_code or '')[:1]
+
+    # Rule 1: Price mentions should be V codes
+    if domain != 'V':
+        pattern = _first_matching_pattern(PRICE_PATTERNS, text_lower)
+        if pattern is not None:
+            # Determine which V code
+            if _mentions_any([r'hidden', r'extra', r'surprise', r'unexpected', r'trampa'], text_lower):
+                suggested = 'V1.03'
+            elif _mentions_any([r'overcharge', r'wrong.*charge', r'billing'], text_lower):
+                suggested = 'V4.04'
+            else:
+                suggested = 'V1.01'
+
+            logger.debug(f"Validation: {urt_code} → {suggested} (price mention)")
+            return {
+                'suggested_urt': suggested,
+                'reason': 'price_mention',
+                'pattern': pattern,
+            }
+
+    # Rule 2: Staff behavior should be P codes
+    if domain != 'P':
+        pattern = _first_matching_pattern(STAFF_PATTERNS, text_lower)
+        if pattern is not None:
+            # Determine which P code
+            if _mentions_any([r'rude', r'unfriendly', r'disrespect', r'unfreundlich'], text_lower):
+                suggested = 'P1.02'
+            elif _mentions_any([r'impatient', r'rushed'], text_lower):
+                suggested = 'P1.03'
+            else:
+                suggested = 'P1.01'
+
+            logger.debug(f"Validation: {urt_code} → {suggested} (staff behavior)")
+            return {
+                'suggested_urt': suggested,
+                'reason': 'staff_behavior',
+                'pattern': pattern,
+            }
+
+    # Rule 3: Scam/ethics should be R codes
+    if domain != 'R':
+        pattern = _first_matching_pattern(ETHICS_PATTERNS, text_lower)
+        if pattern is not None:
+            if _mentions_any([r'scam', r'fraud', r'cheat', r'estafa', r'Betrug', r'arnaque'], text_lower):
+                suggested = 'R1.02'
+            else:
+                suggested = 'R1.01'
+
+            logger.debug(f"Validation: {urt_code} → {suggested} (ethics issue)")
+            return {
+                'suggested_urt': suggested,
+                'reason': 'ethics_issue',
+                'pattern': pattern,
+            }
+
+    # Rule 4: Wayfinding should be A1.04
+    if urt_code not in ('A1.04', 'A4.01'):
+        pattern = _first_matching_pattern(WAYFINDING_PATTERNS, text_lower)
+        if pattern is not None:
+            logger.debug(f"Validation: {urt_code} → A1.04 (wayfinding)")
+            return {
+                'suggested_urt': 'A1.04',
+                'reason': 'wayfinding',
+                'pattern': pattern,
+            }
+
+    return None  # Classification looks OK
+
+
+def validate_and_fix_spans(spans: list[dict[str, Any]]) -> list[dict[str, Any]]:
+    """
+    Validate and fix a list of spans.
+
+    Each span is checked with validate_classification. When a correction is
+    suggested, the span dict is updated in place: 'urt_primary' is replaced by
+    the suggested code and a '_validation_correction' entry records the
+    previous code and the reason.
+
+    Args:
+        spans: List of span dicts with 'span_text', 'urt_primary', 'valence'.
+            Missing or empty values fall back to '', 'O1.01' and 'V0'.
+
+    Returns:
+        The same list object, with corrections applied
+    """
+    corrections_made = 0
+
+    for span in spans:
+        # One effective code is used both for validation and for the audit
+        # record, so a span without 'urt_primary' no longer raises KeyError
+        # when a correction has to be written back.
+        current_code = span.get('urt_primary') or 'O1.01'
+
+        correction = validate_classification(
+            span.get('span_text') or '',
+            current_code,
+            span.get('valence') or 'V0',
+        )
+        if not correction:
+            continue
+
+        span['urt_primary'] = correction['suggested_urt']
+        span['_validation_correction'] = {
+            'original': current_code,
+            'reason': correction['reason'],
+        }
+        corrections_made += 1
+
+    if corrections_made:
+        logger.info(f"Validation corrected {corrections_made} spans")
+
+    return spans
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/config_resolver.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/config_resolver.py
@@ -0,0 +1,262 @@
+"""
+Config Resolver - Resolves L1 config + sector brief for classification.
+
+Builds a single JSON payload per business containing:
+- Enabled primitives (L1 + always-on meta)
+- Weights
+- Sector brief (language/signals)
+- Minimal primitive dictionary
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+from typing import Any, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import asyncpg
+
+logger = logging.getLogger(__name__)
+
+# Paths
+# DATA_DIR is the "data" directory under the fourth ancestor directory of this
+# file (with this file at src/reviewiq_pipeline/services/, that is the
+# reviewiq-pipeline package root).
+DATA_DIR = Path(__file__).parent.parent.parent.parent / "data"
+# Per-sector L1 configs, read as "<sector_code lower-cased>_config.json".
+CONFIGS_DIR = DATA_DIR / "primitive_configs" / "l1"
+# Per-sector briefs, read as "<sector_code lower-cased>_brief.json".
+BRIEFS_DIR = DATA_DIR / "sector_briefs"
+# NOTE(review): not referenced anywhere else in this module.
+PRIMITIVES_FILE = DATA_DIR / "primitives.json"
+
+# Meta primitives - always enabled, never weighted
+# (10 codes; ConfigResolver.resolve_enabled_set adds them to every enabled set.)
+META_PRIMITIVES = frozenset([
+    "HONESTY", "ETHICS", "PROMISES",  # Trust
+    "ACKNOWLEDGMENT", "RESPONSE_QUALITY", "RECOVERY",  # Resolution
+    "RETURN_INTENT", "RECOMMEND", "RECOGNITION",  # Loyalty
+    "UNMAPPED",  # Escape
+])
+
+# Core primitives dictionary (frozen 36 - minimal for prompt)
+# This dict holds 26 entries (8 + 4 + 4 + 6 + 4); the "36" presumably counts
+# them together with the 10 META_PRIMITIVES above - TODO confirm.
+# Each entry: "domain" (single-letter domain code), "name" (display name),
+# "def" (one-line definition).
+CORE_PRIMITIVES = {
+    # Quality (8)
+    "TASTE": {"domain": "O", "name": "Taste/Flavor", "def": "Sensory quality of food/beverage"},
+    "CRAFT": {"domain": "O", "name": "Craftsmanship", "def": "Skill of execution/preparation"},
+    "FRESHNESS": {"domain": "O", "name": "Freshness", "def": "Newness, not stale or old"},
+    "TEMPERATURE": {"domain": "O", "name": "Temperature", "def": "Hot/cold as expected"},
+    "EFFECTIVENESS": {"domain": "O", "name": "Effectiveness", "def": "Achieves intended purpose"},
+    "ACCURACY": {"domain": "O", "name": "Accuracy", "def": "Correct, as ordered/specified"},
+    "CONDITION": {"domain": "O", "name": "Condition", "def": "Physical state, wear, damage"},
+    "CONSISTENCY": {"domain": "O", "name": "Consistency", "def": "Same quality each time"},
+    # Service (4)
+    "MANNER": {"domain": "P", "name": "Manner/Attitude", "def": "Friendliness, respect, warmth"},
+    "COMPETENCE": {"domain": "P", "name": "Competence", "def": "Knowledge and skill of staff"},
+    "ATTENTIVENESS": {"domain": "P", "name": "Attentiveness", "def": "Being present, responsive"},
+    "COMMUNICATION": {"domain": "P", "name": "Communication", "def": "Clarity, listening, updates"},
+    # Process (4)
+    "SPEED": {"domain": "J", "name": "Speed/Wait", "def": "Time to service, waiting"},
+    "FRICTION": {"domain": "J", "name": "Friction", "def": "Obstacles, hassles, complexity"},
+    "RELIABILITY": {"domain": "J", "name": "Reliability", "def": "Dependable, keeps promises"},
+    "AVAILABILITY": {"domain": "J", "name": "Availability", "def": "Open when needed, bookable"},
+    # Environment (6)
+    "CLEANLINESS": {"domain": "E", "name": "Cleanliness", "def": "Hygiene, tidiness"},
+    "COMFORT": {"domain": "E", "name": "Comfort", "def": "Physical ease, seating"},
+    "SAFETY": {"domain": "E", "name": "Safety", "def": "Free from harm/danger"},
+    "AMBIANCE": {"domain": "E", "name": "Ambiance", "def": "Atmosphere, mood, vibe"},
+    "ACCESSIBILITY": {"domain": "E", "name": "Accessibility", "def": "Easy to reach, navigate"},
+    "DIGITAL_UX": {"domain": "E", "name": "Digital Experience", "def": "Website, app, online"},
+    # Value (4)
+    "PRICE_LEVEL": {"domain": "V", "name": "Price Level", "def": "Absolute cost (cheap/expensive)"},
+    "PRICE_FAIRNESS": {"domain": "V", "name": "Price Fairness", "def": "Reasonable for what you get"},
+    "PRICE_TRANSPARENCY": {"domain": "V", "name": "Price Transparency", "def": "No hidden fees, clear pricing"},
+    "VALUE_FOR_MONEY": {"domain": "V", "name": "Value for Money", "def": "Worth what you paid"},
+}
+
+
+class ConfigResolver:
+    """
+    Resolves classification config for a business.
+
+    Combines the business → sector mapping stored in the database with the
+    per-sector L1 config and sector brief JSON files on disk. Parsed files are
+    cached per instance, keyed by sector code.
+
+    Usage:
+        resolver = ConfigResolver()
+        payload = await resolver.resolve(business_id, pool)
+    """
+
+    def __init__(self):
+        # Per-instance caches of parsed JSON files, keyed by sector code.
+        self._l1_cache: dict[str, dict] = {}
+        self._brief_cache: dict[str, dict] = {}
+
+    @staticmethod
+    def _read_json_cached(
+        cache: dict[str, dict],
+        sector_code: str,
+        path: Path,
+        missing_message: str,
+    ) -> dict[str, Any] | None:
+        """
+        Return the parsed JSON at `path`, memoised in `cache` under `sector_code`.
+
+        Logs `missing_message` as a warning and returns None when the file does
+        not exist. Misses are not cached, so a file added later is picked up.
+        """
+        if sector_code in cache:
+            return cache[sector_code]
+
+        if not path.exists():
+            logger.warning(missing_message)
+            return None
+
+        # Read as UTF-8 explicitly instead of relying on the platform default
+        # encoding, which can mis-decode non-ASCII content on some systems.
+        with open(path, encoding="utf-8") as f:
+            data = json.load(f)
+
+        cache[sector_code] = data
+        return data
+
+    def _load_l1_config(self, sector_code: str) -> dict[str, Any] | None:
+        """Load the L1 config for a sector from CONFIGS_DIR (None if unavailable)."""
+        if not sector_code:
+            # A missing sector code cannot be turned into a file name.
+            logger.warning(f"No L1 config for sector {sector_code}")
+            return None
+
+        return self._read_json_cached(
+            self._l1_cache,
+            sector_code,
+            CONFIGS_DIR / f"{sector_code.lower()}_config.json",
+            f"No L1 config for sector {sector_code}",
+        )
+
+    def _load_sector_brief(self, sector_code: str) -> dict[str, Any] | None:
+        """Load the sector brief for a sector from BRIEFS_DIR (None if unavailable)."""
+        if not sector_code:
+            logger.warning(f"No sector brief for {sector_code}")
+            return None
+
+        return self._read_json_cached(
+            self._brief_cache,
+            sector_code,
+            BRIEFS_DIR / f"{sector_code.lower()}_brief.json",
+            f"No sector brief for {sector_code}",
+        )
+
+    async def get_business_mapping(
+        self,
+        pool: asyncpg.Pool,
+        business_id: str,
+    ) -> dict[str, Any] | None:
+        """
+        Get business → taxonomy mapping from database.
+
+        Returns:
+            Dict with 'business_id', 'gbp_path' and 'sector_code', or None
+            when pipeline.business_taxonomy_map has no row for the business.
+        """
+        query = """
+            SELECT business_id, gbp_path::text, sector_code
+            FROM pipeline.business_taxonomy_map
+            WHERE business_id = $1
+        """
+        row = await pool.fetchrow(query, business_id)
+        if not row:
+            return None
+        return dict(row)
+
+    def resolve_enabled_set(self, l1_config: dict) -> set[str]:
+        """
+        Compute final enabled primitive set.
+
+        = L1.enabled + META_PRIMITIVES (always-on)
+        """
+        enabled = set(l1_config.get("enabled", []))
+        enabled.update(META_PRIMITIVES)
+        return enabled
+
+    def resolve_weights(self, l1_config: dict) -> dict[str, float]:
+        """Get a copy of the weights from L1 config (empty dict when absent)."""
+        return dict(l1_config.get("weights", {}))
+
+    def build_primitives_for_prompt(
+        self,
+        enabled: set[str],
+        weights: dict[str, float],
+    ) -> dict[str, dict]:
+        """
+        Build minimal primitives dict for prompt.
+
+        Only includes enabled primitives with their definitions. Core
+        primitives get a copy of their CORE_PRIMITIVES entry (plus "weight"
+        when one is configured); meta primitives get a minimal generated
+        entry. Enabled codes that are neither core nor meta are omitted.
+        """
+        result = {}
+        # Iterate in sorted order: set iteration order varies between runs,
+        # which would make the serialized prompt payload non-deterministic.
+        for prim in sorted(enabled):
+            if prim in CORE_PRIMITIVES:
+                entry = CORE_PRIMITIVES[prim].copy()
+                if prim in weights:
+                    entry["weight"] = weights[prim]
+                result[prim] = entry
+            elif prim in META_PRIMITIVES:
+                # Meta primitives - minimal entry
+                result[prim] = {"domain": "M", "name": prim.replace("_", " ").title(), "meta": True}
+        return result
+
+    def extract_brief_signals(self, brief: dict | None) -> dict[str, Any]:
+        """
+        Extract relevant signals from sector brief for prompt.
+
+        Keeps it minimal to avoid bloating context. Returns an empty dict when
+        no brief is available; keys missing from the brief come back as None.
+        """
+        if not brief:
+            return {}
+
+        return {
+            "sector": brief.get("sector_code"),
+            "what_customers_judge": brief.get("what_customers_judge"),
+            "critical_pain_points": brief.get("critical_pain_points"),
+            "industry_terminology": brief.get("industry_terminology"),
+        }
+
+    async def resolve(
+        self,
+        business_id: str,
+        pool: asyncpg.Pool,
+        mode: str | None = None,
+    ) -> dict[str, Any] | None:
+        """
+        Resolve full classification payload for a business.
+
+        Args:
+            business_id: Business identifier
+            pool: Database connection pool
+            mode: Optional service mode (e.g., "dine_in", "delivery");
+                "in_person" is used when omitted
+
+        Returns:
+            Classification payload or None if business not mapped
+        """
+        # Get business mapping
+        mapping = await self.get_business_mapping(pool, business_id)
+        if not mapping:
+            logger.warning(f"Business not mapped: {business_id}")
+            return None
+
+        sector_code = mapping["sector_code"]
+        gbp_path = mapping["gbp_path"]
+
+        # Load L1 config; without one, enable every core primitive unweighted
+        l1_config = self._load_l1_config(sector_code)
+        if not l1_config:
+            logger.warning(f"No L1 config for {sector_code}, using defaults")
+            l1_config = {"enabled": list(CORE_PRIMITIVES.keys()), "weights": {}}
+
+        # Load sector brief (may be None)
+        brief = self._load_sector_brief(sector_code)
+
+        # Resolve enabled set and weights
+        enabled = self.resolve_enabled_set(l1_config)
+        weights = self.resolve_weights(l1_config)
+
+        # Build primitives for prompt
+        primitives = self.build_primitives_for_prompt(enabled, weights)
+
+        # Extract brief signals
+        brief_signals = self.extract_brief_signals(brief)
+
+        # Build payload
+        payload = {
+            "business_id": business_id,
+            "gbp_path": gbp_path,
+            "sector_code": sector_code,
+            "config_version": l1_config.get("config_version", "1.0"),
+            "modes": [mode] if mode else ["in_person"],
+            "default_mode": mode or "in_person",
+            "enabled_primitives": sorted(enabled),
+            "disabled_primitives": sorted(l1_config.get("disabled", [])),
+            "weights": weights,
+            "brief": brief_signals,
+            "primitives": primitives,
+        }
+
+        logger.info(
+            f"Resolved config for {business_id}: "
+            f"sector={sector_code}, enabled={len(enabled)}, weights={len(weights)}"
+        )
+
+        return payload
+
+
+# Module-level shortcut around ConfigResolver
+async def resolve_business_config(
+    business_id: str,
+    pool: asyncpg.Pool,
+    mode: str | None = None,
+) -> dict[str, Any] | None:
+    """
+    Resolve classification config for a business.
+
+    Creates a fresh ConfigResolver for this single call and returns whatever
+    its resolve() returns for the given business, pool and mode.
+    """
+    payload = await ConfigResolver().resolve(business_id, pool, mode)
+    return payload
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/gbp_primitive_prompts.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/gbp_primitive_prompts.py
@@ -0,0 +1,571 @@
+"""
+LLM prompts for generating sparse primitive config deltas for GBP hierarchy nodes.
+
+These prompts are used to populate L1 (sector) and L2 (category) nodes in the
+GBP category tree with business-specific primitive configurations.
+
+The output is a sparse delta that only includes primitives that need overrides
+for that specific business type. Configuration inheritance handles the rest.
+"""
+
+# =============================================================================
+# SYSTEM PROMPT
+# =============================================================================
+
+# System prompt sent to the LLM that generates sparse primitive config deltas.
+# The text is runtime behaviour: editing any wording changes model output.
+# NOTE(review): the heading says "THE 36 PRIMITIVES" but the domain lists below
+# enumerate far more than 36 codes - confirm which is intended.
+# NOTE(review): ETHICS, RECOGNITION, RETURN_INTENT and ACKNOWLEDGMENT appear in
+# the domain lists and also in the "META PRIMITIVES (DO NOT INCLUDE IN OUTPUT)"
+# section - confirm whether the model should ever emit them.
+SYSTEM_PROMPT_GBP_PRIMITIVE_CONFIG = """You are a customer experience taxonomy configuration specialist. Your task is to generate sparse primitive configuration deltas for Google Business Profile (GBP) category nodes.
+
+## YOUR ROLE
+
+You configure how the Universal Review Taxonomy (URT) primitives should be weighted, labeled, and detected for specific business types. Each primitive represents a distinct dimension of customer experience that appears in reviews.
+
+## THE 36 PRIMITIVES (Grouped by Domain)
+
+### OFFERING (O) - What the business provides
+- WORKS: Does the product/service function correctly?
+- PERFORMANCE: How well does it perform?
+- DURABILITY: How long does it last?
+- RELIABILITY: Is it consistent over time?
+- OUTCOME: Did the customer achieve their goal?
+- MATERIALS: Quality of ingredients/components
+- CRAFTSMANSHIP: Skill of construction/execution
+- PRESENTATION: Visual/aesthetic quality
+- ATTENTION_TO_DETAIL: Finishing touches
+- CONDITION: State at delivery
+- COMPLETENESS: All components present?
+- FEATURES: Promised features available?
+- SCOPE: Full scope delivered?
+- DOCUMENTATION: Supporting materials
+- SPEC_MATCH: Matches what was ordered?
+- PERSONALIZATION: Adapted to individual
+- FLEXIBILITY: Can be modified?
+- APPROPRIATENESS: Right solution for need?
+
+### PEOPLE (P) - Staff interactions
+- WARMTH: Friendly manner
+- RESPECT: Dignity and courtesy
+- EMPATHY: Understanding feelings
+- PATIENCE: Calm and tolerant
+- ENTHUSIASM: Energy and engagement
+- KNOWLEDGE: Expertise level
+- SKILL: Technical ability
+- PROBLEM_SOLVING: Finding solutions
+- PROFESSIONALISM: Conduct standards
+- EXPERIENCE: Depth of expertise
+- ATTENTIVENESS: Being present
+- INITIATIVE: Proactive help
+- AVAILABILITY: Present when needed
+- FOLLOW_THROUGH: Completing promises
+- URGENCY: Appropriate prioritization
+- CLARITY: Clear communication
+- LISTENING: Understanding needs
+- PROACTIVE_UPDATES: Keeping informed
+- ACCURACY: Correct information
+- TONE: Communication style
+
+### JOURNEY (J) - Process and timing
+- WAIT_TIME: Time spent waiting
+- SPEED: How fast things happen
+- RESPONSE_TIME: Time to respond
+- PUNCTUALITY: On-time delivery
+- PACING: Appropriate speed
+- SIMPLICITY: Easy process
+- NAVIGATION: Finding things
+- PAPERWORK: Documentation burden
+- HANDOFFS: Transitions
+- SELF_SERVICE: Autonomy options
+- CONSISTENCY: Same each time
+- PROCESS_ACCURACY: Correct execution
+- UPTIME: System availability
+- PREDICTABILITY: Expectations met
+- ERROR_RATE: Frequency of mistakes
+- ACKNOWLEDGMENT: Recognizing issues
+- RESOLUTION_PROCESS: How problems handled
+- RESOLUTION_SPEED: Time to fix
+- RESOLUTION_QUALITY: Adequacy of fix
+- PREVENTION: Avoiding recurrence
+
+### ENVIRONMENT (E) - Physical and digital space
+- CLEANLINESS: Hygiene and tidiness
+- MAINTENANCE: Condition and upkeep
+- LAYOUT: Functional arrangement
+- EQUIPMENT: Tools and amenities
+- SIGNAGE: Navigation aids
+- INTERFACE_DESIGN: Digital UX
+- DIGITAL_FUNCTIONALITY: Features working
+- DIGITAL_PERFORMANCE: Speed/responsiveness
+- DIGITAL_NAVIGATION: Finding things online
+- MOBILE_EXPERIENCE: Smartphone optimization
+- ATMOSPHERE: Overall mood
+- NOISE: Sound environment
+- TEMPERATURE: Climate comfort
+- CROWDING: Density/space
+- AESTHETICS: Visual appeal
+- PHYSICAL_SAFETY: Protection from harm
+- HEALTH_HYGIENE: Sanitation standards
+- SECURITY: Protection of person/property
+- COMFORT: Physical ease
+- EMERGENCY_READINESS: Preparedness
+
+### ACCESS (A) - Availability and accessibility
+- HOURS: Operating hours
+- BOOKING: Appointment access
+- INVENTORY: Product availability
+- STAFFING: Personnel available
+- GEOGRAPHIC_REACH: Service area
+- PHYSICAL_ACCESSIBILITY: Mobility access
+- VISUAL_ACCESSIBILITY: Sight accommodations
+- HEARING_ACCESSIBILITY: Audio accommodations
+- COGNITIVE_ACCESSIBILITY: Mental accommodations
+- DIGITAL_ACCESSIBILITY: Assistive tech support
+- LANGUAGE_SUPPORT: Multiple languages
+- CULTURAL_SENSITIVITY: Background respect
+- DIETARY_MEDICAL: Restriction accommodations
+- FAMILY_FRIENDLY: Children accommodation
+- EQUAL_TREATMENT: Non-discrimination
+- LOCATION: Convenience
+- PARKING: Vehicle accommodation
+- TRANSIT: Public transport
+- PAYMENT_OPTIONS: How you can pay
+- CONTACT_OPTIONS: Ways to reach
+
+### VALUE (V) - Cost and worth
+- ABSOLUTE_PRICE: The actual cost
+- PRICE_VS_EXPECTATION: Compared to anticipated
+- PRICE_VS_MARKET: Compared to competitors
+- HIDDEN_COSTS: Unexpected charges
+- PAYMENT_FLEXIBILITY: Terms and options
+- PRICING_CLARITY: Understanding costs
+- FEE_DISCLOSURE: Upfront about charges
+- ADVERTISING_ACCURACY: Marketing matches reality
+- TERMS_FAIRNESS: Policy reasonableness
+- HONEST_REPRESENTATION: Truthful claims
+- TIME_INVESTMENT: Hours required
+- MENTAL_EFFORT: Cognitive load
+- PHYSICAL_EFFORT: Bodily exertion
+- HASSLE_FACTOR: Cumulative frustration
+- OPPORTUNITY_COST: What else could be done
+- OVERALL_VALUE: Total assessment
+- QUALITY_PRICE_RATIO: What you get for what you pay
+- SATISFACTION: Contentment with exchange
+- RECOMMENDATION: Would suggest to others
+- RETURN_INTENT: Would come back
+
+### RELATIONSHIP (R) - Trust and loyalty
+- TRUTHFULNESS: Accurate representations
+- PROMISE_KEEPING: Honoring commitments
+- TRANSPARENCY: Openness about practices
+- ETHICS: Moral business conduct
+- FAIR_DEALING: Equitable treatment
+- TRACK_RECORD: Historical performance
+- DEPENDABILITY: Same over time
+- STABILITY: Organizational continuity
+- TRUSTWORTHINESS: Warranting confidence
+- GUARANTEE_HONOR: Standing behind product
+- ERROR_ACKNOWLEDGMENT: Admitting failures
+- APOLOGY: Expression of regret
+- COMPENSATION: Making amends
+- IMPROVEMENT: Actions to prevent recurrence
+- OWNERSHIP: Taking responsibility
+- RECOGNITION: Acknowledging customers
+- REWARDS: Loyalty benefits
+- RELATIONSHIP_BUILDING: Investment in connection
+- ONGOING_COMMUNICATION: Contact quality
+- COMMUNITY: Belonging and connection
+
+## META PRIMITIVES (DO NOT INCLUDE IN OUTPUT)
+
+These are always globally active and should NEVER appear in your output:
+- HONESTY, ETHICS, PROMISES, ACKNOWLEDGMENT, RESPONSE_QUALITY
+- RECOVERY, RETURN_INTENT, RECOMMEND, RECOGNITION, UNMAPPED
+
+## OUTPUT RULES
+
+1. **SPARSE OUTPUT ONLY**: Only include primitives that DIFFER from parent configuration
+   - If parent has WAIT_TIME at "normal" priority and this business needs "critical", include it
+   - If parent already has the right configuration, do NOT include it
+
+2. **PRIORITY LEVELS** (use exact strings):
+   - "critical": Essential for this business (top 3-5 per business)
+   - "high": Very important (next 5-8)
+   - "normal": Standard relevance (default)
+   - "low": Less common for this business
+   - "very_low": Rarely relevant (prefer over active: false)
+
+3. **WHEN TO SET active: false**:
+   - Only when a primitive is truly IRRELEVANT (not just uncommon)
+   - Example: PARKING for an online-only business
+   - Prefer priority: "very_low" unless truly N/A
+
+4. **SIGNALS**: 5-15 realistic customer phrases per side
+   - Use actual language customers use in reviews
+   - Include colloquial expressions, not formal descriptions
+   - Positive and negative should be opposites of the same dimension
+   - Use __replace__: true ONLY if parent signals are wrong (rare)
+
+5. **MODES**: Only include if this business has distinct service modes
+   - Examples: "dine_in" vs "delivery" for restaurants
+   - "in_store" vs "online" for retailers
+   - Most businesses: omit modes entirely
+
+6. **business_context**: Include for L1 sectors and leaf categories
+   - name: Human-friendly display name
+   - description: 1-2 sentence description
+   - modes: Array of applicable modes (if any)
+   - default_mode: Primary mode (if modes exist)
+
+## VALIDATION RULES
+
+Your output MUST:
+- Be valid JSON only (no markdown, no explanations)
+- Use ONLY primitive codes from the dictionary provided
+- NOT create new primitive codes
+- NOT include meta primitives
+- NOT include playbooks, solutions, or action recommendations
+- Have at least one primitive_config entry
+- Use exact priority strings: "critical", "high", "normal", "low", "very_low"
+
+## OUTPUT SCHEMA
+
+```json
+{
+  "business_context": {
+    "name": "Human-Friendly Name",
+    "description": "What this business type does and what matters to customers",
+    "modes": ["mode1", "mode2"],
+    "default_mode": "mode1"
+  },
+  "primitive_configs": {
+    "PRIMITIVE_CODE": {
+      "active": true,
+      "priority": "critical|high|normal|low|very_low",
+      "label": "Business-specific label for this primitive",
+      "description": "What this primitive means for this specific business",
+      "signals": {
+        "positive": ["signal 1", "signal 2", "..."],
+        "negative": ["signal 1", "signal 2", "..."],
+        "__replace__": false
+      },
+      "modes": {
+        "mode_name": {
+          "applicable": true,
+          "label": "Mode-specific label"
+        }
+      }
+    }
+  }
+}
+```
+
+Return ONLY the JSON object. No preamble, no explanation, no markdown."""
+
+
+# =============================================================================
+# USER PROMPT TEMPLATE
+# =============================================================================
+
+# Per-node user prompt. build_user_prompt fills it with str.format, so every
+# {name} below is a placeholder and any literal brace added to this text would
+# have to be doubled ({{ / }}).
+# Placeholders: gbp_path, node_name (used three times), node_description,
+# node_level, parent_resolved_config, primitive_dictionary.
+USER_PROMPT_TEMPLATE = """Generate a sparse primitive configuration delta for this GBP node.
+
+## NODE INFORMATION
+
+**GBP Path**: {gbp_path}
+**Node Name**: {node_name}
+**Node Description**: {node_description}
+**Node Level**: {node_level} (L1=Sector, L2=Category, L3=Subcategory, L4=Leaf)
+
+## PARENT RESOLVED CONFIGURATION
+
+This is the already-resolved configuration from all ancestors. Only include primitives that need to CHANGE from this:
+
+```json
+{parent_resolved_config}
+```
+
+## PRIMITIVE DICTIONARY
+
+Reference for all available primitives with their base definitions:
+
+```json
+{primitive_dictionary}
+```
+
+## YOUR TASK
+
+Generate a sparse delta configuration for "{node_name}" that:
+
+1. Identifies the 5-10 MOST CRITICAL primitives for this business type
+2. Adjusts priority levels to reflect what customers actually care about
+3. Provides business-specific labels and signals where helpful
+4. Only includes primitives that DIFFER from parent_resolved_config
+5. Uses realistic customer language for signals
+
+Think about:
+- What do customers of {node_name} businesses typically praise or complain about?
+- Which URT primitives are most actionable for this business type?
+- What unique aspects distinguish this business type from others?
+
+Return ONLY valid JSON matching the output schema."""
+
+
+# =============================================================================
+# HELPER FUNCTIONS
+# =============================================================================
+
+def build_user_prompt(
+    gbp_path: str,
+    node_name: str,
+    node_description: str,
+    node_level: int,
+    parent_resolved_config: dict,
+    primitive_dictionary: dict,
+) -> str:
+    """
+    Render USER_PROMPT_TEMPLATE for one GBP node.
+
+    Args:
+        gbp_path: ltree path of the node (e.g., "Food_Beverage.Restaurants")
+        node_name: Display name of the node (e.g., "Restaurants")
+        node_description: Short description of the business category
+        node_level: Hierarchy depth; 1-4 are rendered as "L1=Sector" ...
+            "L4=Leaf", any other value as "L<value>"
+        parent_resolved_config: Config already resolved from the ancestors
+            (or {} for L1); embedded as indented JSON
+        primitive_dictionary: Primitive definitions; embedded as indented JSON
+
+    Returns:
+        The formatted user prompt string
+    """
+    import json
+
+    # 1 → "L1=Sector", 2 → "L2=Category", 3 → "L3=Subcategory", 4 → "L4=Leaf"
+    known_levels = {
+        depth: f"L{depth}={title}"
+        for depth, title in enumerate(("Sector", "Category", "Subcategory", "Leaf"), start=1)
+    }
+    try:
+        level_text = known_levels[node_level]
+    except KeyError:
+        level_text = f"L{node_level}"
+
+    substitutions = {
+        "gbp_path": gbp_path,
+        "node_name": node_name,
+        "node_description": node_description,
+        "node_level": level_text,
+        "parent_resolved_config": json.dumps(parent_resolved_config, indent=2),
+        "primitive_dictionary": json.dumps(primitive_dictionary, indent=2),
+    }
+    return USER_PROMPT_TEMPLATE.format(**substitutions)
+
+
+def validate_primitive_config_output(output: dict, primitive_codes: set[str]) -> list[str]:
+    """
+    Validate the LLM output against schema and rules.
+
+    Checks, in order: the output is an object; `primitive_configs` is a
+    non-empty object; every key is a known, non-meta primitive code; each
+    entry is an object whose optional `priority` is one of the allowed
+    strings and whose optional `signals` is an object with list-valued
+    `positive` / `negative`; the optional `business_context` is an object
+    whose optional `modes` is a list. Values given as null are treated as
+    absent.
+
+    Args:
+        output: Parsed JSON output from LLM
+        primitive_codes: Set of valid primitive codes
+
+    Returns:
+        List of validation errors (empty if valid)
+    """
+    errors = []
+
+    # Meta primitives that should never appear
+    META_PRIMITIVES = {
+        "HONESTY", "ETHICS", "PROMISES", "ACKNOWLEDGMENT", "RESPONSE_QUALITY",
+        "RECOVERY", "RETURN_INTENT", "RECOMMEND", "RECOGNITION", "UNMAPPED"
+    }
+
+    # Ordered tuple (not a set) so the error message lists them deterministically.
+    VALID_PRIORITIES = ("critical", "high", "normal", "low", "very_low")
+
+    # Check required structure
+    if not isinstance(output, dict):
+        errors.append("Output must be a JSON object")
+        return errors
+
+    primitive_configs = output.get("primitive_configs", {})
+    if not primitive_configs:
+        errors.append("primitive_configs is required and must not be empty")
+
+    if not isinstance(primitive_configs, dict):
+        errors.append("primitive_configs must be an object")
+        return errors
+
+    for code, config in primitive_configs.items():
+        # Meta check comes first: a meta code that is absent from
+        # primitive_codes must still be reported as a meta primitive rather
+        # than being swallowed by the "unknown code" branch.
+        if code in META_PRIMITIVES:
+            errors.append(f"Meta primitive should not appear: {code}")
+        elif code not in primitive_codes:
+            errors.append(f"Unknown primitive code: {code}")
+            continue
+
+        if not isinstance(config, dict):
+            errors.append(f"{code}: config must be an object")
+            continue
+
+        # Check priority if present. The isinstance guard keeps unhashable or
+        # non-string values (e.g. a list) from raising and also rejects
+        # falsy-but-invalid values such as "" or 0.
+        priority = config.get("priority")
+        if priority is not None and not (isinstance(priority, str) and priority in VALID_PRIORITIES):
+            errors.append(
+                f"{code}: invalid priority '{priority}', must be one of {', '.join(VALID_PRIORITIES)}"
+            )
+
+        # Check signals structure if present
+        signals = config.get("signals")
+        if signals is not None:
+            if not isinstance(signals, dict):
+                errors.append(f"{code}: signals must be an object")
+            else:
+                for side in ("positive", "negative"):
+                    phrases = signals.get(side)
+                    if phrases is not None and not isinstance(phrases, list):
+                        errors.append(f"{code}: signals.{side} must be an array")
+
+    # Check business_context if present
+    business_context = output.get("business_context")
+    if business_context is not None:
+        if not isinstance(business_context, dict):
+            errors.append("business_context must be an object")
+        else:
+            modes = business_context.get("modes")
+            if modes is not None and not isinstance(modes, list):
+                errors.append("business_context.modes must be an array")
+
+    return errors
+
+
+# =============================================================================
+# EXAMPLE PRIMITIVE DICTIONARY (subset for reference)
+# =============================================================================
+
+EXAMPLE_PRIMITIVE_DICTIONARY = {
+    "WAIT_TIME": {
+        "code": "WAIT_TIME",
+        "domain": "J",
+        "category": "Timing",
+        "name": "Wait Time",
+        "definition": "Time spent waiting for service",
+        "base_signals": {
+            "positive": ["no wait", "seated immediately", "right away", "quick turnaround"],
+            "negative": ["long wait", "waited forever", "45 minutes", "hours to be seen"]
+        }
+    },
+    "WARMTH": {
+        "code": "WARMTH",
+        "domain": "P",
+        "category": "Attitude",
+        "name": "Warmth/Friendliness",
+        "definition": "Approachability and pleasantness of staff",
+        "base_signals": {
+            "positive": ["so friendly", "welcoming", "made us feel at home", "warm greeting"],
+            "negative": ["cold", "unfriendly", "rude", "didn't acknowledge us"]
+        }
+    },
+    "CRAFTSMANSHIP": {
+        "code": "CRAFTSMANSHIP",
+        "domain": "O",
+        "category": "Quality",
+        "name": "Craftsmanship",
+        "definition": "Skill of construction or execution",
+        "base_signals": {
+            "positive": ["beautifully made", "expert work", "attention to detail", "quality craftsmanship"],
+            "negative": ["sloppy work", "poorly made", "amateur job", "uneven"]
+        }
+    },
+    # ... more primitives would be included in full dictionary
+}
+
+
+# =============================================================================
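+# EXAMPLE OUTPUT (for reference and testing)
+# NOTE: this example configures codes (CLEANLINESS, ATMOSPHERE, PARKING,
+# DIGITAL_ACCESSIBILITY) that are not in EXAMPLE_PRIMITIVE_DICTIONARY, so
+# validating it against that subset reports "Unknown primitive code" errors.
+# Validate it against the full primitive dictionary instead.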
+# =============================================================================
+
+EXAMPLE_OUTPUT_RESTAURANT = {
+    "business_context": {
+        "name": "Restaurants",
+        "description": "Food service establishments where customers dine on-premises or order for delivery/takeout",
+        "modes": ["dine_in", "takeout", "delivery"],
+        "default_mode": "dine_in"
+    },
+    "primitive_configs": {
+        "WAIT_TIME": {
+            "priority": "critical",
+            "label": "Wait for Table/Food",
+            "description": "Time waiting to be seated and for food to arrive",
+            "signals": {
+                "positive": [
+                    "seated immediately",
+                    "food came out fast",
+                    "no wait for a table",
+                    "quick service",
+                    "didn't have to wait long"
+                ],
+                "negative": [
+                    "waited 45 minutes for a table",
+                    "food took forever",
+                    "an hour for appetizers",
+                    "still waiting for our entrees",
+                    "had to flag down the waiter"
+                ]
+            },
+            "modes": {
+                "dine_in": {
+                    "applicable": True,
+                    "label": "Wait for Table & Food"
+                },
+                "takeout": {
+                    "applicable": True,
+                    "label": "Order Ready Time"
+                },
+                "delivery": {
+                    "applicable": True,
+                    "label": "Delivery Time"
+                }
+            }
+        },
+        "CRAFTSMANSHIP": {
+            "priority": "critical",
+            "label": "Food Preparation Quality",
+            "description": "Skill and care in cooking and food preparation",
+            "signals": {
+                "positive": [
+                    "cooked to perfection",
+                    "beautifully plated",
+                    "chef knows what they're doing",
+                    "perfectly seasoned",
+                    "amazing flavor"
+                ],
+                "negative": [
+                    "overcooked",
+                    "bland and tasteless",
+                    "clearly microwaved",
+                    "burnt edges",
+                    "undercooked chicken"
+                ]
+            }
+        },
+        "WARMTH": {
+            "priority": "high",
+            "label": "Server Friendliness",
+            "description": "Warmth and hospitality from hosts, servers, and staff"
+        },
+        "CLEANLINESS": {
+            "priority": "high",
+            "label": "Restaurant Cleanliness",
+            "description": "Hygiene of dining area, bathrooms, and visible kitchen areas"
+        },
+        "ATMOSPHERE": {
+            "priority": "high",
+            "label": "Dining Ambiance",
+            "description": "Overall mood, decor, lighting, and vibe of the restaurant"
+        },
+        "PARKING": {
+            "priority": "normal",
+            "modes": {
+                "dine_in": {"applicable": True},
+                "takeout": {"applicable": True},
+                "delivery": {"applicable": False}
+            }
+        },
+        "DIGITAL_ACCESSIBILITY": {
+            "priority": "very_low",
+            "description": "Screen reader support and digital accessibility - rarely mentioned in restaurant reviews"
+        }
+    }
+}
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/llm_client.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/llm_client.py
@@ -6,6 +6,7 @@ Provides a unified interface for classification requests with:
 - Structured output (JSON mode)
 - Retry handling
 - Cost tracking
+- Adaptive batch sizing based on context window
 """

 from __future__ import annotations
@@ -14,7 +15,8 @@ import json
 import logging
 import time
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, TypedDict

 if TYPE_CHECKING:
    from reviewiq_pipeline.config import Config
@@ -22,6 +24,240 @@ if TYPE_CHECKING:

 logger = logging.getLogger(__name__)

+
+# =============================================================================
+# Exceptions
+# =============================================================================
+
+
+class PartialBatchResult(Exception):
+    """
+    Signals that a batch response could only be parsed in part.
+
+    The exception transports whatever per-review results were recovered, the
+    indices of the reviews that are still missing, and any metadata gathered
+    for the call, so that a caller can retry just the missing reviews.
+    """
+
+    def __init__(
+        self,
+        message: str,
+        partial_results: list[dict[str, Any]],
+        missing_indices: list[int],
+        metadata: dict[str, Any] | None = None,
+    ):
+        super().__init__(message)
+        # A falsy metadata argument (None or an empty dict) becomes a fresh dict.
+        if metadata:
+            self.metadata = metadata
+        else:
+            self.metadata = {}
+        self.missing_indices = missing_indices
+        self.partial_results = partial_results
+
+
+# =============================================================================
+# Model Context Windows and Token Estimation
+# =============================================================================
+
+# Context window size in tokens, keyed by exact model name. BatchSizer looks a
+# model up here and falls back to 128_000 when the name is not listed.
+MODEL_CONTEXT_WINDOWS = {
+    # OpenAI models
+    "gpt-4o": 128_000,
+    "gpt-4o-mini": 128_000,
+    "gpt-4-turbo": 128_000,
+    "gpt-4": 8_192,
+    "gpt-3.5-turbo": 16_385,
+    # Anthropic models
+    "claude-3-opus-20240229": 200_000,
+    "claude-3-sonnet-20240229": 200_000,
+    "claude-3-haiku-20240307": 200_000,
+    "claude-3-5-sonnet-20241022": 200_000,
+    "claude-sonnet-4-20250514": 200_000,
+}
+
+# Average characters per token (rough estimate, varies by language).
+# Used as the divisor in BatchSizer.estimate_tokens.
+CHARS_PER_TOKEN = 4
+
+# Output tokens per review (classification response).
+# Used as the average output size until real responses have been observed.
+OUTPUT_TOKENS_PER_REVIEW = 450  # Conservative estimate
+
+
+@dataclass
+class BatchSizeCalculation:
+    """
+    Result of batch size calculation.
+
+    Returned by BatchSizer.calculate_batch_size; bundles the recommended size
+    with the inputs that produced it so the decision can be logged.
+    """
+    # Recommended number of reviews for the next batch
+    batch_size: int
+    # Token count of the system prompt, as supplied to the sizer
+    system_prompt_tokens: int
+    # Average input tokens per review used for the estimate
+    avg_tokens_per_review: int
+    # Expected output tokens per review multiplied by a batch size
+    output_tokens_reserved: int
+    # Context window of the model, in tokens
+    context_window: int
+    # Fraction of the context window the sizer aims to fill
+    utilization_target: float
+    # Human-readable explanation of how the size was derived
+    reasoning: str
+
+
+@dataclass
+class TokenStats:
+    """Accumulated per-review token counts used to refine batch estimates."""
+    total_reviews: int = 0
+    total_input_tokens: int = 0
+    total_output_tokens: int = 0
+    min_review_tokens: int = 999999
+    max_review_tokens: int = 0
+
+    def update(self, review_tokens: int, output_tokens: int):
+        """Record the input and output token counts of one review."""
+        self.total_reviews = self.total_reviews + 1
+        self.total_input_tokens = self.total_input_tokens + review_tokens
+        self.total_output_tokens = self.total_output_tokens + output_tokens
+        # Track the extremes of the observed input sizes.
+        if review_tokens < self.min_review_tokens:
+            self.min_review_tokens = review_tokens
+        if review_tokens > self.max_review_tokens:
+            self.max_review_tokens = review_tokens
+
+    @property
+    def avg_review_tokens(self) -> int:
+        """Mean input tokens per review (150 before anything is recorded)."""
+        seen = self.total_reviews
+        return self.total_input_tokens // seen if seen != 0 else 150
+
+    @property
+    def avg_output_tokens(self) -> int:
+        """Mean output tokens per review (module default before anything is recorded)."""
+        seen = self.total_reviews
+        if seen != 0:
+            return self.total_output_tokens // seen
+        return OUTPUT_TOKENS_PER_REVIEW
+
+
+class BatchSizer:
+    """
+    Calculates optimal batch size based on context window and actual token usage.
+
+    Adapts in real-time based on observed token counts from previous batches.
+
+    The token budget is ``context_window * target_utilization`` minus the system
+    prompt and a fixed 1000-token safety buffer. Each review is charged its
+    estimated input tokens plus the running average of output tokens.
+    """
+
+    def __init__(
+        self,
+        model: str,
+        system_prompt_tokens: int,
+        target_utilization: float = 0.6,
+        max_output_tokens: int | None = None,
+    ):
+        """
+        Args:
+            model: Model name, matched exactly against MODEL_CONTEXT_WINDOWS
+                (names that are not listed fall back to a 128,000-token window)
+            system_prompt_tokens: Token count of the system prompt sent with each batch
+            target_utilization: Fraction of the context window to aim for
+            max_output_tokens: Optional cap on completion tokens per call. When set,
+                the batch is also limited so its expected output fits under the cap.
+                Leave as None to size on the context window alone.
+        """
+        self.model = model
+        self.system_prompt_tokens = system_prompt_tokens
+        self.target_utilization = target_utilization
+        self.max_output_tokens = max_output_tokens
+        self.context_window = MODEL_CONTEXT_WINDOWS.get(model, 128_000)
+        self.stats = TokenStats()
+
+    def estimate_tokens(self, text: str) -> int:
+        """Estimate token count for text (fast approximation, never below 1)."""
+        # Simple heuristic: ~4 chars per token for English
+        # More accurate would be to use tiktoken, but this is faster
+        return max(1, len(text) // CHARS_PER_TOKEN)
+
+    def calculate_batch_size(
+        self,
+        reviews: list[dict],
+        fixed_size: int | None = None,
+    ) -> BatchSizeCalculation:
+        """
+        Calculate optimal batch size for a set of reviews.
+
+        Args:
+            reviews: List of reviews with 'text' field (a missing or None text
+                is treated as empty)
+            fixed_size: If set, use this size (skip calculation)
+
+        Returns:
+            BatchSizeCalculation with recommended size and reasoning. The size
+            never exceeds ``len(reviews)``, so it is 0 for an empty list.
+        """
+        if fixed_size and fixed_size > 0:
+            batch_size = min(fixed_size, len(reviews))
+            return BatchSizeCalculation(
+                batch_size=batch_size,
+                system_prompt_tokens=self.system_prompt_tokens,
+                avg_tokens_per_review=self.stats.avg_review_tokens,
+                # Reserve output for the batch actually returned, as the adaptive path does
+                output_tokens_reserved=batch_size * self.stats.avg_output_tokens,
+                context_window=self.context_window,
+                utilization_target=self.target_utilization,
+                reasoning=f"Fixed batch size: {fixed_size}",
+            )
+
+        # Calculate actual token counts for these reviews
+        review_tokens = [self.estimate_tokens(r.get("text") or "") for r in reviews]
+        avg_review_tokens = sum(review_tokens) // len(review_tokens) if review_tokens else 150
+        max_review_tokens = max(review_tokens) if review_tokens else 300
+
+        # Blend the learned average with the current batch when history exists
+        effective_avg = (
+            (self.stats.avg_review_tokens + avg_review_tokens) // 2
+            if self.stats.total_reviews > 0
+            else avg_review_tokens
+        )
+
+        # Use learned output average
+        output_per_review = self.stats.avg_output_tokens
+
+        # Calculate available space
+        available = int(self.context_window * self.target_utilization)
+        available -= self.system_prompt_tokens
+        available -= 1000  # Safety buffer for JSON overhead
+
+        # Pad the average with 30% of the gap up to the longest review to leave
+        # room for longer reviews. The gap is clamped at zero: the learned
+        # average can exceed this batch's maximum, and a negative gap would make
+        # the "safe" estimate smaller than the plain average.
+        headroom = max(0, max_review_tokens - effective_avg)
+        variance_adjusted = effective_avg + int(headroom * 0.3)
+
+        # Each review needs: input tokens + output tokens
+        tokens_per_review_safe = variance_adjusted + output_per_review
+
+        batch_size = max(1, available // tokens_per_review_safe)
+
+        # Optionally keep the expected completion under the caller's output cap
+        if self.max_output_tokens and output_per_review > 0:
+            batch_size = max(1, min(batch_size, self.max_output_tokens // output_per_review))
+
+        # Cap at reasonable limits
+        batch_size = min(batch_size, 100, len(reviews))
+
+        reasoning = (
+            f"Context: {self.context_window:,} | "
+            f"System: {self.system_prompt_tokens:,} | "
+            f"Avg review: {effective_avg} (variance-adjusted: {variance_adjusted}) | "
+            f"Output/review: {output_per_review} | "
+            f"Target utilization: {self.target_utilization:.0%} | "
+            f"→ Batch size: {batch_size}"
+        )
+
+        return BatchSizeCalculation(
+            batch_size=batch_size,
+            system_prompt_tokens=self.system_prompt_tokens,
+            avg_tokens_per_review=effective_avg,
+            output_tokens_reserved=batch_size * output_per_review,
+            context_window=self.context_window,
+            utilization_target=self.target_utilization,
+            reasoning=reasoning,
+        )
+
+    def update_from_response(self, batch_size: int, input_tokens: int, output_tokens: int):
+        """
+        Update statistics from actual LLM response.
+
+        Call this after each batch to improve future estimates. A batch_size of
+        zero or less is ignored.
+
+        Args:
+            batch_size: Number of reviews the response covered
+            input_tokens: Input tokens reported for the whole batch
+            output_tokens: Output tokens reported for the whole batch
+        """
+        # NOTE(review): if callers pass the API's total prompt token count, the
+        # per-review average also absorbs the system prompt, which
+        # calculate_batch_size subtracts separately - confirm against the caller.
+        if batch_size > 0:
+            avg_input = input_tokens // batch_size
+            avg_output = output_tokens // batch_size
+
+            # Only batch totals are known, so each review is recorded with the
+            # batch average; min/max therefore track per-batch averages.
+            for _ in range(batch_size):
+                self.stats.update(avg_input, avg_output)
+
+            logger.debug(
+                f"BatchSizer updated: {batch_size} reviews, "
+                f"avg input={avg_input}, avg output={avg_output}, "
+                f"running avg input={self.stats.avg_review_tokens}, "
+                f"running avg output={self.stats.avg_output_tokens}"
+            )
+
+    def get_stats_summary(self) -> dict:
+        """Get current statistics summary (min/max are 0 until a batch is recorded)."""
+        return {
+            "total_reviews_processed": self.stats.total_reviews,
+            "avg_input_tokens": self.stats.avg_review_tokens,
+            "avg_output_tokens": self.stats.avg_output_tokens,
+            "min_review_tokens": self.stats.min_review_tokens if self.stats.total_reviews > 0 else 0,
+            "max_review_tokens": self.stats.max_review_tokens if self.stats.total_reviews > 0 else 0,
+            "model": self.model,
+            "context_window": self.context_window,
+        }
+
 # System prompt for URT classification
 SYSTEM_PROMPT = """You are a review classification system using URT (Universal Review Taxonomy) v5.1.

@@ -329,6 +565,18 @@ Return valid JSON matching this schema. No markdown, no explanations.
 }"""


+class BatchReviewInput(TypedDict):
+    """Input format for batch classification."""
+    # Caller-side identifier of the review
+    review_id: str
+    # Review body, inserted verbatim into the batch user prompt
+    text: str
+    # Star rating, shown in the per-review header of the batch user prompt
+    rating: int
+
+
+class BatchClassificationResponse(TypedDict):
+    """Response format for batch classification (the JSON object requested from the model)."""
+    # One entry per review; review_index is the review's position in the request
+    reviews: list[dict[str, Any]]  # Each contains review_index, spans, review_summary
+
+
 class LLMClientBase(ABC):
    """Abstract base class for LLM clients."""

@@ -337,18 +585,24 @@ class LLMClientBase(ABC):
        self.total_tokens_used = 0
        self.total_cost_usd = 0.0
        self._custom_prompt: str | None = None
+        self._custom_prompt_batch: str | None = None
+        self._cached_tokens: int = 0  # Track cached token usage

-    def set_prompt(self, prompt: str) -> None:
+    def set_prompt(self, prompt: str, batch_prompt: str | None = None) -> None:
        """
-        Set a custom system prompt (e.g., built dynamically from database).
+        Set custom system prompts (e.g., built dynamically from database).

        Args:
-            prompt: The system prompt to use for classification
+            prompt: The system prompt for single review classification
+            batch_prompt: The system prompt for batch classification (if different)
        """
        self._custom_prompt = prompt
+        self._custom_prompt_batch = batch_prompt or prompt

-    def get_prompt(self) -> str:
+    def get_prompt(self, batch_mode: bool = False) -> str:
        """Get the current system prompt (custom or default)."""
+        if batch_mode:
+            return self._custom_prompt_batch or self._custom_prompt or SYSTEM_PROMPT
        return self._custom_prompt or SYSTEM_PROMPT

    @abstractmethod
@@ -358,7 +612,7 @@ class LLMClientBase(ABC):
        profile: str = "standard",
    ) -> tuple[LLMClassificationResponse, dict[str, Any]]:
        """
-        Classify a review and extract spans.
+        Classify a single review and extract spans.

        Args:
            review_text: The review text to classify
@@ -369,6 +623,24 @@ class LLMClientBase(ABC):
        """
        pass

+    @abstractmethod
+    async def classify_batch(
+        self,
+        reviews: list[BatchReviewInput],
+        profile: str = "standard",
+    ) -> tuple[list[LLMClassificationResponse], dict[str, Any]]:
+        """
+        Classify multiple reviews in a single LLM call.
+
+        Args:
+            reviews: List of reviews with review_id, text, and rating
+            profile: Classification profile (lite/core/standard/full)
+
+        Returns:
+            Tuple of (list of classification responses, aggregated metadata)
+
+        Raises:
+            PartialBatchResult: In the OpenAI implementation, when the response
+                is not valid JSON but some per-review results could be
+                recovered. Callers should reprocess only ``missing_indices``.
+        """
+        pass
+
    @abstractmethod
    async def generate(
        self,
@@ -396,16 +668,31 @@ class LLMClientBase(ABC):
        """Close the client and cleanup resources."""
        pass

+    def _build_batch_user_prompt(self, reviews: list[BatchReviewInput]) -> str:
+        """
+        Render the user message for a batch request.
+
+        The message is an instruction line followed, for every review, by a
+        header carrying its zero-based position and rating, the review text,
+        and a blank separator line.
+        """
+        header = f"Classify these {len(reviews)} reviews. Return JSON with 'reviews' array."
+        parts = [header, ""]
+
+        for position, item in enumerate(reviews):
+            parts.extend((
+                f"---REVIEW {position} (rating={item['rating']}★)---",
+                item["text"],
+                "",
+            ))
+
+        return "\n".join(parts)
+

 class OpenAIClient(LLMClientBase):
-    """OpenAI LLM client implementation."""
+    """OpenAI LLM client implementation with batch support and prompt caching."""

    # Pricing per 1M tokens (as of 2024)
+    # Cached input tokens are 50% cheaper
    PRICING = {
-        "gpt-4o": {"input": 5.0, "output": 15.0},
-        "gpt-4o-mini": {"input": 0.15, "output": 0.60},
-        "gpt-4-turbo": {"input": 10.0, "output": 30.0},
-        "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
+        "gpt-4o": {"input": 2.50, "cached_input": 1.25, "output": 10.0},
+        "gpt-4o-mini": {"input": 0.15, "cached_input": 0.075, "output": 0.60},
+        "gpt-4-turbo": {"input": 10.0, "cached_input": 5.0, "output": 30.0},
+        "gpt-3.5-turbo": {"input": 0.50, "cached_input": 0.25, "output": 1.50},
    }

    def __init__(self, config: Config):
@@ -420,7 +707,7 @@ class OpenAIClient(LLMClientBase):
        review_text: str,
        profile: str = "standard",
    ) -> tuple[LLMClassificationResponse, dict[str, Any]]:
-        """Classify using OpenAI."""
+        """Classify a single review using OpenAI."""
        start_time = time.time()

        messages = [
@@ -446,27 +733,154 @@ class OpenAIClient(LLMClientBase):

        result = json.loads(content)

-        # Calculate costs
+        # Calculate costs (with caching support)
+        metadata = self._calculate_openai_costs(response, start_time)
+
+        return result, metadata
+
+    async def classify_batch(
+        self,
+        reviews: list[BatchReviewInput],
+        profile: str = "standard",
+    ) -> tuple[list[LLMClassificationResponse], dict[str, Any]]:
+        """
+        Classify several reviews with one chat completion request.
+
+        The batch system prompt is sent unchanged on every call so that it can
+        benefit from prompt caching.
+
+        Returns:
+            Tuple of (one classification per input review, call metadata).
+            An empty input yields ``([], {"error": "No reviews provided"})``.
+
+        Raises:
+            ValueError: The completion came back with no content
+            PartialBatchResult: The content is not valid JSON but some reviews
+                could be recovered from it
+            json.JSONDecodeError: The content is not valid JSON and nothing
+                could be recovered
+        """
+        if not reviews:
+            return [], {"error": "No reviews provided"}
+
+        started_at = time.time()
+        review_count = len(reviews)
+
+        # Assemble the conversation: batch system prompt + rendered reviews
+        user_prompt = self._build_batch_user_prompt(reviews)
+        messages = [
+            {"role": "system", "content": self.get_prompt(batch_mode=True)},
+            {"role": "user", "content": user_prompt},
+        ]
+
+        response = await self.client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=self.config.llm_temperature,
+            response_format={"type": "json_object"},
+            # Budget 500 output tokens per review, never more than 16000 in total
+            max_tokens=min(16000, review_count * 500),
+            # Batches get twice the single-review timeout
+            timeout=self.config.llm_timeout_seconds * 2,
+        )
+
+        raw = response.choices[0].message.content
+        if not raw:
+            raise ValueError("Empty response from OpenAI")
+
+        # Costs are recorded before parsing so that they are available on the
+        # partial-recovery path as well.
+        metadata = self._calculate_openai_costs(response, started_at, batch_size=review_count)
+
+        try:
+            parsed = self._parse_batch_response(json.loads(raw), reviews)
+        except json.JSONDecodeError as e:
+            # Whole-document parse failed; salvage whatever reviews we can
+            logger.warning(f"Full JSON parse failed: {e}, attempting partial recovery...")
+
+            recovered, missing = self._extract_partial_batch_json(raw, review_count)
+            if not recovered:
+                raise
+
+            raise PartialBatchResult(
+                message=f"Recovered {len(recovered)}/{len(reviews)} reviews from malformed JSON",
+                partial_results=recovered,
+                missing_indices=missing,
+                metadata=metadata,
+            )
+        else:
+            return parsed, metadata
+
+    def _calculate_openai_costs(
+        self,
+        response: Any,
+        start_time: float,
+        batch_size: int = 1,
+    ) -> dict[str, Any]:
+        """
+        Calculate costs from OpenAI response, accounting for cached tokens.
+
+        Also adds this call's tokens, cost and cached-token count to the
+        running totals kept on the client.
+
+        Args:
+            response: Chat completion response; counts are read from ``response.usage``
+            start_time: ``time.time()`` value taken before the request, used for latency
+            batch_size: Number of reviews covered by the call (1 for a single review)
+
+        Returns:
+            Metadata dict with model, token counts, cost in USD, latency in
+            milliseconds, batch size and tokens per review
+        """
         input_tokens = response.usage.prompt_tokens if response.usage else 0
         output_tokens = response.usage.completion_tokens if response.usage else 0
         total_tokens = input_tokens + output_tokens

-        pricing = self.PRICING.get(self.model, {"input": 0.15, "output": 0.60})
-        cost = (input_tokens * pricing["input"] + output_tokens * pricing["output"]) / 1_000_000
+        # Check for cached tokens (OpenAI returns this in newer API versions).
+        # The details object can be absent, and its cached_tokens field can be
+        # None; both count as zero so the subtraction below cannot fail.
+        cached_tokens = 0
+        if hasattr(response.usage, "prompt_tokens_details") and response.usage.prompt_tokens_details:
+            cached_tokens = getattr(response.usage.prompt_tokens_details, "cached_tokens", 0) or 0
+
+        uncached_input = input_tokens - cached_tokens
+
+        # Models without a pricing entry are billed at the fallback rates below
+        pricing = self.PRICING.get(self.model, {"input": 0.15, "cached_input": 0.075, "output": 0.60})
+        cost = (
+            uncached_input * pricing["input"]
+            + cached_tokens * pricing.get("cached_input", pricing["input"] * 0.5)
+            + output_tokens * pricing["output"]
+        ) / 1_000_000

         self.total_tokens_used += total_tokens
         self.total_cost_usd += cost
+        self._cached_tokens += cached_tokens

-        metadata = {
+        return {
             "model": self.model,
             "input_tokens": input_tokens,
+            "cached_tokens": cached_tokens,
             "output_tokens": output_tokens,
             "total_tokens": total_tokens,
             "cost_usd": cost,
             "latency_ms": int((time.time() - start_time) * 1000),
+            "batch_size": batch_size,
+            "tokens_per_review": total_tokens / batch_size if batch_size > 0 else 0,
         }

-        return result, metadata
+    def _parse_batch_response(
+        self,
+        batch_result: dict[str, Any],
+        original_reviews: list[BatchReviewInput],
+    ) -> list[LLMClassificationResponse]:
+        """
+        Parse batch response into individual review results.
+
+        Accepts either ``{"reviews": [...]}`` or a bare list of review entries.
+        Entries are matched to inputs by ``review_index``. An entry without a
+        usable index is matched by its position in the list. When two entries
+        claim the same index the later one wins.
+
+        Args:
+            batch_result: Decoded JSON returned by the model
+            original_reviews: The reviews that were sent, in request order
+
+        Returns:
+            One result per original review, in request order. Reviews the model
+            left out get a fallback response built from their text.
+        """
+        results: list[LLMClassificationResponse] = []
+
+        # Handle both formats: {"reviews": [...]} and direct list.
+        # The type is checked before calling .get, which a list does not have.
+        if isinstance(batch_result, list):
+            review_data = batch_result
+        elif isinstance(batch_result, dict):
+            review_data = batch_result.get("reviews", [])
+        else:
+            review_data = []
+        if not isinstance(review_data, list):
+            # e.g. "reviews": null - treat every review as missing
+            review_data = []
+
+        # Create a lookup by review_index
+        results_by_index: dict[int, dict[str, Any]] = {}
+        for position, entry in enumerate(review_data):
+            if not isinstance(entry, dict):
+                continue  # malformed entry; its review falls back below
+            index = entry.get("review_index", position)
+            if isinstance(index, str) and index.strip().isdigit():
+                index = int(index)  # models sometimes quote the index
+            elif not isinstance(index, int):
+                index = position
+            results_by_index[index] = entry
+
+        for i, original in enumerate(original_reviews):
+            if i in results_by_index:
+                review_result = results_by_index[i]
+                spans = review_result.get("spans")
+                if spans is None:
+                    spans = []
+                summary = review_result.get("review_summary")
+                if summary is None:
+                    summary = {
+                        "dominant_valence": "V0",
+                        "dominant_domain": "O",
+                        "span_count": len(spans),
+                        "has_comparative": False,
+                        "has_entity": False,
+                    }
+                # Convert to standard format
+                results.append({
+                    "spans": spans,
+                    "review_summary": summary,
+                })
+            else:
+                # Missing review - create fallback
+                logger.warning(f"Review index {i} missing from batch response, using fallback")
+                results.append(create_fallback_response(original["text"]))
+
+        return results

    async def generate(
        self,
@@ -511,14 +925,16 @@ class OpenAIClient(LLMClientBase):


 class AnthropicClient(LLMClientBase):
-    """Anthropic LLM client implementation."""
+    """Anthropic LLM client implementation with batch support and prompt caching."""

    # Pricing per 1M tokens (as of 2024)
+    # Cached input tokens are 90% cheaper with Anthropic
    PRICING = {
-        "claude-3-opus-20240229": {"input": 15.0, "output": 75.0},
-        "claude-3-sonnet-20240229": {"input": 3.0, "output": 15.0},
-        "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
-        "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0},
+        "claude-3-opus-20240229": {"input": 15.0, "cached_input": 1.50, "output": 75.0},
+        "claude-3-sonnet-20240229": {"input": 3.0, "cached_input": 0.30, "output": 15.0},
+        "claude-3-haiku-20240307": {"input": 0.25, "cached_input": 0.03, "output": 1.25},
+        "claude-3-5-sonnet-20241022": {"input": 3.0, "cached_input": 0.30, "output": 15.0},
+        "claude-sonnet-4-20250514": {"input": 3.0, "cached_input": 0.30, "output": 15.0},
    }

    def __init__(self, config: Config):
@@ -533,13 +949,16 @@ class AnthropicClient(LLMClientBase):
        review_text: str,
        profile: str = "standard",
    ) -> tuple[LLMClassificationResponse, dict[str, Any]]:
-        """Classify using Anthropic."""
+        """Classify a single review using Anthropic."""
        start_time = time.time()

+        # Use cache_control for prompt caching
+        system_content = self._build_cached_system(self.get_prompt())
+
        response = await self.client.messages.create(
            model=self.model,
            max_tokens=4096,
-            system=self.get_prompt(),
+            system=system_content,
            messages=[
                {
                    "role": "user",
@@ -554,30 +973,161 @@ class AnthropicClient(LLMClientBase):
        if not content:
            raise ValueError("Empty response from Anthropic")

-        # Try to extract JSON from response
        result = self._extract_json(content)
+        metadata = self._calculate_anthropic_costs(response, start_time)

-        # Calculate costs
+        return result, metadata
+
+    async def classify_batch(
+        self,
+        reviews: list[BatchReviewInput],
+        profile: str = "standard",
+    ) -> tuple[list[LLMClassificationResponse], dict[str, Any]]:
+        """
+        Classify ``reviews`` in one Anthropic call; the system prompt is cache-marked.
+        Returns ``(results, metadata)``, or ``([], {"error": ...})`` for empty input.
+        Raises ValueError on an empty reply, PartialBatchResult when only some reviews
+        are recovered from malformed JSON, and the original parse error when none are.
+        """
+        if not reviews:
+            return [], {"error": "No reviews provided"}
+
+        start_time = time.time()
+
+        # Build batch user prompt (note: `profile` is not read anywhere in this method)
+        user_prompt = self._build_batch_user_prompt(reviews)
+
+        # Wrap the batch-mode system prompt in a cache_control block
+        system_content = self._build_cached_system(self.get_prompt(batch_mode=True))
+
+        # 500 output tokens per review, capped at 16000. NOTE(review): confirm 500 is enough for 1-2 review batches
+        max_output_tokens = min(16000, len(reviews) * 500)
+
+        response = await self.client.messages.create(
+            model=self.model,
+            max_tokens=max_output_tokens,
+            system=system_content,
+            messages=[{"role": "user", "content": user_prompt}],
+            temperature=self.config.llm_temperature,
+        )
+
+        # Only the text of the first content block is read
+        content = response.content[0].text if response.content else ""
+        if not content:
+            raise ValueError("Empty response from Anthropic")
+
+        metadata = self._calculate_anthropic_costs(response, start_time, batch_size=len(reviews))
+
+        # Try full JSON extraction first (usage/cost above is already recorded either way)
+        try:
+            batch_result = self._extract_json(content)
+            review_results = self._parse_batch_response(batch_result, reviews)
+            return review_results, metadata
+        except (json.JSONDecodeError, ValueError) as e:
+            # Full parse failed - try partial extraction
+            logger.warning(f"Full JSON parse failed: {e}, attempting partial recovery...")
+
+            partial_reviews, missing_indices = self._extract_partial_batch_json(
+                content, len(reviews)
+            )
+
+            if partial_reviews:
+                # Hand the recovered subset, the missing indices and the metadata to the caller
+                raise PartialBatchResult(
+                    message=f"Recovered {len(partial_reviews)}/{len(reviews)} reviews from malformed JSON",
+                    partial_results=partial_reviews,
+                    missing_indices=missing_indices,
+                    metadata=metadata,
+                )
+            else:
+                # Couldn't recover anything - re-raise original error
+                raise
+
+    def _build_cached_system(self, prompt: str) -> list[dict[str, Any]]:
+        """Wrap ``prompt`` in a one-element list holding a cache-marked text block."""
+        cached_block: dict[str, Any] = {
+            "type": "text",
+            "text": prompt,
+            "cache_control": {"type": "ephemeral"},
+        }
+        # Callers pass this list as the ``system`` argument of messages.create().
+        return [cached_block]
+
+    def _calculate_anthropic_costs(
+        self,
+        response: Any,
+        start_time: float,
+        batch_size: int = 1,
+    ) -> dict[str, Any]:
+        """Build usage/cost metadata for a response and add it to the running totals."""
        input_tokens = response.usage.input_tokens
        output_tokens = response.usage.output_tokens
        total_tokens = input_tokens + output_tokens

-        pricing = self.PRICING.get(self.model, {"input": 3.0, "output": 15.0})
-        cost = (input_tokens * pricing["input"] + output_tokens * pricing["output"]) / 1_000_000
+        # Anthropic reports cache reads and cache writes separately from input_tokens
+        cached_tokens = getattr(response.usage, "cache_read_input_tokens", 0) or 0
+        cache_creation_tokens = getattr(response.usage, "cache_creation_input_tokens", 0) or 0
+
+        pricing = self.PRICING.get(self.model, {"input": 3.0, "cached_input": 0.30, "output": 15.0})
+        # input_tokens already excludes cached tokens (do not subtract); writes bill at 1.25x input
+        cost = (
+            input_tokens * pricing["input"]
+            + cache_creation_tokens * pricing["input"] * 1.25
+            + cached_tokens * pricing.get("cached_input", pricing["input"] * 0.1)
+            + output_tokens * pricing["output"]
+        ) / 1_000_000

        self.total_tokens_used += total_tokens
        self.total_cost_usd += cost
+        self._cached_tokens += cached_tokens

-        metadata = {
+        return {
            "model": self.model,
            "input_tokens": input_tokens,
+            "cached_tokens": cached_tokens,
+            "cache_creation_tokens": cache_creation_tokens,
            "output_tokens": output_tokens,
            "total_tokens": total_tokens,
            "cost_usd": cost,
            "latency_ms": int((time.time() - start_time) * 1000),
+            "batch_size": batch_size,
+            "tokens_per_review": total_tokens / batch_size if batch_size > 0 else 0,
        }

-        return result, metadata
+    def _parse_batch_response(
+        self,
+        batch_result: dict[str, Any],
+        original_reviews: list[BatchReviewInput],
+    ) -> list[LLMClassificationResponse]:
+        """Map a parsed batch payload onto ``original_reviews``: one result per input, in order."""
+        results: list[LLMClassificationResponse] = []
+
+        # Accept {"reviews": [...]} or a bare list. Test the type before calling .get():
+        # a top-level JSON array parses to a list, and list has no .get (AttributeError).
+        is_list = isinstance(batch_result, list)
+        review_data = batch_result if is_list else (batch_result.get("reviews") or [])
+
+        # Create a lookup by review_index
+        results_by_index = {r.get("review_index", i): r for i, r in enumerate(review_data)}
+
+        for i, original in enumerate(original_reviews):
+            if i in results_by_index:
+                review_result = results_by_index[i]
+                results.append({
+                    "spans": review_result.get("spans", []),
+                    "review_summary": review_result.get("review_summary", {
+                        "dominant_valence": "V0",
+                        "dominant_domain": "O",
+                        "span_count": len(review_result.get("spans", [])),
+                        "has_comparative": False,
+                        "has_entity": False,
+                    }),
+                })
+            else:
+                logger.warning(f"Review index {i} missing from batch response, using fallback")
+                results.append(create_fallback_response(original["text"]))
+
+        return results

    async def generate(
        self,
@@ -607,7 +1157,6 @@ class AnthropicClient(LLMClientBase):
        self.total_tokens_used += input_tokens + output_tokens
        self.total_cost_usd += cost

-        # Extract JSON from response (handles code blocks)
        return self._extract_json_string(content)

    def _extract_json_string(self, content: str) -> str:
@@ -615,16 +1164,13 @@ class AnthropicClient(LLMClientBase):
        import re
        content = content.strip()

-        # If it starts with {, return as-is
        if content.startswith("{"):
            return content

-        # Try to find JSON in code blocks
        json_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content)
        if json_match:
            return json_match.group(1)

-        # Try to find JSON object
        json_match = re.search(r"\{[\s\S]*\}", content)
        if json_match:
            return json_match.group(0)
@@ -635,26 +1181,162 @@ class AnthropicClient(LLMClientBase):
        """Extract JSON from response, handling markdown code blocks."""
        content = content.strip()

-        # Try direct parse first
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            pass

-        # Try to find JSON in code blocks
        import re

        json_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content)
        if json_match:
            return json.loads(json_match.group(1))

-        # Try to find JSON object
        json_match = re.search(r"\{[\s\S]*\}", content)
        if json_match:
            return json.loads(json_match.group(0))

        raise ValueError(f"Could not extract JSON from response: {content[:200]}")

+    def _extract_partial_batch_json(
+        self, content: str, expected_count: int
+    ) -> tuple[list[dict[str, Any]], list[int]]:
+        """
+        Recover complete review objects from truncated/malformed batch JSON text.
+
+        Returns:
+            Tuple of (parsed_reviews, missing_indices); missing is taken over range(expected_count)
+        """
+        import re
+
+        parsed_reviews: list[dict[str, Any]] = []
+        found_indices: set[int] = set()
+
+        # Pattern to match review objects whose first key is review_index.
+        # It allows one level of nested {...} inside the object, no deeper.
+        review_pattern = r'\{\s*"review_index"\s*:\s*(\d+)[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
+
+        # Pass 1: regex scan for complete review objects
+        for match in re.finditer(review_pattern, content):
+            try:
+                # Extract the matched text and try to parse
+                obj_text = match.group(0)
+
+                # Parse the candidate as standalone JSON
+                try:
+                    obj = json.loads(obj_text)
+                except json.JSONDecodeError:
+                    # Not valid JSON on its own: skip this match (no repair is attempted)
+                    continue
+
+                if "review_index" in obj and "spans" in obj:
+                    idx = obj["review_index"]
+                    if idx not in found_indices:
+                        parsed_reviews.append(obj)
+                        found_indices.add(idx)
+            except Exception:
+                continue
+
+        # Pass 2, only when pass 1 recovered fewer than half: scan review_index markers
+        if len(parsed_reviews) < expected_count // 2:
+            # Find all review_index positions
+            index_matches = list(re.finditer(r'"review_index"\s*:\s*(\d+)', content))
+
+            for i, match in enumerate(index_matches):
+                idx = int(match.group(1))
+                if idx in found_indices:
+                    continue
+
+                # Find the start of this review object (nearest '{' before the marker)
+                start = content.rfind('{', 0, match.start())
+                if start == -1:
+                    continue
+
+                # Find the end - either next review_index or end of content
+                if i + 1 < len(index_matches):
+                    end_search = index_matches[i + 1].start()
+                else:
+                    end_search = len(content)
+
+                # Candidate text: from the opening brace up to the next marker
+                obj_text = content[start:end_search]
+
+                # Count braces to find the matching close (braces inside JSON strings count too)
+                brace_count = 0
+                end_pos = 0
+                for j, char in enumerate(obj_text):
+                    if char == '{':
+                        brace_count += 1
+                    elif char == '}':
+                        brace_count -= 1
+                        if brace_count == 0:
+                            end_pos = j + 1
+                            break
+
+                if end_pos > 0:
+                    try:
+                        obj = json.loads(obj_text[:end_pos])
+                        # Stricter than pass 1, which only checks that both keys are present
+                        if self._validate_recovered_review(obj):
+                            review_idx = obj["review_index"]
+                            if review_idx not in found_indices:
+                                parsed_reviews.append(obj)
+                                found_indices.add(review_idx)
+                    except json.JSONDecodeError:
+                        continue
+
+        # Determine missing indices
+        missing_indices = [i for i in range(expected_count) if i not in found_indices]
+
+        logger.info(
+            f"Partial JSON recovery: {len(parsed_reviews)}/{expected_count} reviews recovered, "
+            f"{len(missing_indices)} missing"
+        )
+
+        return parsed_reviews, missing_indices
+
+    def _validate_recovered_review(self, obj: dict[str, Any]) -> bool:
+        """
+        Validate a recovered review has all required fields with valid data.
+
+        Returns True only if the review is complete and usable.
+        Rejects:
+        - Missing or non-int review_index; missing, non-list or empty spans
+        - Spans without non-empty span text (key "span_text", or "text")
+        - Spans with missing or empty urt_primary, valence or intensity
+        - A review_summary that is present but is not a dict
+        """
+        # Check required top-level fields
+        if "review_index" not in obj:
+            return False
+        if not isinstance(obj.get("review_index"), int):
+            return False
+
+        if "spans" not in obj:
+            return False
+        if not isinstance(obj["spans"], list):
+            return False
+        if len(obj["spans"]) == 0:
+            # Empty spans = no useful classification data
+            return False
+
+        # Validate each span has required fields with non-empty values.
+        # The output schemas name the span text "span_text"; "text" is accepted as well.
+        required_span_fields = ["urt_primary", "valence", "intensity"]
+        for span in obj["spans"]:
+            if not isinstance(span, dict):
+                return False
+            if not (span.get("span_text") or span.get("text")):
+                return False
+            if not all(span.get(field) for field in required_span_fields):
+                return False
+
+        # review_summary is optional but if present should be a dict
+        if "review_summary" in obj and not isinstance(obj["review_summary"], dict):
+            return False
+
+        return True
+
    async def close(self) -> None:
        """Close the Anthropic client."""
        await self.client.close()
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/prompt_builder.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/prompt_builder.py
@@ -0,0 +1,480 @@
+"""
+Dynamic prompt builder for URT classification.
+
+Fetches taxonomy from database to build the system prompt,
+ensuring single source of truth and including examples.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    import asyncpg
+
+logger = logging.getLogger(__name__)
+
+# Entity extraction rules for staff recognition
+ENTITY_EXTRACTION_RULES = """
+## ENTITY EXTRACTION (Staff Recognition)
+
+When a span mentions a SPECIFIC PERSON by name, extract:
+- entity: The person's name exactly as written
+- entity_type: "staff" for employees, "customer" for other people mentioned
+
+### EXTRACT (set entity + entity_type):
+- "Miglė was amazing" → entity: "Miglė", entity_type: "staff"
+- "Thank you Carlos!" → entity: "Carlos", entity_type: "staff"
+- "Adrian helped us" → entity: "Adrian", entity_type: "staff"
+- "Ačiū Artūrui" → entity: "Artūrui", entity_type: "staff"
+- "bartender Eivydas" → entity: "Eivydas", entity_type: "staff"
+- "our server Maria" → entity: "Maria", entity_type: "staff"
+
+### DO NOT EXTRACT (keep entity: null):
+- "The bartender was rude" → no specific name, keep null
+- "Staff was friendly" → generic reference, keep null
+- "Service was great" → no person mentioned, keep null
+- "The manager helped" → role only, no name, keep null
+
+### Name Recognition Tips:
+- Look for CAPITALIZED words that are NOT at sentence start
+- Common patterns: "[Name] was/is [adjective]", "thank [Name]", "[role] [Name]"
+- International names: Miglė, Eivydas, Žydrė, Artūras (Lithuanian), Carlos, María (Spanish), etc.
+- When a name appears near: bartender, waiter, server, staff, manager, helped, thank, amazing, great, rude
+
+IMPORTANT: When in doubt, extract the name. Staff recognition is valuable - false positives are acceptable.
+"""
+
+# Static parts of the prompt that don't change
+PROMPT_HEADER = """You are a review classification system using URT (Universal Review Taxonomy) v5.1.
+
+Your task is to extract semantic spans from customer reviews and classify each span independently.
+
+## SPAN EXTRACTION RULES
+
+**CRITICAL: Use TOPIC-BASED splitting, NOT sentence-based splitting.**
+
+A span = all consecutive text about the SAME topic/domain, regardless of sentence count.
+
+### When to KEEP TOGETHER (same span):
+- Multiple sentences about the same topic: "The food was great. I loved the pasta. The sauce was perfect." → ONE span (all about Offering)
+- Cause and effect: "The wait was long because they were understaffed" → ONE span
+- Elaboration: "Staff was rude. They ignored us for 20 minutes." → ONE span (both about People)
+- Single-topic reviews: Even if 5 sentences, if all about food → ONE span
+
+### When to SPLIT (separate spans):
+- Contrasting conjunctions that change topic: "Food was great BUT service was slow" → TWO spans
+- Domain change: food (O) → staff (P) → ambiance (E) = split at each change
+- Target change: "The waiter was nice but the manager was rude" → TWO spans (different people)
+
+### Examples:
+- "Amazing food. Best burger ever. Fries were crispy too." → 1 span (all Offering, V+)
+- "Food was great but we waited an hour." → 2 spans (Offering V+, Journey V-)
+- "I've been coming here for years. Always consistent quality." → 1 span (Relationship)
+- "The staff are lovely and amazing with kids. More highchairs are definitely needed though." → 2 spans (People V+, Access V-)
+
+**Guardrails**:
+- Prefer FEWER, LARGER spans over many small ones
+- Most reviews should have 1-3 spans, rarely more
+- Min 1 span per review
+- Spans must be non-overlapping
+
+## CRITICAL CLASSIFICATION RULES (Common Mistakes to Avoid)
+
+### RULE 1: Money/Price → ALWAYS use V codes (Value)
+Any mention of: price, cost, fee, charge, €, $, deposit, refund, expensive, cheap, affordable
+- ✅ "50€ extra" → V1.03 Hidden Costs
+- ✅ "good price" → V1.01 Price Level
+- ❌ NEVER use P codes for pricing (P is for People/staff behavior)
+
+### RULE 2: Staff Behavior → ALWAYS use P codes (People)
+Any mention of: friendly, rude, helpful, patient, amable, nett, simpático, attentive
+- ✅ "staff was friendly" → P1.01 Warmth
+- ✅ "rude employee" → P1.02 Respect
+- ❌ NEVER use A codes for staff behavior (A is for Access/availability)
+
+### RULE 3: Scam/Fraud/Deception → ALWAYS use R codes (Relationship)
+Any mention of: scam, estafa, fraud, lied, cheat, dishonest, robbery, Abzocker
+- ✅ "felt scammed" → R1.02 Ethics
+- ✅ "they lied" → R1.01 Honesty
+- ❌ NEVER use P or V codes for ethical issues
+
+### RULE 4: Location/Finding → Use A codes (Access)
+Difficulty finding a place, shuttle, meeting point, confusing directions
+- ✅ "couldn't find shuttle" → A1.04 Wayfinding
+- ✅ "far from airport" → A4.01 Location
+- ❌ Don't confuse with J1.02 Punctuality (which is about being on time)
+
+### RULE 5: Wait Time vs Punctuality
+- J1.01 Speed = how FAST service is ("waited 2 hours", "slow service")
+- J1.02 Punctuality = being ON TIME vs scheduled ("arrived late", "delayed")
+
+"""
+
+PROMPT_BATCH_OUTPUT_FORMAT = """
+## BATCH OUTPUT FORMAT
+
+When given multiple reviews, return a JSON object with a "reviews" array.
+Each review in the array contains its own spans and summary.
+
+{
+  "reviews": [
+    {
+      "review_index": 0,
+      "spans": [
+        {
+          "span_index": 0,
+          "span_text": "exact text from this review",
+          "span_start": 0,
+          "span_end": 25,
+          "urt_primary": "P1.01",
+          "urt_secondary": [],
+          "valence": "V+",
+          "intensity": "I2",
+          "specificity": "S2",
+          "actionability": "A1",
+          "temporal": "TC",
+          "evidence": "ES",
+          "comparative": "CR-N",
+          "is_primary": true,
+          "confidence": "high",
+          "entity": "Maria",
+          "entity_type": "staff",
+          "usn": "URT:S:P1.01:+2:21TC.ES.N"
+        }
+      ],
+      "review_summary": {
+        "dominant_valence": "V+",
+        "dominant_domain": "P",
+        "span_count": 1,
+        "has_comparative": false,
+        "has_entity": true
+      }
+    },
+    {
+      "review_index": 1,
+      "spans": [ ... ],
+      "review_summary": { ... }
+    }
+  ]
+}
+
+CRITICAL RULES FOR BATCH PROCESSING:
+1. Process each review INDEPENDENTLY - do not mix content between reviews
+2. review_index MUST match the input order (0, 1, 2, ...)
+3. span_start and span_end are relative to THAT review's text only
+4. If you see the same entity (e.g., staff name "Maria") in multiple reviews, use consistent spelling
+5. Output ALL reviews in the batch - never skip any
+6. Each review must have at least 1 span
+"""
+
+PROMPT_SINGLE_OUTPUT_FORMAT = """
+## SINGLE REVIEW OUTPUT FORMAT
+
+Return valid JSON matching this schema. No markdown, no explanations.
+
+{
+  "spans": [
+    {
+      "span_index": 0,
+      "span_text": "exact text from review",
+      "span_start": 0,
+      "span_end": 25,
+      "urt_primary": "O1.01",
+      "urt_secondary": [],
+      "valence": "V+",
+      "intensity": "I2",
+      "specificity": "S2",
+      "actionability": "A1",
+      "temporal": "TC",
+      "evidence": "ES",
+      "comparative": "CR-N",
+      "is_primary": true,
+      "confidence": "high",
+      "entity": null,
+      "entity_type": null,
+      "relation_type": null,
+      "related_span_index": null,
+      "usn": "URT:S:O1.01:+2:21TC.ES.N"
+    }
+  ],
+  "review_summary": {
+    "dominant_valence": "V+",
+    "dominant_domain": "O",
+    "span_count": 1,
+    "has_comparative": false,
+    "has_entity": false
+  }
+}
+"""
+
+PROMPT_DIMENSIONS = """
+## DIMENSION CODES
+
+### Valence
+- V+ : Positive sentiment
+- V- : Negative sentiment
+- V0 : Neutral/factual
+- V± : Mixed within the span
+
+### Intensity
+- I1 : Low ("okay", "fine", "decent")
+- I2 : Moderate ("good", "bad", "slow")
+- I3 : High ("amazing", "terrible", "unacceptable")
+
+### Specificity
+- S1 : Vague ("it was bad")
+- S2 : Some detail ("the food was cold")
+- S3 : Precise ("waited 45 minutes for appetizers")
+
+### Actionability
+- A1 : No clear action possible
+- A2 : Possible actions, unclear which
+- A3 : Clear, specific action ("train staff on X", "fix Y")
+
+### Temporal
+- TC : Current visit (default when no markers)
+- TR : Recent pattern ("lately", "recently", "again")
+- TH : Historical ("for years", "always", "used to")
+- TF : Future ("won't return", "next time", "I expect")
+
+### Evidence
+- ES : Stated explicitly in text (default)
+- EI : Inferred logically (not stated, but entailed)
+- EC : Contextual (depends on surrounding text)
+
+### Comparative
+- CR-N : No comparison (default)
+- CR-B : Better than alternatives
+- CR-W : Worse than alternatives
+- CR-S : Same as alternatives
+
+## PRIMARY SPAN SELECTION
+
+Mark exactly ONE span as is_primary=true using this order:
+1. Highest intensity (I3 > I2 > I1)
+2. Tie-break: negative over positive (V- > V± > V0 > V+)
+3. Tie-break: earliest span_index
+
+## USN (URT String Notation)
+
+Generate a USN string for each span:
+```
+URT:S:{primary}[+{sec1}][+{sec2}]:{valence_sign}{intensity_num}:{S#}{A#}{temporal}.{evidence}.{CR_suffix}
+```
+
+Examples:
+- `URT:S:J1.03:-2:22TC.ES.N` (J1.03, V-, I2, S2, A2, TC, ES, CR-N)
+- `URT:S:P1.01+O2.03:+3:33TR.ES.B` (P1.01 primary, O2.03 secondary, V+, I3, S3, A3, TR, ES, CR-B)
+
+Valence encoding: + for V+, - for V-, 0 for V0, ± for V±
+CR suffix: N=CR-N, B=CR-B, W=CR-W, S=CR-S"""
+
+# Domain-specific warnings to include
+DOMAIN_WARNINGS = {
+    "V": "USE FOR ALL PRICE/COST/FEE/MONEY MENTIONS",
+    "P": "USE FOR STAFF BEHAVIOR ONLY, NOT PRICING",
+}
+
+
+class PromptBuilder:
+    """
+    Builds the classification prompt dynamically from database taxonomy.
+
+    Usage:
+        builder = PromptBuilder(db_pool)
+        prompt = await builder.build()  # For single review
+        prompt = await builder.build(batch_mode=True)  # For batch processing
+    """
+
+    def __init__(self, pool: asyncpg.Pool):
+        self.pool = pool
+        self._cached_prompt_single: str | None = None
+        self._cached_prompt_batch: str | None = None
+        self._cached_taxonomy: str | None = None
+
+    async def build(self, force_refresh: bool = False, batch_mode: bool = False) -> str:
+        """
+        Build the complete system prompt from database taxonomy.
+
+        Args:
+            force_refresh: If True, refetch the taxonomy and discard both cached prompts
+            batch_mode: If True, include batch output format
+
+        Returns:
+            Complete system prompt string
+        """
+        # Check if we can use cached version
+        cache = self._cached_prompt_batch if batch_mode else self._cached_prompt_single
+        if not force_refresh and cache:
+            return cache
+
+        # Build taxonomy section (shared between single and batch)
+        if not self._cached_taxonomy or force_refresh:
+            domains = await self._fetch_domains()
+            subcodes = await self._fetch_subcodes()
+            self._cached_taxonomy = self._build_taxonomy_section(domains, subcodes)
+            # Both cached prompts embed the taxonomy: drop them so the mode that is
+            # not being built now cannot keep serving text from the old taxonomy.
+            self._cached_prompt_single = self._cached_prompt_batch = None
+            logger.info(f"Built taxonomy section with {len(subcodes)} subcodes")
+
+        # Combine all parts with appropriate output format
+        output_format = PROMPT_BATCH_OUTPUT_FORMAT if batch_mode else PROMPT_SINGLE_OUTPUT_FORMAT
+        prompt = (
+            PROMPT_HEADER
+            + self._cached_taxonomy
+            + ENTITY_EXTRACTION_RULES
+            + PROMPT_DIMENSIONS
+            + output_format
+        )
+        if batch_mode:
+            self._cached_prompt_batch = prompt
+        else:
+            self._cached_prompt_single = prompt
+        logger.info(f"Built {'batch' if batch_mode else 'single'} classification prompt")
+        return prompt
+
+    async def build_cacheable_parts(self) -> tuple[str, str]:
+        """
+        Build the prompt split into cacheable (static) and dynamic parts.
+
+        For prompt caching, we want to separate:
+        - Static part (taxonomy, rules) - can be cached
+        - Dynamic part (output format) - varies by mode
+
+        Returns:
+            Tuple of (cacheable_prefix, suffix_for_batch)
+        """
+        if not self._cached_taxonomy:
+            domains = await self._fetch_domains()
+            subcodes = await self._fetch_subcodes()
+            self._cached_taxonomy = self._build_taxonomy_section(domains, subcodes)
+
+        # Static cacheable prefix (same for all calls)
+        cacheable_prefix = (
+            PROMPT_HEADER
+            + self._cached_taxonomy
+            + ENTITY_EXTRACTION_RULES
+            + PROMPT_DIMENSIONS
+        )
+
+        return cacheable_prefix, PROMPT_BATCH_OUTPUT_FORMAT
+
+    async def _fetch_domains(self) -> list[dict[str, Any]]:
+        """Fetch domain definitions from database."""
+        query = """
+            SELECT code, name, description
+            FROM pipeline.urt_domains
+            ORDER BY code
+        """
+        rows = await self.pool.fetch(query)
+        return [dict(row) for row in rows]
+
+    async def _fetch_subcodes(self) -> list[dict[str, Any]]:
+        """Fetch subcode definitions with examples from database."""
+        query = """
+            SELECT
+                code,
+                name,
+                definition,
+                positive_example,
+                negative_example
+            FROM pipeline.urt_subcodes
+            ORDER BY code
+        """
+        rows = await self.pool.fetch(query)
+        return [dict(row) for row in rows]
+
+    def _build_taxonomy_section(
+        self,
+        domains: list[dict[str, Any]],
+        subcodes: list[dict[str, Any]]
+    ) -> str:
+        """Build the taxonomy section of the prompt."""
+        # Group subcodes by domain (the domain is the first character of the subcode)
+        subcodes_by_domain: dict[str, list[dict]] = {}
+        for subcode in subcodes:
+            domain_code = subcode["code"][0]
+            if domain_code not in subcodes_by_domain:
+                subcodes_by_domain[domain_code] = []
+            subcodes_by_domain[domain_code].append(subcode)
+
+        # Build the section
+        lines = ["## URT TAXONOMY (Use EXACT codes from database)", ""]
+
+        for domain in domains:
+            code = domain["code"]
+            name = domain["name"]
+            desc = domain["description"]
+            domain_subcodes = subcodes_by_domain.get(code, [])
+
+            # Domain header with warning if applicable
+            warning = DOMAIN_WARNINGS.get(code, "")
+            if warning:
+                lines.append(f"### {code} - {name.upper()} ({len(domain_subcodes)} codes) ⚠️ {warning}")
+            else:
+                lines.append(f"### {code} - {name.upper()} ({len(domain_subcodes)} codes)")
+
+            # Add each subcode with definition and examples
+            for sc in domain_subcodes:
+                sc_code = sc["code"]
+                sc_name = sc["name"]
+                sc_def = sc["definition"] or sc_name
+                pos_ex = sc.get("positive_example")
+                neg_ex = sc.get("negative_example")
+
+                # Main line: code, name, definition
+                line = f"{sc_code} {sc_name}: {sc_def}"
+
+                # Add examples if available (helps LLM distinguish)
+                if pos_ex and neg_ex:
+                    line += f' [+"{pos_ex}" / -"{neg_ex}"]'
+                elif pos_ex:
+                    line += f' [+"{pos_ex}"]'
+                elif neg_ex:
+                    line += f' [-"{neg_ex}"]'
+
+                lines.append(line)
+
+            lines.append("")  # Blank line between domains
+
+        return "\n".join(lines)
+
+    def invalidate_cache(self) -> None:
+        """Invalidate the cached prompt, forcing rebuild on next call."""
+        self._cached_prompt_single = None
+        self._cached_prompt_batch = None
+        self._cached_taxonomy = None
+
+
+# Global prompt cache for when DB is not available
+_static_prompt_cache: str | None = None
+
+
+async def build_prompt_from_db(pool: asyncpg.Pool) -> str:
+    """
+    Build the system prompt through a throwaway ``PromptBuilder``.
+
+    Args:
+        pool: Database connection pool handed to the builder
+
+    Returns:
+        The string returned by ``PromptBuilder.build()`` called with default arguments
+    """
+    # A new builder is created on every call, so nothing cached earlier is reused.
+    return await PromptBuilder(pool).build()
+
+
+def get_static_fallback_prompt() -> str:
+    """
+    Return the hardcoded SYSTEM_PROMPT from llm_client, memoised at module level.
+    Meant for tests or for when the database cannot be reached.
+    """
+    global _static_prompt_cache
+    if _static_prompt_cache is not None:
+        return _static_prompt_cache
+    from reviewiq_pipeline.services.llm_client import SYSTEM_PROMPT
+    _static_prompt_cache = SYSTEM_PROMPT
+    return _static_prompt_cache
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/review_router.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/services/review_router.py
@@ -0,0 +1,375 @@
+"""
+Language-agnostic review router for cost-optimized LLM classification.
+
+Routes reviews to different processing paths based on structural signals only:
+- SKIP: Extremely low-value reviews (skip LLM entirely, assign generic code)
+- CHEAP_MODEL: Short, simple reviews (use Haiku for classification)
+- FULL_MODEL: Complex reviews (use Sonnet for full classification)
+
+IMPORTANT: All routing decisions use ONLY language-agnostic signals:
+- Word count / character count (numeric)
+- Presence of numbers in text (pattern-based)
+- Sentence count (punctuation-based)
+- Emoji-only detection (pattern-based)
+- Star rating (numeric)
+
+NO hardcoded word lists (like "great", "bueno", "gut") are used because:
+- Reviews span 7+ languages (Spanish, English, Dutch, German, Polish, Finnish, Danish, etc.)
+- Typos and misspellings are common, so any fixed word list would miss real matches
+- False negatives (skipping valuable reviews) are worse than false positives
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from dataclasses import dataclass
+from enum import Enum
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from reviewiq_pipeline.contracts import ReviewToClassify
+
+logger = logging.getLogger(__name__)
+
+
+class RoutingTier(Enum):
+    """Processing tier for a review; the string values double as keys of ReviewRouter._stats."""
+
+    SKIP = "skip"           # Skip LLM, assign generic URT code
+    CHEAP_MODEL = "cheap"   # Use fast/cheap model (Haiku)
+    FULL_MODEL = "full"     # Use full model (Sonnet)
+
+
+@dataclass
+class RoutingDecision:
+    """Result of routing decision for a review."""
+
+    tier: RoutingTier
+    reason: str
+    signals: dict[str, any]
+    # For SKIP tier, pre-assign the generic classification
+    skip_classification: dict | None = None
+
+
+@dataclass
+class RouterConfig:
+    """Configuration for the review router (thresholds read by ReviewRouter._make_decision)."""
+
+    # SKIP tier thresholds, inclusive upper bounds (very conservative - prefer false positives)
+    skip_max_words: int = 1
+    skip_max_chars: int = 15
+    skip_require_extreme_rating: bool = True  # Only skip if rating is 1 or 5
+
+    # CHEAP_MODEL tier thresholds, inclusive; more words than cheap_max_words forces FULL_MODEL
+    cheap_max_words: int = 10
+    cheap_max_chars: int = 100
+
+    # Signals that force FULL_MODEL regardless of length
+    full_model_if_has_numbers: bool = True
+    full_model_if_multiple_sentences: bool = True
+    full_model_min_sentences: int = 2  # sentence count at or above which the rule above fires
+
+
+class ReviewRouter:
+    """
+    Routes reviews to appropriate processing tier using language-agnostic signals.
+
+    Design principles:
+    - Conservative: Prefer false positives (processing simple reviews fully)
+      over false negatives (skipping valuable reviews)
+    - Language-agnostic: No word lists, only structural/numeric signals
+    - Transparent: Every decision includes the signals used
+    """
+
+    # Pattern to detect numbers (dates, amounts, room numbers, etc.)
+    NUMBER_PATTERN = re.compile(r'\d+')
+
+    # Pattern for sentence-ending punctuation (language-agnostic)
+    SENTENCE_END_PATTERN = re.compile(r'[.!?。！？]+')
+
+    # Emoji pattern (same as TextProcessor)
+    EMOJI_PATTERN = re.compile(
+        "["
+        "\U0001F600-\U0001F64F"  # emoticons
+        "\U0001F300-\U0001F5FF"  # symbols & pictographs
+        "\U0001F680-\U0001F6FF"  # transport & map symbols
+        "\U0001F1E0-\U0001F1FF"  # flags
+        "\U00002702-\U000027B0"  # dingbats
+        "\U000024C2-\U0001F251"  # enclosed characters
+        "]+",
+        flags=re.UNICODE,
+    )
+
+    # Generic classification for skipped reviews
+    GENERIC_POSITIVE = {
+        "urt_primary": "V4.03",  # Overall Satisfaction - General
+        "valence": "V+",
+        "intensity": "I1",
+        "confidence": "low",
+        "skip_reason": "auto_routed_positive",
+    }
+
+    GENERIC_NEGATIVE = {
+        "urt_primary": "V4.03",  # Overall Satisfaction - General
+        "valence": "V-",
+        "intensity": "I1",
+        "confidence": "low",
+        "skip_reason": "auto_routed_negative",
+    }
+
+    def __init__(self, config: RouterConfig | None = None):
+        self.config = config or RouterConfig()
+        self._stats = {
+            "skip": 0,
+            "cheap": 0,
+            "full": 0,
+        }
+
+    def route(self, review: ReviewToClassify) -> RoutingDecision:
+        """
+        Determine the processing tier for a review.
+
+        Args:
+            review: Review to route
+
+        Returns:
+            RoutingDecision with tier, reason, and signals
+        """
+        text = review.get("text_normalized") or review.get("text") or ""
+        rating = review.get("rating", 3)
+
+        # Extract language-agnostic signals
+        signals = self._extract_signals(text, rating)
+
+        # Decision logic (conservative - start with FULL, demote only if safe)
+        decision = self._make_decision(signals, rating)
+
+        # Update stats
+        self._stats[decision.tier.value] += 1
+
+        return decision
+
+    def route_batch(
+        self,
+        reviews: list[ReviewToClassify]
+    ) -> dict[RoutingTier, list[ReviewToClassify]]:
+        """
+        Route a batch of reviews, grouping by tier.
+
+        Args:
+            reviews: List of reviews to route
+
+        Returns:
+            Dictionary mapping tiers to lists of reviews
+        """
+        result = {
+            RoutingTier.SKIP: [],
+            RoutingTier.CHEAP_MODEL: [],
+            RoutingTier.FULL_MODEL: [],
+        }
+
+        for review in reviews:
+            decision = self.route(review)
+            # Attach routing decision to review for downstream use
+            review["_routing"] = decision
+            result[decision.tier].append(review)
+
+        logger.info(
+            f"Routed {len(reviews)} reviews: "
+            f"SKIP={len(result[RoutingTier.SKIP])}, "
+            f"CHEAP={len(result[RoutingTier.CHEAP_MODEL])}, "
+            f"FULL={len(result[RoutingTier.FULL_MODEL])}"
+        )
+
+        return result
+
+    def _extract_signals(self, text: str, rating: int) -> dict[str, any]:
+        """
+        Extract language-agnostic signals from review text.
+
+        All signals are structural/numeric, never word-based.
+        """
+        if not text:
+            return {
+                "word_count": 0,
+                "char_count": 0,
+                "has_numbers": False,
+                "sentence_count": 0,
+                "emoji_count": 0,
+                "is_emoji_only": False,
+                "rating": rating,
+                "is_extreme_rating": rating in (1, 5),
+            }
+
+        words = text.split()
+        word_count = len(words)
+        char_count = len(text)
+
+        # Check for numbers (dates, amounts, room numbers - often signal specific details)
+        has_numbers = bool(self.NUMBER_PATTERN.search(text))
+
+        # Count sentences by punctuation
+        sentences = self.SENTENCE_END_PATTERN.split(text)
+        sentence_count = len([s for s in sentences if s.strip()])
+
+        # Count emoji
+        emoji_matches = self.EMOJI_PATTERN.findall(text)
+        emoji_count = len(emoji_matches)
+
+        # Check if text is emoji-only (after stripping whitespace)
+        text_without_emoji = self.EMOJI_PATTERN.sub("", text).strip()
+        is_emoji_only = emoji_count > 0 and len(text_without_emoji) == 0
+
+        return {
+            "word_count": word_count,
+            "char_count": char_count,
+            "has_numbers": has_numbers,
+            "sentence_count": sentence_count,
+            "emoji_count": emoji_count,
+            "is_emoji_only": is_emoji_only,
+            "rating": rating,
+            "is_extreme_rating": rating in (1, 5),
+        }
+
+    def _make_decision(
+        self,
+        signals: dict[str, any],
+        rating: int
+    ) -> RoutingDecision:
+        """
+        Make routing decision based on signals.
+
+        Decision order (conservative):
+        1. Check for FULL_MODEL forcing signals first
+        2. Check for SKIP eligibility (very strict)
+        3. Check for CHEAP_MODEL eligibility
+        4. Default to FULL_MODEL
+        """
+        cfg = self.config
+
+        # FULL_MODEL forcing conditions
+        if cfg.full_model_if_has_numbers and signals["has_numbers"]:
+            return RoutingDecision(
+                tier=RoutingTier.FULL_MODEL,
+                reason="contains_numbers",
+                signals=signals,
+            )
+
+        if (cfg.full_model_if_multiple_sentences and
+            signals["sentence_count"] >= cfg.full_model_min_sentences):
+            return RoutingDecision(
+                tier=RoutingTier.FULL_MODEL,
+                reason="multiple_sentences",
+                signals=signals,
+            )
+
+        if signals["word_count"] > cfg.cheap_max_words:
+            return RoutingDecision(
+                tier=RoutingTier.FULL_MODEL,
+                reason="long_text",
+                signals=signals,
+            )
+
+        # SKIP eligibility (very strict)
+        skip_eligible = (
+            signals["word_count"] <= cfg.skip_max_words and
+            signals["char_count"] <= cfg.skip_max_chars and
+            not signals["has_numbers"] and
+            signals["sentence_count"] <= 1
+        )
+
+        if cfg.skip_require_extreme_rating:
+            skip_eligible = skip_eligible and signals["is_extreme_rating"]
+
+        if skip_eligible:
+            # Determine generic classification based on rating
+            if rating >= 4:
+                skip_class = self.GENERIC_POSITIVE.copy()
+            else:
+                skip_class = self.GENERIC_NEGATIVE.copy()
+
+            return RoutingDecision(
+                tier=RoutingTier.SKIP,
+                reason="trivial_review",
+                signals=signals,
+                skip_classification=skip_class,
+            )
+
+        # CHEAP_MODEL eligibility
+        if (signals["word_count"] <= cfg.cheap_max_words and
+            signals["char_count"] <= cfg.cheap_max_chars and
+            signals["sentence_count"] <= 1):
+            return RoutingDecision(
+                tier=RoutingTier.CHEAP_MODEL,
+                reason="short_simple_review",
+                signals=signals,
+            )
+
+        # Default to FULL_MODEL
+        return RoutingDecision(
+            tier=RoutingTier.FULL_MODEL,
+            reason="default",
+            signals=signals,
+        )
+
+    def get_stats(self) -> dict[str, int]:
+        """Get routing statistics."""
+        return self._stats.copy()
+
+    def reset_stats(self):
+        """Reset routing statistics."""
+        self._stats = {"skip": 0, "cheap": 0, "full": 0}
+
+
+def create_router(
+    conservative: bool = True,
+    skip_enabled: bool = True,
+    cheap_model_enabled: bool = True,
+) -> ReviewRouter:
+    """
+    Factory function to create a router with common configurations.
+
+    Args:
+        conservative: If True, use very strict thresholds (recommended)
+        skip_enabled: If True, allow SKIP tier
+        cheap_model_enabled: If True, allow CHEAP_MODEL tier
+
+    Returns:
+        Configured ReviewRouter instance
+    """
+    if conservative:
+        # Very conservative - only skip 1-word reviews with extreme ratings
+        config = RouterConfig(
+            skip_max_words=1,
+            skip_max_chars=15,
+            skip_require_extreme_rating=True,
+            cheap_max_words=10,
+            cheap_max_chars=100,
+            full_model_if_has_numbers=True,
+            full_model_if_multiple_sentences=True,
+            full_model_min_sentences=2,
+        )
+    else:
+        # Less conservative - skip more, cheaper processing
+        config = RouterConfig(
+            skip_max_words=3,
+            skip_max_chars=30,
+            skip_require_extreme_rating=False,
+            cheap_max_words=15,
+            cheap_max_chars=150,
+            full_model_if_has_numbers=True,
+            full_model_if_multiple_sentences=True,
+            full_model_min_sentences=3,
+        )
+
+    # Override if tiers disabled
+    if not skip_enabled:
+        config.skip_max_words = 0
+        config.skip_max_chars = 0
+
+    if not cheap_model_enabled:
+        config.cheap_max_words = 0
+        config.cheap_max_chars = 0
+
+    return ReviewRouter(config)
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage1_normalize.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage1_normalize.py
@@ -205,10 +205,11 @@ class Stage1Normalizer:
            source="google",
        )

-        # Insert enriched review stub
+        # Insert enriched review stub with job_id
        await self.review_repo.insert_enriched_review(
            normalized,
            raw_id,
+            job_id=input_data.get("job_id"),
        )

        return raw_id
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage2_classify.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage2_classify.py
@@ -4,15 +4,21 @@ Stage 2: LLM Classification
 Classify normalized reviews into URT codes with span-level extraction.

 Responsibilities:
- Call LLM for span extraction and classification
+- Call LLM for span extraction and classification (batched for efficiency)
 - Generate embeddings
 - Calculate trust scores
 - Select primary span
 - Write to reviews_enriched and review_spans tables
+
+Efficiency Features:
+- Batch processing: Multiple reviews per LLM call (configurable batch_size)
+- Prompt caching: System prompt cached to reduce input token costs
+- Parallel execution: Multiple batches processed concurrently
 """

 from __future__ import annotations

+import asyncio
 import hashlib
 import logging
 import re
@@ -27,7 +33,20 @@ from reviewiq_pipeline.contracts import (
    Stage2Output,
    Stage2Stats,
 )
-from reviewiq_pipeline.services.llm_client import LLMClient, create_fallback_response
+from reviewiq_pipeline.services.llm_client import (
+    LLMClient,
+    create_fallback_response,
+    BatchReviewInput,
+    BatchSizer,
+    PartialBatchResult,
+)
+from reviewiq_pipeline.services.prompt_builder import PromptBuilder
+from reviewiq_pipeline.services.classification_validator import validate_classification
+from reviewiq_pipeline.services.review_router import (
+    ReviewRouter,
+    RoutingTier,
+    create_router,
+)

 if TYPE_CHECKING:
    from reviewiq_pipeline.config import Config
@@ -75,22 +94,100 @@ class Stage2Classifier:
        self.span_repo = span_repo
        self.embedding_service = embedding_service
        self._llm_client: LLMClientBase | None = None
+        self._cheap_llm_client: LLMClientBase | None = None  # For CHEAP tier
+        self._prompt_builder: PromptBuilder | None = None
+        self._batch_sizer: BatchSizer | None = None
+        self._system_prompt_tokens: int = 0
+
+        # Initialize router if enabled
+        self._router: ReviewRouter | None = None
+        if config.router_enabled:
+            self._router = create_router(
+                conservative=config.router_conservative,
+                skip_enabled=config.router_skip_enabled,
+                cheap_model_enabled=config.router_cheap_model_enabled,
+            )
+            logger.info(
+                f"Review router enabled: conservative={config.router_conservative}, "
+                f"skip={config.router_skip_enabled}, cheap={config.router_cheap_model_enabled}"
+            )

    async def _get_llm_client(self) -> LLMClientBase:
-        """Get or create LLM client."""
+        """Get or create LLM client with dynamic prompt from database."""
        if self._llm_client is None:
            self._llm_client = LLMClient.create(self.config)
+
+            # Build prompt dynamically from database if available
+            batch_prompt = None
+            if self.db and self.db.pool:
+                try:
+                    self._prompt_builder = PromptBuilder(self.db.pool)
+                    # Build both single and batch prompts
+                    single_prompt = await self._prompt_builder.build(batch_mode=False)
+                    batch_prompt = await self._prompt_builder.build(batch_mode=True)
+                    self._llm_client.set_prompt(single_prompt, batch_prompt)
+                    logger.info("Using dynamic prompts from database taxonomy (single + batch)")
+                except Exception as e:
+                    logger.warning(f"Failed to build dynamic prompt, using static: {e}")
+
+            # Estimate system prompt tokens for batch sizing
+            prompt_for_sizing = batch_prompt or self._llm_client.get_prompt(batch_mode=True)
+            self._system_prompt_tokens = len(prompt_for_sizing) // 4  # ~4 chars per token
+
+            # Initialize batch sizer
+            self._batch_sizer = BatchSizer(
+                model=self.config.llm_model,
+                system_prompt_tokens=self._system_prompt_tokens,
+                target_utilization=self.config.classification_target_utilization,
+            )
+            logger.info(
+                f"BatchSizer initialized: model={self.config.llm_model}, "
+                f"system_prompt_tokens≈{self._system_prompt_tokens}, "
+                f"target_utilization={self.config.classification_target_utilization:.0%}"
+            )
+
        return self._llm_client

+    async def _get_cheap_llm_client(self) -> LLMClientBase:
+        """Lazily create (once) and return the LLM client used for CHEAP tier routing."""
+        if self._cheap_llm_client is None:
+            # Shallow-copy the config so overriding llm_model does not touch self.config
+            from copy import copy
+            cheap_config = copy(self.config)
+            cheap_config.llm_model = self.config.router_cheap_model
+
+            self._cheap_llm_client = LLMClient.create(cheap_config)
+
+            # Reuse the main client's prompts; skipped when the main client was never created
+            if self._llm_client:
+                single_prompt = self._llm_client.get_prompt(batch_mode=False)
+                batch_prompt = self._llm_client.get_prompt(batch_mode=True)
+                self._cheap_llm_client.set_prompt(single_prompt, batch_prompt)
+
+            logger.info(f"Cheap LLM client initialized with model: {self.config.router_cheap_model}")
+
+        return self._cheap_llm_client
+
    async def close(self) -> None:
        """Close resources."""
        if self._llm_client:
            await self._llm_client.close()
            self._llm_client = None
+        if self._cheap_llm_client:
+            await self._cheap_llm_client.close()
+            self._cheap_llm_client = None

    async def process(self, input_data: Stage2Input) -> Stage2Output:
        """
-        Process reviews through classification stage.
+        Process reviews through classification stage using batched LLM calls.
+
+        This method:
+        1. Routes reviews to appropriate tier (if router enabled)
+        2. Calculates optimal batch size based on context window and review lengths
+        3. Splits reviews into batches dynamically
+        4. Processes batches in parallel (with concurrency limit)
+        5. Uses prompt caching to reduce costs on subsequent batches
+        6. Adapts batch size based on actual token usage

        Args:
            input_data: Stage 2 input with reviews and config
@@ -99,65 +196,284 @@ class Stage2Classifier:
            Stage2Output with classified reviews and stats
        """
        batch_id = str(uuid.uuid4())[:8]
-        logger.info(
-            f"Stage 2: Classifying {len(input_data['reviews'])} reviews "
-            f"(batch {batch_id})"
-        )
-
-        classified_reviews: list[ClassifiedReview] = []
-        total_tokens = 0
-        total_cost = 0.0
-        total_spans = 0
-        error_count = 0
+        reviews = input_data["reviews"]
+        max_concurrent = self.config.classification_max_concurrent
+        fixed_batch_size = self.config.classification_batch_size  # 0 = auto

        llm_client = await self._get_llm_client()

-        for review in input_data["reviews"]:
-            try:
-                classified, metadata = await self._classify_review(
-                    review,
-                    input_data["config"]["profile"],
-                    llm_client,
-                    batch_id,
-                )
+        # Smart routing (if enabled)
+        skip_classified: list[ClassifiedReview] = []
+        reviews_to_process = reviews
+        cheap_reviews: list[ReviewToClassify] = []
+        full_reviews: list[ReviewToClassify] = []

-                if classified:
-                    classified_reviews.append(classified)
-                    total_spans += len(classified.get("spans", []))
-                    total_tokens += metadata.get("total_tokens", 0)
-                    total_cost += metadata.get("cost_usd", 0.0)
+        if self._router:
+            routed = self._router.route_batch(reviews)

-                    # Persist to database if configured
+            # Process SKIP tier immediately (no LLM)
+            for review in routed[RoutingTier.SKIP]:
+                routing = review.get("_routing")
+                if routing and routing.skip_classification:
+                    classified = self._create_skip_classification(
+                        review,
+                        routing.skip_classification,
+                        batch_id,
+                    )
+                    skip_classified.append(classified)
+
+                    # Persist if configured
                    if self.review_repo and self.span_repo:
                        await self._persist_classification(
-                            classified,
-                            review,
+                            classified, review, batch_id, input_data["config"]
+                        )
+
+            cheap_reviews = routed[RoutingTier.CHEAP_MODEL]
+            full_reviews = routed[RoutingTier.FULL_MODEL]
+
+            router_stats = self._router.get_stats()
+            logger.info(
+                f"Router results: SKIP={len(routed[RoutingTier.SKIP])}, "
+                f"CHEAP={len(cheap_reviews)}, FULL={len(full_reviews)}"
+            )
+
+            # If no cheap model enabled, merge into full
+            if not self.config.router_cheap_model_enabled:
+                full_reviews = cheap_reviews + full_reviews
+                cheap_reviews = []
+        else:
+            # No router - all reviews go to full model
+            full_reviews = reviews
+
+        # Calculate optimal batch size dynamically (based on full_reviews)
+        all_llm_reviews = full_reviews + cheap_reviews  # Combined for batch sizing
+        if all_llm_reviews:
+            review_dicts = [{"text": r["text"]} for r in all_llm_reviews]
+            batch_calc = self._batch_sizer.calculate_batch_size(
+                reviews=review_dicts,
+                fixed_size=fixed_batch_size if fixed_batch_size > 0 else None,
+            )
+            batch_size = batch_calc.batch_size
+            logger.info(f"Batch sizing: {batch_calc.reasoning}")
+        else:
+            batch_size = fixed_batch_size or 25
+
+        llm_review_count = len(full_reviews) + len(cheap_reviews)
+        logger.info(
+            f"Stage 2: Classifying {len(reviews)} reviews "
+            f"(batch_id={batch_id}, batch_size={batch_size}, max_concurrent={max_concurrent}, "
+            f"skip={len(skip_classified)}, llm={llm_review_count})"
+        )
+
+        # Split FULL tier reviews into batches
+        full_batches = [
+            full_reviews[i:i + batch_size]
+            for i in range(0, len(full_reviews), batch_size)
+        ] if full_reviews else []
+
+        # Split CHEAP tier reviews into batches
+        cheap_batches = [
+            cheap_reviews[i:i + batch_size]
+            for i in range(0, len(cheap_reviews), batch_size)
+        ] if cheap_reviews else []
+
+        logger.info(
+            f"Split into {len(full_batches)} FULL batches + {len(cheap_batches)} CHEAP batches "
+            f"({'unlimited' if max_concurrent == 0 else max_concurrent} concurrent)"
+        )
+
+        # Process batches - unlimited concurrency by default (0 = no limit)
+        semaphore = asyncio.Semaphore(max_concurrent) if max_concurrent > 0 else None
+        total_tokens = 0
+        total_cost = 0.0
+        total_cached_tokens = 0
+        classified_reviews: list[ClassifiedReview] = []
+        error_count = 0
+
+        # Get cheap client if needed
+        cheap_client = None
+        if cheap_batches:
+            cheap_client = await self._get_cheap_llm_client()
+
+        async def process_batch(
+            batch_reviews: list[ReviewToClassify],
+            batch_num: int,
+            client: LLMClientBase,
+            tier_label: str = "FULL",
+        ):
+            """Process a single batch of reviews."""
+
+            async def do_batch():
+                nonlocal total_tokens, total_cost, total_cached_tokens, error_count
+                try:
+                    batch_classified, batch_metadata = await self._classify_batch(
+                        batch_reviews,
+                        input_data["config"]["profile"],
+                        client,
+                        batch_id,
+                        input_data["config"],
+                    )
+
+                    batch_tokens = batch_metadata.get("total_tokens", 0)
+                    batch_cost = batch_metadata.get("cost_usd", 0.0)
+                    batch_cached = batch_metadata.get("cached_tokens", 0)
+
+                    total_tokens += batch_tokens
+                    total_cost += batch_cost
+                    total_cached_tokens += batch_cached
+
+                    # Update batch sizer with actual token usage for adaptive sizing
+                    if self._batch_sizer:
+                        input_tokens = batch_metadata.get("input_tokens", 0)
+                        output_tokens = batch_metadata.get("output_tokens", 0)
+                        self._batch_sizer.update_from_response(
+                            batch_size=len(batch_reviews),
+                            input_tokens=input_tokens - self._system_prompt_tokens,  # Exclude system prompt
+                            output_tokens=output_tokens,
+                        )
+
+                    total_batches = len(full_batches) + len(cheap_batches)
+                    logger.info(
+                        f"[{tier_label}] Batch {batch_num}/{total_batches}: "
+                        f"{len(batch_classified)} reviews, "
+                        f"{batch_tokens:,} tokens ({batch_cached:,} cached), "
+                        f"${batch_cost:.4f}"
+                    )
+
+                    return batch_classified
+
+                except PartialBatchResult as e:
+                    # Partial success - we recovered some reviews
+                    logger.info(
+                        f"Batch {batch_num} partial success: {len(e.partial_results)} recovered, "
+                        f"{len(e.missing_indices)} need reprocessing"
+                    )
+
+                    # Process the recovered results
+                    partial_classified: list[ClassifiedReview] = []
+                    profile = input_data["config"]["profile"]
+
+                    for partial_review in e.partial_results:
+                        idx = partial_review.get("review_index", -1)
+                        if 0 <= idx < len(batch_reviews):
+                            review = batch_reviews[idx]
+                            try:
+                                classified = self._process_llm_response(
+                                    review,
+                                    {
+                                        "spans": partial_review.get("spans", []),
+                                        "review_summary": partial_review.get("review_summary", {}),
+                                    },
+                                    profile,
+                                    batch_id,
+                                    is_fallback=False,
+                                )
+                                partial_classified.append(classified)
+
+                                if self.review_repo and self.span_repo:
+                                    await self._persist_classification(
+                                        classified, review, batch_id, input_data["config"]
+                                    )
+                            except Exception as pe:
+                                logger.warning(f"Error processing recovered review {idx}: {pe}")
+                                e.missing_indices.append(idx)
+
+                    # Update cost tracking from partial metadata
+                    if e.metadata:
+                        total_tokens += e.metadata.get("total_tokens", 0)
+                        total_cost += e.metadata.get("cost_usd", 0.0)
+                        total_cached_tokens += e.metadata.get("cached_tokens", 0)
+
+                    # Only fallback process the missing reviews
+                    if e.missing_indices:
+                        missing_reviews = [batch_reviews[i] for i in e.missing_indices if 0 <= i < len(batch_reviews)]
+                        error_count += len(missing_reviews)
+                        logger.info(f"Reprocessing {len(missing_reviews)} missing reviews individually")
+                        fallback_results = await self._fallback_individual_processing(
+                            missing_reviews,
+                            input_data["config"]["profile"],
+                            client,  # Use same client as batch
                            batch_id,
                            input_data["config"],
                        )
+                        partial_classified.extend(fallback_results)

-            except Exception as e:
-                logger.error(
-                    f"Error classifying review {review['review_id']}: {e}",
-                    exc_info=True,
-                )
-                error_count += 1
+                    return partial_classified

+                except Exception as e:
+                    logger.error(f"[{tier_label}] Batch {batch_num} failed: {e}", exc_info=True)
+                    error_count += len(batch_reviews)
+                    # Fallback: process individually
+                    return await self._fallback_individual_processing(
+                        batch_reviews,
+                        input_data["config"]["profile"],
+                        client,  # Use same client as batch
+                        batch_id,
+                        input_data["config"],
+                    )
+
+            # Run with or without semaphore
+            if semaphore:
+                async with semaphore:
+                    return await do_batch()
+            else:
+                return await do_batch()
+
+        # Process all batches concurrently (both FULL and CHEAP tiers)
+        all_batch_tasks = []
+
+        # FULL tier batches
+        for i, batch in enumerate(full_batches):
+            all_batch_tasks.append(
+                process_batch(batch, i + 1, llm_client, "FULL")
+            )
+
+        # CHEAP tier batches
+        for i, batch in enumerate(cheap_batches):
+            all_batch_tasks.append(
+                process_batch(batch, len(full_batches) + i + 1, cheap_client, "CHEAP")
+            )
+
+        batch_results = await asyncio.gather(*all_batch_tasks) if all_batch_tasks else []
+
+        # Flatten results from LLM processing
+        for batch_result in batch_results:
+            classified_reviews.extend(batch_result)
+
+        # Add skip-classified reviews (no LLM)
+        classified_reviews.extend(skip_classified)
+
+        # Calculate stats
+        total_spans = sum(len(r.get("spans", [])) for r in classified_reviews)
        avg_spans = total_spans / len(classified_reviews) if classified_reviews else 0

+        # Log final statistics
+        skip_count = len(skip_classified)
+        llm_count = len(classified_reviews) - skip_count
        logger.info(
-            f"Stage 2 complete: {len(classified_reviews)} classified, "
-            f"{error_count} errors, {total_spans} spans total"
+            f"Stage 2 complete: {len(classified_reviews)} classified "
+            f"(LLM={llm_count}, skipped={skip_count}), "
+            f"{error_count} errors, {total_spans} spans total, "
+            f"${total_cost:.4f} cost, {total_cached_tokens:,} cached tokens"
        )

+        if self._batch_sizer:
+            stats = self._batch_sizer.get_stats_summary()
+            logger.info(
+                f"Batch sizing stats: "
+                f"avg_input={stats['avg_input_tokens']} tokens/review, "
+                f"avg_output={stats['avg_output_tokens']} tokens/review, "
+                f"range=[{stats['min_review_tokens']}-{stats['max_review_tokens']}]"
+            )
+
        return Stage2Output(
            batch_id=batch_id,
            taxonomy_version=input_data["config"]["taxonomy_version"],
            model_version=self.config.llm_model,
-            prompt_version="v1.0",
+            prompt_version="v2.0-batched",
            reviews_classified=classified_reviews,
            stats=Stage2Stats(
-                input_count=len(input_data["reviews"]),
+                input_count=len(reviews),
                success_count=len(classified_reviews),
                error_count=error_count,
                total_spans=total_spans,
@@ -167,42 +483,127 @@ class Stage2Classifier:
            ),
        )

-    async def _classify_review(
+    async def _classify_batch(
         self,
-        review: ReviewToClassify,
+        reviews: list[ReviewToClassify],
         profile: str,
         llm_client: LLMClientBase,
         batch_id: str,
-    ) -> tuple[ClassifiedReview | None, dict[str, Any]]:
+        config: dict[str, Any],
+    ) -> tuple[list[ClassifiedReview], dict[str, Any]]:
         """
-        Classify a single review.
+        Classify a batch of reviews in a single LLM call.
+
+        Responses are matched to reviews by position. Every input review
+        yields exactly one classified entry: a review whose response is
+        missing, or whose response cannot be processed, is classified from
+        ``create_fallback_response`` instead. Persistence is best-effort; a
+        storage error is logged and the classification is still returned.

         Args:
-            review: Review to classify
+            reviews: List of reviews to classify
             profile: Classification profile
             llm_client: LLM client instance
             batch_id: Batch identifier
+            config: Classification config

         Returns:
-            Tuple of (classified review, metadata)
+            Tuple of (list of classified reviews, aggregated metadata)
         """
-        metadata: dict[str, Any] = {}
-
-        # Call LLM for classification
-        try:
-            llm_response, llm_metadata = await llm_client.classify(
-                review["text"],
-                profile,
+        # Prepare batch input
+        batch_input: list[BatchReviewInput] = [
+            BatchReviewInput(
+                review_id=r["review_id"],
+                text=r["text"],
+                rating=r["rating"],
             )
-            metadata.update(llm_metadata)
-        except Exception as e:
-            logger.warning(
-                f"LLM classification failed for {review['review_id']}, "
-                f"using fallback: {e}"
-            )
-            llm_response = create_fallback_response(review["text"])
-            metadata["fallback"] = True
+            for r in reviews
+        ]

+        # Call LLM for batch classification
+        llm_responses, metadata = await llm_client.classify_batch(batch_input, profile)
+
+        # zip() would silently drop trailing reviews on a short response list,
+        # so pair by index and fall back for any review left without a response.
+        responses = list(llm_responses)
+        if len(responses) != len(reviews):
+            logger.warning(
+                f"Batch {batch_id}: received {len(responses)} LLM responses "
+                f"for {len(reviews)} reviews"
+            )
+
+        # Process each response
+        classified_reviews: list[ClassifiedReview] = []
+
+        for i, review in enumerate(reviews):
+            classified = None
+            if i < len(responses):
+                try:
+                    classified = self._process_llm_response(
+                        review,
+                        responses[i],
+                        profile,
+                        batch_id,
+                        is_fallback=False,
+                    )
+                except Exception as e:
+                    logger.warning(f"Error processing review {review['review_id']}: {e}")
+
+            if classified is None:
+                # Use fallback for this review
+                fallback = create_fallback_response(review["text"])
+                classified_reviews.append(
+                    self._process_llm_response(
+                        review, fallback, profile, batch_id, is_fallback=True
+                    )
+                )
+                continue
+
+            classified_reviews.append(classified)
+
+            # Persist to database if configured. This runs outside the
+            # processing try-block: a persistence error used to fall into the
+            # fallback handler and append a second entry for the same review.
+            if self.review_repo and self.span_repo:
+                try:
+                    await self._persist_classification(
+                        classified,
+                        review,
+                        batch_id,
+                        config,
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Failed to persist classification for {review['review_id']}: {e}"
+                    )
+
+        return classified_reviews, metadata
+
+    async def _fallback_individual_processing(
+        self,
+        reviews: list[ReviewToClassify],
+        profile: str,
+        llm_client: LLMClientBase,
+        batch_id: str,
+        config: dict[str, Any],
+    ) -> list[ClassifiedReview]:
+        """
+        Fallback to individual processing when batch fails.
+
+        Each review is classified on its own via ``_classify_review``. If that
+        raises, the review is classified from ``create_fallback_response``
+        instead, so a review contributes at most one entry to the result.
+        Persistence is best-effort: a storage error is logged and neither
+        discards nor duplicates the classification already produced.
+
+        Args:
+            reviews: Reviews to classify one at a time
+            profile: Classification profile
+            llm_client: LLM client instance
+            batch_id: Batch identifier
+            config: Classification config (passed through to persistence)
+
+        Returns:
+            Classified reviews, in the order the inputs were processed
+        """
+        logger.warning(f"Falling back to individual processing for {len(reviews)} reviews")
+        classified_reviews: list[ClassifiedReview] = []
+
+        for review in reviews:
+            try:
+                classified, _ = await self._classify_review(
+                    review, profile, llm_client, batch_id
+                )
+            except Exception as e:
+                logger.error(f"Individual classification failed for {review['review_id']}: {e}")
+                # Use fallback
+                fallback = create_fallback_response(review["text"])
+                classified_reviews.append(
+                    self._process_llm_response(
+                        review, fallback, profile, batch_id, is_fallback=True
+                    )
+                )
+                continue
+
+            if not classified:
+                continue
+            classified_reviews.append(classified)
+
+            if self.review_repo and self.span_repo:
+                # Kept outside the classification try-block: a persistence
+                # error used to fall into the fallback handler above and append
+                # a second (fallback) entry for the same review.
+                try:
+                    await self._persist_classification(
+                        classified, review, batch_id, config
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Failed to persist classification for {review['review_id']}: {e}"
+                    )
+
+        return classified_reviews
+
+    def _process_llm_response(
+        self,
+        review: ReviewToClassify,
+        llm_response: LLMClassificationResponse,
+        profile: str,
+        batch_id: str,
+        is_fallback: bool = False,
+    ) -> ClassifiedReview:
+        """
+        Process an LLM response into a ClassifiedReview.
+
+        This is shared logic for both batch and individual processing.
+        """
        # Validate and fix response
        llm_response = self._validate_and_fix_response(llm_response, review["text"])

@@ -217,7 +618,10 @@ class Stage2Classifier:
        # Ensure exactly one primary span
        spans = self._ensure_primary_span(spans)

-        # Find the primary span for review-level classification
+        # Post-LLM validation
+        spans = self._validate_span_classifications(spans)
+
+        # Find primary span
        primary_span = next((s for s in spans if s.get("is_primary")), spans[0] if spans else None)

        # Generate embedding
@@ -247,10 +651,59 @@ class Stage2Classifier:
            embedding=embedding,
            spans=spans,
            classification_confidence={
-                "overall": 0.8 if not metadata.get("fallback") else 0.3
+                "overall": 0.8 if not is_fallback else 0.3
            },
-            processing_time_ms=metadata.get("latency_ms", 0),
-        ), metadata
+            processing_time_ms=0,  # Set at batch level
+        )
+
+    async def _classify_review(
+        self,
+        review: ReviewToClassify,
+        profile: str,
+        llm_client: LLMClientBase,
+        batch_id: str,
+    ) -> tuple[ClassifiedReview | None, dict[str, Any]]:
+        """
+        Single-review classification path (the fallback when batching fails).
+
+        The LLM client is asked to classify the review text; if that call
+        raises, the response from ``create_fallback_response`` is used and the
+        returned metadata carries ``fallback=True``.
+
+        Args:
+            review: Review to classify
+            profile: Classification profile
+            llm_client: LLM client instance
+            batch_id: Batch identifier
+
+        Returns:
+            Tuple of (classified review, metadata)
+        """
+        call_info: dict[str, Any] = {}
+        used_fallback = False
+        text = review["text"]
+
+        try:
+            response, client_info = await llm_client.classify(text, profile)
+            call_info.update(client_info)
+        except Exception as e:
+            logger.warning(
+                f"LLM classification failed for {review['review_id']}, "
+                f"using fallback: {e}"
+            )
+            response = create_fallback_response(review["text"])
+            call_info["fallback"] = True
+            used_fallback = True
+
+        # Same post-processing as the batch path
+        result = self._process_llm_response(
+            review, response, profile, batch_id, is_fallback=used_fallback
+        )
+        return result, call_info

    def _validate_and_fix_response(
        self,
@@ -405,6 +858,45 @@ class Stage2Classifier:

        return spans

+    def _validate_span_classifications(
+        self,
+        spans: list[ExtractedSpan],
+    ) -> list[ExtractedSpan]:
+        """
+        Post-LLM validation to catch common misclassifications.
+
+        Uses keyword detection to identify obvious errors like:
+        - Price mentions classified as P codes (should be V)
+        - Staff behavior classified as A codes (should be P)
+        - Scam mentions classified as P/V codes (should be R)
+
+        Spans are corrected in place: only ``urt_primary`` is rewritten.
+        NOTE(review): fields derived from the code (e.g. ``usn``) are not
+        recomputed here - confirm whether they should be.
+
+        Args:
+            spans: List of classified spans
+
+        Returns:
+            The same list, with corrections applied to its spans
+        """
+        corrections = 0
+        for span in spans:
+            # Read the current code once, with the same default the validator
+            # gets; indexing span["urt_primary"] afterwards raised KeyError for
+            # exactly the spans that default was meant to cover.
+            original = span.get("urt_primary", "O1.01")
+            correction = validate_classification(
+                span.get("span_text", ""),
+                original,
+                span.get("valence", "V0"),
+            )
+            if not correction:
+                continue
+
+            span["urt_primary"] = correction["suggested_urt"]
+            corrections += 1
+            logger.debug(
+                f"Validation corrected {original} → {correction['suggested_urt']} "
+                f"({correction['reason']})"
+            )
+
+        if corrections:
+            logger.info(f"Post-LLM validation corrected {corrections} spans")
+
+        return spans
+
    def _calculate_trust_score(
        self,
        review: ReviewToClassify,
@@ -467,6 +959,72 @@ class Stage2Classifier:
                quotes[code] = span["span_text"][:100]
        return quotes

+    def _create_skip_classification(
+        self,
+        review: ReviewToClassify,
+        skip_classification: dict,
+        batch_id: str,
+    ) -> ClassifiedReview:
+        """
+        Build the ClassifiedReview for a SKIP tier review without calling the LLM.
+
+        The whole review text becomes one primary span carrying the
+        classification pre-assigned by the router (or the defaults below).
+
+        Args:
+            review: Source review
+            skip_classification: Pre-assigned classification from router
+            batch_id: Batch identifier
+
+        Returns:
+            ClassifiedReview with generic classification
+        """
+        code = skip_classification.get("urt_primary", "V4.03")
+        polarity = skip_classification.get("valence", "V0")
+        strength = skip_classification.get("intensity", "I1")
+
+        # Span id: "SPN-" + first 16 hex chars of sha256 over
+        # "<review_id>:0:<first 50 chars of text>"
+        digest = hashlib.sha256(
+            f"{review['review_id']}:0:{review['text'][:50]}".encode()
+        ).hexdigest()
+        whole_review_span = ExtractedSpan(
+            span_id=f"SPN-{digest[:16]}",
+            span_index=0,
+            span_text=review["text"],
+            span_start=0,
+            span_end=len(review["text"]),
+            profile="lite",  # type: ignore
+            urt_primary=code,
+            urt_secondary=[],
+            valence=polarity,
+            intensity=strength,
+            comparative="CR-N",
+            confidence="low",
+            usn=f"URT:S:{code}:{polarity[1]}{strength[1]}:11TC.ES.N",
+            is_primary=True,
+        )
+
+        # Embedding is only computed when an embedding service is configured
+        vector: list[float] = (
+            self.embedding_service.embed(review.get("text_normalized", review["text"]))
+            if self.embedding_service
+            else []
+        )
+
+        confidence_info = {
+            "overall": 0.2,
+            "skip_reason": skip_classification.get("skip_reason", "auto_routed"),
+        }
+        return ClassifiedReview(
+            source=review["source"],
+            review_id=review["review_id"],
+            review_version=review["review_version"],
+            urt_primary=code,
+            urt_secondary=[],
+            valence=polarity,
+            intensity=strength,
+            comparative="CR-N",
+            staff_mentions=[],
+            quotes={},
+            trust_score=self.config.trust_score_floor,  # Minimum trust for skipped reviews
+            embedding=vector,
+            spans=[whole_review_span],
+            classification_confidence=confidence_info,
+            processing_time_ms=0,
+        )
+
    def _generate_usn(self, span: LLMSpanResponse) -> str:
        """
        Generate USN (URT String Notation) for a span.
@@ -536,4 +1094,5 @@ class Stage2Classifier:
                batch_id,
                self.config.llm_model,
                config["taxonomy_version"],
+                job_id=config.get("job_id"),
            )
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage3_route.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage3_route.py
@@ -69,6 +69,9 @@ class Stage3Router:
        """
        logger.info(f"Stage 3: Routing {len(input_data['spans'])} spans")

+        # Get job_id from input (may be None)
+        job_id = input_data.get("job_id")
+
        routed_spans: list[RoutedSpan] = []
        issues_created: list[str] = []
        issues_updated: list[str] = []
@@ -81,7 +84,7 @@ class Stage3Router:
                    spans_skipped += 1
                    continue

-                routed = await self._route_span(span)
+                routed = await self._route_span(span, job_id=job_id)
                if routed:
                    routed_spans.append(routed)

@@ -114,12 +117,13 @@ class Stage3Router:
            ),
        )

-    async def _route_span(self, span: SpanToRoute) -> RoutedSpan | None:
+    async def _route_span(self, span: SpanToRoute, job_id: str | None = None) -> RoutedSpan | None:
        """
        Route a single span to an issue.

        Args:
            span: Span to route
+            job_id: Optional job ID to link issues to pipeline executions

        Returns:
            RoutedSpan with routing info, or None if skipped
@@ -149,6 +153,7 @@ class Stage3Router:
                entity=span.get("entity_normalized"),
                entity_normalized=span.get("entity_normalized"),
                taxonomy_version=self.config.taxonomy_version,
+                job_id=job_id,
            )

        routed = RoutedSpan(
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage4_aggregate.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage4_aggregate.py
@@ -194,25 +194,24 @@ class Stage4Aggregator:
        else:
            raise ValueError(f"Unknown bucket type: {bucket_type}")

-    def _get_period_date(self, target_date: date, bucket_type: str) -> str:
-        """Get the period date string for a bucket."""
+    def _get_period_date(self, target_date: date, bucket_type: str) -> date:
+        """
+        Get the period date for a bucket.
+
+        Args:
+            target_date: Date that falls inside the bucket
+            bucket_type: "day", "week" or "month"
+
+        Returns:
+            The first day of the bucket containing target_date: the date
+            itself for "day", that week's Monday for "week", the 1st of the
+            month for "month". Any other bucket_type returns target_date
+            unchanged.
+        """
         if bucket_type == "day":
-            return target_date.isoformat()
+            return target_date
         elif bucket_type == "week":
             # Week starts on Monday
-            start = target_date - timedelta(days=target_date.weekday())
-            return start.isoformat()
+            return target_date - timedelta(days=target_date.weekday())
         elif bucket_type == "month":
-            return target_date.replace(day=1).isoformat()
+            return target_date.replace(day=1)
         else:
-            return target_date.isoformat()
+            return target_date

    def _aggregate_by_code(
        self,
        span_data: list[dict[str, Any]],
        business_id: str,
        place_id: str,
-        period_date: str,
+        period_date: date,
        bucket_type: str,
        taxonomy_version: str,
    ) -> list[FactRecord]:
@@ -243,7 +242,7 @@ class Stage4Aggregator:
        span_data: list[dict[str, Any]],
        business_id: str,
        place_id: str,
-        period_date: str,
+        period_date: date,
        bucket_type: str,
        taxonomy_version: str,
    ) -> list[FactRecord]:
@@ -275,7 +274,7 @@ class Stage4Aggregator:
        span_data: list[dict[str, Any]],
        business_id: str,
        place_id: str,
-        period_date: str,
+        period_date: date,
        bucket_type: str,
        taxonomy_version: str,
    ) -> FactRecord:
@@ -296,7 +295,7 @@ class Stage4Aggregator:
        spans: list[dict[str, Any]],
        business_id: str,
        place_id: str,
-        period_date: str,
+        period_date: date,
        bucket_type: str,
        subject_type: str,
        subject_id: str,
@@ -449,7 +448,7 @@ class Stage4Aggregator:
        self,
        business_id: str,
        place_id: str,
-        period_date: str,
+        period_date: date,
        bucket_type: str,
        subject_type: str,
        subject_id: str,
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage4_synthesize.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage4_synthesize.py
@@ -1,477 +0,0 @@
-"""
-Stage 4: Synthesize - Generate AI narratives and action plans.
-
-This stage runs after classification and routing to produce:
- Executive narrative (business-specific story)
- Section insights (sentiment, category, timeline)
- Action plan with prioritized recommendations
- Timeline annotations for key events
- Marketing angles from strengths
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-from dataclasses import dataclass, field
-from datetime import datetime
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    import asyncpg
-
-from reviewiq_pipeline.services.llm_client import LLMClientBase
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class ActionItem:
-    """A specific action recommendation."""
-    id: str
-    title: str
-    why: str
-    what: str
-    who: str
-    impact: str
-    evidence: list[str]
-    estimated_rating_lift: float | None
-    complexity: str  # 'quick' | 'medium' | 'complex'
-    priority: str    # 'critical' | 'high' | 'medium' | 'low'
-    timeline: str
-    related_subcode: str
-
-
-@dataclass
-class TimelineAnnotation:
-    """An annotation for a key event on the timeline."""
-    date: str
-    label: str
-    description: str
-    type: str  # 'positive' | 'negative' | 'neutral' | 'event'
-
-
-@dataclass
-class Synthesis:
-    """Complete synthesis output from Stage 4."""
-    executive_narrative: str
-    sentiment_insight: str
-    category_insight: str
-    timeline_insight: str
-    priority_domain: str | None
-    priority_issue: str | None
-    action_plan: list[ActionItem]
-    issue_actions: dict[str, str]
-    timeline_annotations: list[TimelineAnnotation]
-    marketing_angles: list[str]
-    competitor_context: str | None
-    generated_at: str
-
-
-SYNTHESIS_SYSTEM_PROMPT = """You are an expert business analyst specializing in customer experience and review analysis.
-
-Your task is to analyze classified review data and generate actionable business insights.
-
-You will receive:
-1. Summary statistics (total reviews, rating, sentiment distribution)
-2. Top issues by category with example quotes
-3. Top strengths with example quotes
-4. Domain breakdown (what customers talk about most)
-
-Generate a JSON response with these fields:
-
-{
-  "executive_narrative": "2-3 paragraph story explaining the business situation, key problems, and path forward. Be specific with numbers and examples.",
-
-  "sentiment_insight": "1-2 sentences explaining WHY sentiment is distributed this way. Connect to specific issues.",
-
-  "category_insight": "1-2 sentences about the pattern in categories. Which domain needs most attention and why?",
-
-  "timeline_insight": "1-2 sentences about trends if data shows changes over time.",
-
-  "priority_domain": "Single letter code (P/V/J/O/A/E/R) for the domain needing most attention, or null",
-
-  "priority_issue": "The subcode (e.g., 'V1.03') that should be fixed first, or null",
-
-  "action_plan": [
-    {
-      "id": "action_1",
-      "title": "Clear action title",
-      "why": "Root cause from the reviews",
-      "what": "Specific steps to take",
-      "who": "Department or role responsible",
-      "impact": "Expected outcome",
-      "evidence": ["Quote 1", "Quote 2"],
-      "estimated_rating_lift": 0.3,
-      "complexity": "quick|medium|complex",
-      "priority": "critical|high|medium|low",
-      "timeline": "This week|This month|This quarter",
-      "related_subcode": "V1.03"
-    }
-  ],
-
-  "timeline_annotations": [
-    {
-      "date": "2024-01-15",
-      "label": "Short label",
-      "description": "What happened",
-      "type": "positive|negative|neutral|event"
-    }
-  ],
-
-  "marketing_angles": [
-    "Way to promote strength 1",
-    "Way to promote strength 2"
-  ],
-
-  "competitor_context": "How this compares to industry/competitors, or null if unknown"
-}
-
-Be specific, actionable, and business-focused. Use actual numbers and quotes from the data.
-Prioritize actions by impact and feasibility.
-"""
-
-
-class SynthesisStage:
-    """
-    Stage 4: Generate AI synthesis from classified review data.
-
-    This stage:
-    1. Aggregates classification results
-    2. Identifies patterns and priorities
-    3. Generates narrative insights via LLM
-    4. Produces actionable recommendations
-    """
-
-    def __init__(self, pool: asyncpg.Pool, llm_client: LLMClientBase):
-        self.pool = pool
-        self.llm_client = llm_client
-
-    async def run(self, job_id: str, execution_id: str) -> Synthesis:
-        """
-        Generate synthesis for a completed pipeline execution.
-
-        Args:
-            job_id: The scraping job ID
-            execution_id: The pipeline execution ID
-
-        Returns:
-            Synthesis object with all generated insights
-        """
-        logger.info(f"Stage 4: Generating synthesis for job {job_id}")
-
-        # Gather all the data we need
-        context = await self._gather_context(job_id)
-
-        # Generate synthesis via LLM
-        synthesis = await self._generate_synthesis(context)
-
-        # Store synthesis in database
-        await self._store_synthesis(execution_id, synthesis)
-
-        logger.info(f"Stage 4: Synthesis complete - {len(synthesis.action_plan)} actions generated")
-        return synthesis
-
-    async def _gather_context(self, job_id: str) -> dict[str, Any]:
-        """Gather all context needed for synthesis."""
-
-        # Get overview stats
-        overview = await self.pool.fetchrow("""
-            SELECT
-                COUNT(DISTINCT r.review_id) as total_reviews,
-                AVG(r.rating) as avg_rating,
-                COUNT(s.span_id) as total_spans
-            FROM reviews r
-            LEFT JOIN pipeline.spans s ON s.source_review_id = r.review_id
-            WHERE r.job_id = $1
-        """, job_id)
-
-        # Get sentiment distribution
-        sentiment = await self.pool.fetch("""
-            SELECT
-                valence,
-                COUNT(*) as count,
-                COUNT(DISTINCT source_review_id) as review_count
-            FROM pipeline.spans
-            WHERE job_id = $1 AND valence IS NOT NULL
-            GROUP BY valence
-            ORDER BY count DESC
-        """, job_id)
-
-        # Get top issues (weaknesses)
-        top_issues = await self.pool.fetch("""
-            SELECT
-                s.urt_primary as subcode,
-                sc.name as subcode_name,
-                sc.definition,
-                d.code as domain,
-                d.name as domain_name,
-                COUNT(*) as span_count,
-                COUNT(*) FILTER (WHERE s.valence = 'V-') as negative_count,
-                ARRAY_AGG(s.span_text ORDER BY s.intensity DESC) FILTER (WHERE s.valence = 'V-') as example_quotes
-            FROM pipeline.spans s
-            JOIN pipeline.urt_subcodes sc ON sc.code = s.urt_primary
-            JOIN pipeline.urt_domains d ON d.code = SUBSTRING(s.urt_primary, 1, 1)
-            WHERE s.job_id = $1 AND s.valence = 'V-'
-            GROUP BY s.urt_primary, sc.name, sc.definition, d.code, d.name
-            ORDER BY negative_count DESC
-            LIMIT 10
-        """, job_id)
-
-        # Get top strengths
-        top_strengths = await self.pool.fetch("""
-            SELECT
-                s.urt_primary as subcode,
-                sc.name as subcode_name,
-                sc.definition,
-                d.code as domain,
-                d.name as domain_name,
-                COUNT(*) as span_count,
-                COUNT(*) FILTER (WHERE s.valence = 'V+') as positive_count,
-                ARRAY_AGG(s.span_text ORDER BY s.intensity DESC) FILTER (WHERE s.valence = 'V+') as example_quotes
-            FROM pipeline.spans s
-            JOIN pipeline.urt_subcodes sc ON sc.code = s.urt_primary
-            JOIN pipeline.urt_domains d ON d.code = SUBSTRING(s.urt_primary, 1, 1)
-            WHERE s.job_id = $1 AND s.valence = 'V+'
-            GROUP BY s.urt_primary, sc.name, sc.definition, d.code, d.name
-            ORDER BY positive_count DESC
-            LIMIT 5
-        """, job_id)
-
-        # Get domain distribution
-        domains = await self.pool.fetch("""
-            SELECT
-                SUBSTRING(urt_primary, 1, 1) as domain,
-                d.name as domain_name,
-                COUNT(*) as total_count,
-                COUNT(*) FILTER (WHERE valence = 'V+') as positive_count,
-                COUNT(*) FILTER (WHERE valence = 'V-') as negative_count
-            FROM pipeline.spans s
-            JOIN pipeline.urt_domains d ON d.code = SUBSTRING(s.urt_primary, 1, 1)
-            WHERE s.job_id = $1
-            GROUP BY SUBSTRING(urt_primary, 1, 1), d.name
-            ORDER BY total_count DESC
-        """, job_id)
-
-        # Get business name if available
-        business = await self.pool.fetchrow("""
-            SELECT DISTINCT business_name
-            FROM reviews
-            WHERE job_id = $1 AND business_name IS NOT NULL
-            LIMIT 1
-        """, job_id)
-
-        return {
-            "business_name": business["business_name"] if business else "This business",
-            "overview": dict(overview) if overview else {},
-            "sentiment": [dict(r) for r in sentiment],
-            "top_issues": [dict(r) for r in top_issues],
-            "top_strengths": [dict(r) for r in top_strengths],
-            "domains": [dict(r) for r in domains],
-        }
-
-    async def _generate_synthesis(self, context: dict[str, Any]) -> Synthesis:
-        """Generate synthesis using LLM."""
-
-        # Build the user prompt with context
-        user_prompt = f"""Analyze this review data for {context['business_name']}:
-
-## Overview
- Total Reviews: {context['overview'].get('total_reviews', 0)}
- Average Rating: {context['overview'].get('avg_rating', 'N/A')}
- Total Insights Extracted: {context['overview'].get('total_spans', 0)}
-
-## Sentiment Distribution
-{self._format_sentiment(context['sentiment'])}
-
-## Top Issues (Problems)
-{self._format_issues(context['top_issues'])}
-
-## Top Strengths
-{self._format_strengths(context['top_strengths'])}
-
-## Domain Breakdown
-{self._format_domains(context['domains'])}
-
-Generate a complete synthesis with actionable insights.
-"""
-
-        # Call LLM
-        try:
-            response = await self.llm_client.generate(
-                system_prompt=SYNTHESIS_SYSTEM_PROMPT,
-                user_prompt=user_prompt,
-                temperature=0.7,  # Allow some creativity
-                max_tokens=4000,
-            )
-
-            # Parse JSON response
-            result = json.loads(response)
-
-            # Convert to Synthesis object
-            return Synthesis(
-                executive_narrative=result.get("executive_narrative", ""),
-                sentiment_insight=result.get("sentiment_insight", ""),
-                category_insight=result.get("category_insight", ""),
-                timeline_insight=result.get("timeline_insight", ""),
-                priority_domain=result.get("priority_domain"),
-                priority_issue=result.get("priority_issue"),
-                action_plan=[
-                    ActionItem(
-                        id=a.get("id", f"action_{i}"),
-                        title=a.get("title", ""),
-                        why=a.get("why", ""),
-                        what=a.get("what", ""),
-                        who=a.get("who", ""),
-                        impact=a.get("impact", ""),
-                        evidence=a.get("evidence", []),
-                        estimated_rating_lift=a.get("estimated_rating_lift"),
-                        complexity=a.get("complexity", "medium"),
-                        priority=a.get("priority", "medium"),
-                        timeline=a.get("timeline", "This month"),
-                        related_subcode=a.get("related_subcode", ""),
-                    )
-                    for i, a in enumerate(result.get("action_plan", []))
-                ],
-                issue_actions={},  # Can be populated from action_plan
-                timeline_annotations=[
-                    TimelineAnnotation(
-                        date=t.get("date", ""),
-                        label=t.get("label", ""),
-                        description=t.get("description", ""),
-                        type=t.get("type", "neutral"),
-                    )
-                    for t in result.get("timeline_annotations", [])
-                ],
-                marketing_angles=result.get("marketing_angles", []),
-                competitor_context=result.get("competitor_context"),
-                generated_at=datetime.utcnow().isoformat(),
-            )
-
-        except json.JSONDecodeError as e:
-            logger.error(f"Failed to parse LLM response: {e}")
-            return self._create_fallback_synthesis()
-        except Exception as e:
-            logger.error(f"Synthesis generation failed: {e}")
-            return self._create_fallback_synthesis()
-
-    def _format_sentiment(self, sentiment: list[dict]) -> str:
-        """Format sentiment data for prompt."""
-        lines = []
-        for s in sentiment:
-            valence = s.get("valence", "Unknown")
-            count = s.get("count", 0)
-            reviews = s.get("review_count", 0)
-            label = {"V+": "Positive", "V-": "Negative", "V0": "Neutral", "V±": "Mixed"}.get(valence, valence)
-            lines.append(f"- {label}: {count} mentions ({reviews} reviews)")
-        return "\n".join(lines) or "No sentiment data"
-
-    def _format_issues(self, issues: list[dict]) -> str:
-        """Format issues for prompt."""
-        lines = []
-        for i, issue in enumerate(issues[:5], 1):
-            subcode = issue.get("subcode", "")
-            name = issue.get("subcode_name", "")
-            domain = issue.get("domain_name", "")
-            count = issue.get("negative_count", 0)
-            quotes = issue.get("example_quotes", [])[:2]
-
-            lines.append(f"{i}. [{subcode}] {name} ({domain})")
-            lines.append(f"   - {count} negative mentions")
-            for q in quotes:
-                if q:
-                    lines.append(f'   - Example: "{q[:100]}..."' if len(q) > 100 else f'   - Example: "{q}"')
-        return "\n".join(lines) or "No issues found"
-
-    def _format_strengths(self, strengths: list[dict]) -> str:
-        """Format strengths for prompt."""
-        lines = []
-        for i, strength in enumerate(strengths[:3], 1):
-            subcode = strength.get("subcode", "")
-            name = strength.get("subcode_name", "")
-            domain = strength.get("domain_name", "")
-            count = strength.get("positive_count", 0)
-            quotes = strength.get("example_quotes", [])[:2]
-
-            lines.append(f"{i}. [{subcode}] {name} ({domain})")
-            lines.append(f"   - {count} positive mentions")
-            for q in quotes:
-                if q:
-                    lines.append(f'   - Example: "{q[:100]}..."' if len(q) > 100 else f'   - Example: "{q}"')
-        return "\n".join(lines) or "No strengths found"
-
-    def _format_domains(self, domains: list[dict]) -> str:
-        """Format domain distribution for prompt."""
-        lines = []
-        for d in domains:
-            domain = d.get("domain", "")
-            name = d.get("domain_name", "")
-            total = d.get("total_count", 0)
-            positive = d.get("positive_count", 0)
-            negative = d.get("negative_count", 0)
-            lines.append(f"- {domain} ({name}): {total} total ({positive} positive, {negative} negative)")
-        return "\n".join(lines) or "No domain data"
-
-    def _create_fallback_synthesis(self) -> Synthesis:
-        """Create a minimal synthesis when LLM fails."""
-        return Synthesis(
-            executive_narrative="Unable to generate detailed analysis. Please review the data manually.",
-            sentiment_insight="",
-            category_insight="",
-            timeline_insight="",
-            priority_domain=None,
-            priority_issue=None,
-            action_plan=[],
-            issue_actions={},
-            timeline_annotations=[],
-            marketing_angles=[],
-            competitor_context=None,
-            generated_at=datetime.utcnow().isoformat(),
-        )
-
-    async def _store_synthesis(self, execution_id: str, synthesis: Synthesis) -> None:
-        """Store synthesis in database."""
-        await self.pool.execute("""
-            UPDATE pipeline.executions
-            SET
-                synthesis = $2,
-                updated_at = NOW()
-            WHERE execution_id = $1
-        """, execution_id, json.dumps({
-            "executive_narrative": synthesis.executive_narrative,
-            "sentiment_insight": synthesis.sentiment_insight,
-            "category_insight": synthesis.category_insight,
-            "timeline_insight": synthesis.timeline_insight,
-            "priority_domain": synthesis.priority_domain,
-            "priority_issue": synthesis.priority_issue,
-            "action_plan": [
-                {
-                    "id": a.id,
-                    "title": a.title,
-                    "why": a.why,
-                    "what": a.what,
-                    "who": a.who,
-                    "impact": a.impact,
-                    "evidence": a.evidence,
-                    "estimated_rating_lift": a.estimated_rating_lift,
-                    "complexity": a.complexity,
-                    "priority": a.priority,
-                    "timeline": a.timeline,
-                    "related_subcode": a.related_subcode,
-                }
-                for a in synthesis.action_plan
-            ],
-            "issue_actions": synthesis.issue_actions,
-            "timeline_annotations": [
-                {
-                    "date": t.date,
-                    "label": t.label,
-                    "description": t.description,
-                    "type": t.type,
-                }
-                for t in synthesis.timeline_annotations
-            ],
-            "marketing_angles": synthesis.marketing_angles,
-            "competitor_context": synthesis.competitor_context,
-            "generated_at": synthesis.generated_at,
-        }))
--- a/packages/reviewiq-pipeline/validate_router.py
+++ b/packages/reviewiq-pipeline/validate_router.py
@@ -0,0 +1,486 @@
+#!/usr/bin/env python
+"""
+Validate router decisions against real reviews with minimal LLM cost.
+
+This script:
+1. Loads real reviews from database
+2. Routes them through the router
+3. Cherry-picks samples from each tier for validation
+4. Optionally runs LLM on small samples to validate decisions
+
+Usage:
+    # Dry run - just show routing decisions, no LLM calls
+    python validate_router.py <job_id> --dry-run
+
+    # Validate with LLM (costs ~$0.05-0.10)
+    python validate_router.py <job_id> --validate
+
+    # Custom sample sizes
+    python validate_router.py <job_id> --validate --skip-samples=3 --cheap-samples=5 --full-samples=3
+"""
+
+import asyncio
+import argparse
+import json
+import logging
+import os
+import sys
+from dataclasses import dataclass
+from typing import Any
+
+# Configure root logging once at import time: INFO level, timestamped records.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+# Named logger for this script's own progress and cost messages.
+logger = logging.getLogger("validate_router")
+
+
+@dataclass
+class ValidationResult:
+    """Result of validating a single review.
+
+    The first six fields describe the review and the router's decision and
+    are always required. The ``llm_*`` fields and ``routing_correct``
+    default to ``None`` and are only meaningful once the review has been
+    run through the LLM.
+    """
+    review_id: str
+    text: str
+    rating: int
+    # Tier label the review was routed to ("skip", "cheap" or "full" in this script).
+    routed_tier: str
+    # Reason reported by the router, or "N/A" when no routing info was attached.
+    routing_reason: str
+    # Raw signal dict taken from the routing decision (empty when unavailable).
+    routing_signals: dict
+    # LLM results (if validated)
+    llm_urt: str | None = None
+    llm_valence: str | None = None
+    llm_span_count: int | None = None
+    # This review's even share of the batch cost.
+    llm_cost: float | None = None
+    # Validation verdict
+    routing_correct: bool | None = None
+    # Human-readable explanation of the verdict.
+    notes: str = ""
+
+
+async def load_reviews_from_db(job_id: str, database_url: str) -> list[dict]:
+    """
+    Fetch every non-empty review of a job and shape it for the router.
+
+    Opens a dedicated asyncpg connection, reads the job's rows from
+    ``pipeline.reviews_enriched`` ordered by ``id``, and always closes the
+    connection again. Each returned dict carries the selected columns plus a
+    lower-cased, stripped ``text_normalized`` copy of the text and fixed
+    placeholder values for ``source``, ``review_version`` and ``review_time``.
+
+    Args:
+        job_id: Job identifier; cast to ``uuid`` inside the query.
+        database_url: Connection string handed to ``asyncpg.connect``.
+
+    Returns:
+        The review dicts in query order (possibly empty).
+    """
+    import asyncpg
+
+    connection = await asyncpg.connect(database_url)
+    try:
+        # Get reviews with text from pipeline schema
+        records = await connection.fetch("""
+            SELECT
+                re.review_id,
+                re.text,
+                re.rating,
+                re.business_id,
+                re.place_id
+            FROM pipeline.reviews_enriched re
+            WHERE re.job_id = $1::uuid
+            AND re.text IS NOT NULL
+            AND re.text != ''
+            ORDER BY re.id
+        """, job_id)
+
+        def to_review(record) -> dict:
+            # Shape one database row into the dict layout built for the router.
+            body = record["text"] or ""
+            return {
+                "review_id": record["review_id"],
+                "text": body,
+                "text_normalized": body.lower().strip(),
+                "rating": record["rating"],
+                "business_id": record["business_id"],
+                "place_id": record["place_id"],
+                "source": "google",
+                "review_version": 1,
+                "review_time": "2024-01-01T00:00:00Z",
+            }
+
+        loaded = [to_review(record) for record in records]
+        logger.info(f"Loaded {len(loaded)} reviews from job {job_id}")
+        return loaded
+
+    finally:
+        await connection.close()
+
+
+def route_reviews(reviews: list[dict]) -> dict[str, list[dict]]:
+    """
+    Run the reviews through a conservative router and bucket them by tier.
+
+    Returns a dict with exactly the keys ``"skip"``, ``"cheap"`` and
+    ``"full"``, holding the router's batches for ``RoutingTier.SKIP``,
+    ``RoutingTier.CHEAP_MODEL`` and ``RoutingTier.FULL_MODEL`` respectively.
+    """
+    from reviewiq_pipeline.services.review_router import (
+        ReviewRouter,
+        RoutingTier,
+        create_router,
+    )
+
+    by_tier = create_router(conservative=True).route_batch(reviews)
+
+    # Short label -> router tier, in the order the report prints them.
+    labelled_tiers = (
+        ("skip", RoutingTier.SKIP),
+        ("cheap", RoutingTier.CHEAP_MODEL),
+        ("full", RoutingTier.FULL_MODEL),
+    )
+    return {label: by_tier[router_tier] for label, router_tier in labelled_tiers}
+
+
+def select_diverse_samples(
+    reviews: list[dict],
+    tier: str,
+    n_samples: int,
+) -> list[dict]:
+    """
+    Select diverse samples from a tier for validation.
+
+    Diversity is measured by the ``(routing reason, rating)`` pair. The first
+    pass takes at most one review per distinct pair, in input order; if that
+    yields fewer than ``n_samples`` reviews, the second pass tops the list up
+    with the not-yet-chosen reviews, again in input order.
+
+    Args:
+        reviews: Routed review dicts. Each needs a ``rating`` key; reviews
+            without a truthy ``_routing`` entry are only used as fill-ins.
+        tier: Tier label of ``reviews``. Currently unused; kept so callers
+            can pass it.
+        n_samples: Maximum number of reviews to return.
+
+    Returns:
+        Up to ``n_samples`` reviews; an empty list when ``reviews`` is empty
+        or ``n_samples`` is not positive.
+    """
+    if not reviews or n_samples <= 0:
+        return []
+
+    samples = []
+    chosen_ids = set()   # id() of reviews already picked (cheap membership test)
+    seen_keys = set()    # (reason, rating) pairs already represented
+
+    # First pass: one representative per distinct (reason, rating) pair
+    for review in reviews:
+        if len(samples) >= n_samples:
+            break
+        routing = review.get("_routing")
+        if not routing:
+            continue
+
+        key = (routing.reason, review["rating"])
+        if key in seen_keys:
+            continue
+
+        seen_keys.add(key)
+        samples.append(review)
+        chosen_ids.add(id(review))
+
+    # Second pass: fill remaining slots with whatever was not picked yet
+    for review in reviews:
+        if len(samples) >= n_samples:
+            break
+        if id(review) not in chosen_ids:
+            samples.append(review)
+            chosen_ids.add(id(review))
+
+    return samples
+
+
+def print_routing_summary(routed: dict[str, list[dict]]):
+    """
+    Print, per tier, how many reviews it received and why.
+
+    For every tier the review count and its share of all routed reviews are
+    shown, followed by the routing reasons ordered from most to least
+    frequent. Reviews without a ``_routing`` entry count towards the tier
+    total but not towards any reason.
+    """
+    rule = "=" * 70
+    grand_total = sum(len(bucket) for bucket in routed.values())
+
+    print("\n" + rule)
+    print("ROUTING SUMMARY")
+    print(rule)
+
+    for tier_name, bucket in routed.items():
+        share = len(bucket) / grand_total * 100 if grand_total > 0 else 0
+        print(f"\n{tier_name.upper()} TIER: {len(bucket)} reviews ({share:.1f}%)")
+
+        # Tally reviews per routing reason
+        tally = {}
+        for item in bucket:
+            decision = item.get("_routing")
+            if not decision:
+                continue
+            why = decision.reason
+            tally[why] = tally.get(why, 0) + 1
+
+        # Most frequent reason first; ties keep first-seen order
+        ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
+        for why, hits in ranked:
+            print(f"  - {why}: {hits}")
+
+
+def print_samples(samples: list[dict], tier: str):
+    """
+    Print the chosen sample reviews of one tier for manual inspection.
+
+    Each entry shows the review id, a star bar for the rating, the first 100
+    characters of the text (with an ellipsis when truncated), the routing
+    reason and four routing signals. Missing routing info is shown as
+    ``N/A`` and missing signals as ``?``.
+    """
+    rule = "=" * 70
+    print(f"\n{rule}")
+    print(f"{tier.upper()} TIER SAMPLES ({len(samples)} reviews)")
+    print(rule)
+
+    # Display label -> key in the routing signals dict
+    signal_fields = (
+        ("words", "word_count"),
+        ("chars", "char_count"),
+        ("numbers", "has_numbers"),
+        ("sentences", "sentence_count"),
+    )
+
+    for position, item in enumerate(samples, 1):
+        decision = item.get("_routing")
+        facts = decision.signals if decision else {}
+
+        print(f"\n[{position}] Review ID: {item['review_id']}")
+
+        stars = '⭐' * item['rating']
+        print(f"    Rating: {stars}")
+
+        body = item['text']
+        ellipsis = '...' if len(body) > 100 else ''
+        print(f"    Text: \"{body[:100]}{ellipsis}\"")
+
+        why = decision.reason if decision else 'N/A'
+        print(f"    Routing: {why}")
+
+        rendered = ", ".join(
+            f"{label}={facts.get(key, '?')}" for label, key in signal_fields
+        )
+        print(f"    Signals: {rendered}")
+
+
+async def validate_with_llm(
+    samples: list[dict],
+    tier: str,
+    config: Any,
+) -> list[ValidationResult]:
+    """
+    Run LLM classification on samples to validate routing decisions.
+
+    Args:
+        samples: Routed review dicts; each needs ``review_id``, ``text`` and
+            ``rating`` and may carry the router's decision under ``_routing``.
+        tier: Tier the samples were routed to ("skip", "cheap" or "full");
+            selects which correctness heuristic is applied. Any other value
+            leaves the verdict as ``None``.
+        config: Passed unchanged to ``LLMClient.create``.
+
+    Returns:
+        One ``ValidationResult`` per (sample, LLM response) pair, or an empty
+        list when ``samples`` is empty (no client is created in that case).
+    """
+    from reviewiq_pipeline.services.llm_client import LLMClient, BatchReviewInput, PartialBatchResult
+
+    results = []
+
+    if not samples:
+        return results
+
+    # Create LLM client
+    client = LLMClient.create(config)
+
+    try:
+        # Prepare batch input
+        batch_input = [
+            BatchReviewInput(
+                review_id=r["review_id"],
+                text=r["text"],
+                rating=r["rating"],
+            )
+            for r in samples
+        ]
+
+        # Run classification
+        logger.info(f"Running LLM on {len(samples)} {tier} tier samples...")
+
+        llm_responses = []
+        metadata = {}
+
+        try:
+            # NOTE(review): responses are paired with samples positionally below,
+            # which presumes classify_batch returns them in input order - confirm.
+            llm_responses, metadata = await client.classify_batch(batch_input, "standard")
+        except PartialBatchResult as e:
+            # Handle partial results
+            logger.warning(f"Partial result for {tier} tier: {len(e.partial_results)} recovered")
+            metadata = e.metadata or {}
+
+            # Build responses from partial results
+            # (entries whose review_index is missing or out of range are dropped)
+            for partial in e.partial_results:
+                idx = partial.get("review_index", -1)
+                if 0 <= idx < len(samples):
+                    llm_responses.append({
+                        "spans": partial.get("spans", []),
+                        "review_summary": partial.get("review_summary", {}),
+                        "_index": idx,
+                    })
+
+            # Pad with empty responses for missing indices
+            processed_indices = {r.get("_index", -1) for r in llm_responses}
+            for i, sample in enumerate(samples):
+                if i not in processed_indices:
+                    llm_responses.append({
+                        "spans": [],
+                        "review_summary": {},
+                        "_index": i,
+                        "_error": "partial_recovery_failed",
+                    })
+
+            # Sort by original index
+            # so that the positional zip() below lines responses up with samples
+            llm_responses.sort(key=lambda x: x.get("_index", 999))
+
+        cost = metadata.get("cost_usd", 0)
+        logger.info(f"LLM cost for {tier} tier: ${cost:.4f}")
+
+        # Process results
+        # zip() stops at the shorter sequence, so samples without a response
+        # produce no ValidationResult.
+        for review, llm_response in zip(samples, llm_responses):
+            routing = review.get("_routing")
+            signals = routing.signals if routing else {}
+
+            spans = llm_response.get("spans", [])
+            # Prefer the span flagged is_primary, else the first span, else an empty dict.
+            primary_span = next((s for s in spans if s.get("is_primary")), spans[0] if spans else {})
+
+            urt = primary_span.get("urt_primary", "N/A")
+            valence = primary_span.get("valence", "N/A")
+
+            # Determine if routing was correct
+            routing_correct = None
+            notes = ""
+
+            if tier == "skip":
+                # SKIP is correct if LLM gives generic code (V4.03) or single low-info span
+                is_generic = urt in ("V4.03", "V4.01", "V4.02", "O1.01")
+                is_simple = len(spans) == 1 and primary_span.get("intensity") == "I1"
+                routing_correct = is_generic or is_simple
+                if not routing_correct:
+                    notes = f"LLM found specific content: {urt}"
+                else:
+                    notes = "Correctly skipped (generic/simple)"
+
+            elif tier == "cheap":
+                # CHEAP is correct if classification is straightforward
+                # (single domain, no complex causal chains)
+                # The only check actually applied is "at most two spans".
+                is_simple = len(spans) <= 2
+                routing_correct = is_simple
+                if not routing_correct:
+                    notes = f"Complex: {len(spans)} spans found"
+                else:
+                    notes = "Simple enough for cheap model"
+
+            elif tier == "full":
+                # FULL is correct if there's meaningful content
+                has_content = len(spans) >= 1 and urt not in ("V4.03", "O1.01")
+                routing_correct = has_content
+                if routing_correct:
+                    notes = f"Correctly sent to full: {len(spans)} spans, {urt}"
+                else:
+                    notes = "Could have been cheaper"
+
+            result = ValidationResult(
+                review_id=review["review_id"],
+                text=review["text"],
+                rating=review["rating"],
+                routed_tier=tier,
+                routing_reason=routing.reason if routing else "N/A",
+                routing_signals=signals,
+                llm_urt=urt,
+                llm_valence=valence,
+                llm_span_count=len(spans),
+                # Batch cost split evenly across the samples sent.
+                llm_cost=cost / len(samples),
+                routing_correct=routing_correct,
+                notes=notes,
+            )
+            results.append(result)
+
+    finally:
+        # Always release the client, including when classification raised.
+        await client.close()
+
+    return results
+
+
+def print_validation_results(results: list[ValidationResult], tier: str):
+    """
+    Print the LLM validation outcome for one tier.
+
+    Shows the tier's accuracy (results whose ``routing_correct`` is truthy
+    over all results) followed by one entry per review with its verdict
+    marker, a 60-character text preview, the routing decision, the LLM
+    classification and the notes. Prints nothing when ``results`` is empty.
+    """
+    if not results:
+        return
+
+    rule = "=" * 70
+    print(f"\n{rule}")
+    print(f"{tier.upper()} TIER VALIDATION RESULTS")
+    print(rule)
+
+    total = len(results)
+    correct = len([entry for entry in results if entry.routing_correct])
+    accuracy = correct / total * 100 if total > 0 else 0
+
+    print(f"\nAccuracy: {correct}/{total} ({accuracy:.1f}%)")
+
+    for entry in results:
+        if entry.routing_correct:
+            marker = "✅"
+        else:
+            marker = "❌"
+        preview = entry.text[:60]
+        print(f"\n{marker} [{entry.review_id}] \"{preview}...\"")
+        print(f"   Rating: {entry.rating}, Routed: {entry.routed_tier} ({entry.routing_reason})")
+        print(f"   LLM: URT={entry.llm_urt}, Valence={entry.llm_valence}, Spans={entry.llm_span_count}")
+        print(f"   Notes: {entry.notes}")
+
+
+async def main():
+    """
+    Command-line entry point: route a job's reviews and optionally validate them.
+
+    Steps:
+    1. Load the job's reviews from the database (``DATABASE_URL`` or a local
+       development default).
+    2. Route them, print the routing summary and the chosen samples per tier,
+       and print a rough cost estimate.
+    3. With ``--validate`` (and without ``--dry-run``), run the LLM on the
+       samples and print per-tier accuracy, totals and recommendations.
+
+    The Anthropic API key is read from ``ANTHROPIC_API_KEY`` only; the script
+    exits with status 1 if validation is requested and the key is not set.
+    """
+    parser = argparse.ArgumentParser(description="Validate router decisions")
+    parser.add_argument("job_id", help="Job ID to analyze")
+    parser.add_argument("--dry-run", action="store_true", help="Show routing only, no LLM")
+    parser.add_argument("--validate", action="store_true", help="Run LLM validation")
+    parser.add_argument("--skip-samples", type=int, default=3, help="SKIP tier samples")
+    parser.add_argument("--cheap-samples", type=int, default=5, help="CHEAP tier samples")
+    parser.add_argument("--full-samples", type=int, default=3, help="FULL tier samples")
+
+    args = parser.parse_args()
+
+    # Database URL (the fallback targets a local development database)
+    database_url = os.environ.get(
+        "DATABASE_URL",
+        "postgresql://scraper:scraper123@localhost:5437/scraper"
+    )
+
+    # Load reviews
+    reviews = await load_reviews_from_db(args.job_id, database_url)
+    if not reviews:
+        print("No reviews found for job")
+        return
+
+    # Route reviews
+    routed = route_reviews(reviews)
+
+    # Print summary
+    print_routing_summary(routed)
+
+    # Select samples
+    skip_samples = select_diverse_samples(routed["skip"], "skip", args.skip_samples)
+    cheap_samples = select_diverse_samples(routed["cheap"], "cheap", args.cheap_samples)
+    full_samples = select_diverse_samples(routed["full"], "full", args.full_samples)
+
+    # Print samples
+    print_samples(skip_samples, "skip")
+    print_samples(cheap_samples, "cheap")
+    print_samples(full_samples, "full")
+
+    # Estimate cost
+    total_samples = len(skip_samples) + len(cheap_samples) + len(full_samples)
+    estimated_cost = total_samples * 0.003  # ~$0.003 per review with Sonnet
+    print(f"\n{'=' * 70}")
+    print(f"VALIDATION COST ESTIMATE: ~${estimated_cost:.3f} for {total_samples} samples")
+    print("=" * 70)
+
+    if args.dry_run:
+        print("\n[DRY RUN] No LLM calls made. Use --validate to run validation.")
+        return
+
+    if not args.validate:
+        print("\nUse --validate to run LLM validation on these samples.")
+        return
+
+    # Credentials must come from the environment; a key embedded in source
+    # control is exposed to everyone with repository access.
+    anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
+    if not anthropic_api_key:
+        print(
+            "\nANTHROPIC_API_KEY is not set; export it before running --validate.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    # Run validation
+    from reviewiq_pipeline.config import Config
+
+    config = Config(
+        database_url=database_url,
+        llm_provider="anthropic",
+        llm_model="claude-sonnet-4-5-20250929",
+        anthropic_api_key=anthropic_api_key,
+    )
+
+    all_results = []
+    total_cost = 0
+
+    # Validate each tier
+    for tier, samples in [("skip", skip_samples), ("cheap", cheap_samples), ("full", full_samples)]:
+        if samples:
+            results = await validate_with_llm(samples, tier, config)
+            all_results.extend(results)
+            total_cost += sum(r.llm_cost or 0 for r in results)
+            print_validation_results(results, tier)
+
+    # Print summary
+    print(f"\n{'=' * 70}")
+    print("VALIDATION SUMMARY")
+    print("=" * 70)
+
+    # Nothing validated (e.g. every sample size was 0): there is no accuracy
+    # to report and no basis for recommendations, so stop here instead of
+    # dividing by zero below.
+    if not all_results:
+        print("\nNo samples were validated; nothing to summarize.")
+        return
+
+    for tier in ["skip", "cheap", "full"]:
+        tier_results = [r for r in all_results if r.routed_tier == tier]
+        if tier_results:
+            correct = sum(1 for r in tier_results if r.routing_correct)
+            total = len(tier_results)
+            print(f"{tier.upper()}: {correct}/{total} correct ({correct/total*100:.0f}%)")
+
+    overall_correct = sum(1 for r in all_results if r.routing_correct)
+    overall_total = len(all_results)
+    print(f"\nOVERALL: {overall_correct}/{overall_total} correct ({overall_correct/overall_total*100:.0f}%)")
+    print(f"TOTAL COST: ${total_cost:.4f}")
+
+    # Recommendations
+    print(f"\n{'=' * 70}")
+    print("RECOMMENDATIONS")
+    print("=" * 70)
+
+    skip_errors = [r for r in all_results if r.routed_tier == "skip" and not r.routing_correct]
+    if skip_errors:
+        print("\n⚠️  SKIP tier false negatives found:")
+        for r in skip_errors:
+            print(f"   - \"{r.text[:50]}...\" → {r.llm_urt}")
+        print("   Consider tightening SKIP criteria")
+    else:
+        print("\n✅ SKIP tier looks safe")
+
+    cheap_errors = [r for r in all_results if r.routed_tier == "cheap" and not r.routing_correct]
+    if cheap_errors:
+        print("\n⚠️  CHEAP tier may miss complexity:")
+        for r in cheap_errors:
+            print(f"   - \"{r.text[:50]}...\" → {r.llm_span_count} spans")
+    else:
+        print("\n✅ CHEAP tier thresholds look good")
+
+
+# Run the async CLI only when executed as a script, not when imported.
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/run_synthesis.py
+++ b/run_synthesis.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""Regenerate synthesis with new report format."""
+import asyncio
+import os
+import sys
+import uuid
+
+# Put the two package source trees first on the import path so the
+# reviewiq_pipeline imports inside main() resolve without installation.
+# NOTE(review): the absolute /app/... paths look like a container layout - confirm.
+sys.path.insert(0, '/app/packages/reviewiq-pipeline/src')
+sys.path.insert(0, '/app/packages/pipeline-core/src')
+
+async def main():
+    """
+    Regenerate the stage-5 synthesis (analyst report) for one job and print it.
+
+    The job id is taken from the first command-line argument when given,
+    otherwise the built-in default job is used. The most recent execution of
+    the job is reused, or a new ``running`` execution row is inserted; after a
+    successful run the execution is marked ``completed``.
+
+    The connection pool and the LLM client are always closed, including when
+    a query or the synthesis itself fails.
+    """
+    import asyncpg
+    from reviewiq_pipeline.config import Config
+    from reviewiq_pipeline.services.llm_client import LLMClient
+    from reviewiq_pipeline.stages.stage5_synthesize import Stage5Synthesizer
+
+    # Optional override: python run_synthesis.py <job_id>
+    job_id = sys.argv[1] if len(sys.argv) > 1 else "a3813665-ea23-4fb0-aab7-b282ef9443e4"
+    try:
+        # Fail early with a clear message instead of a database cast error.
+        uuid.UUID(job_id)
+    except ValueError:
+        print(f"Invalid job id (expected a UUID): {job_id}", file=sys.stderr)
+        sys.exit(2)
+
+    database_url = os.getenv(
+        'DATABASE_URL',
+        'postgresql://scraper:scraper123@scraper-db:5432/scraper'
+    )
+
+    print("Connecting to database...")
+    pool = await asyncpg.create_pool(database_url)
+    llm_client = None
+
+    try:
+        # Check if execution exists for this job, create one if not
+        print("Checking for existing execution...")
+        row = await pool.fetchrow(
+            "SELECT id FROM pipeline.executions WHERE job_id = $1::uuid ORDER BY created_at DESC LIMIT 1",
+            job_id
+        )
+
+        if row:
+            execution_id = str(row['id'])
+            print(f"Found existing execution: {execution_id}")
+        else:
+            execution_id = str(uuid.uuid4())
+            print(f"Creating new execution: {execution_id}")
+            await pool.execute("""
+                INSERT INTO pipeline.executions (id, pipeline_id, job_id, status, stages_requested, created_at)
+                VALUES ($1::uuid, 'reviewiq', $2::uuid, 'running', ARRAY['synthesize'], NOW())
+            """, execution_id, job_id)
+
+        print("Creating LLM client...")
+        config = Config()
+        llm_client = LLMClient.create(config)
+
+        print(f"Generating analyst report for job {job_id}...")
+        stage5 = Stage5Synthesizer(pool=pool, llm_client=llm_client)
+        synthesis = await stage5.run(job_id, execution_id)
+
+        # Mark execution as completed
+        await pool.execute(
+            "UPDATE pipeline.executions SET status = 'completed', completed_at = NOW() WHERE id = $1::uuid",
+            execution_id
+        )
+
+        print(f"\n{'='*70}")
+        print("ANALYST REPORT GENERATED")
+        print(f"{'='*70}")
+        print(f"\nHEADLINE: {synthesis.headline}")
+        print(f"\nVERDICT: {synthesis.verdict}")
+        print(f"\nRATING: {synthesis.current_rating:.1f} → {synthesis.potential_rating:.1f} (gap: +{synthesis.rating_gap:.1f})")
+        print(f"\nNARRATIVE:\n{synthesis.narrative[:500]}...")
+        print(f"\nPRIMARY PROBLEM: {synthesis.primary_problem}")
+        print(f"ROOT CAUSE: {synthesis.root_cause}")
+        print(f"\nACTIONS ({len(synthesis.actions)}):")
+        for a in synthesis.actions:
+            print(f"  [{a.priority}] {a.action}")
+            print(f"      Owner: {a.owner} | Impact: {a.impact}")
+        print(f"\nEVIDENCE ({len(synthesis.evidence)}):")
+        # Only the first three evidence quotes are printed.
+        for e in synthesis.evidence[:3]:
+            print(f"  [{e.sentiment}] \"{e.quote[:60]}...\"")
+            print(f"      Context: {e.context}")
+
+    finally:
+        # Close the client first, but never let a failure there keep the
+        # pool open.
+        try:
+            if llm_client is not None:
+                await llm_client.close()
+        finally:
+            await pool.close()
+
+# Run the regeneration only when executed as a script, not when imported.
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/scrapers/google_reviews/session_manager.py
+++ b/scrapers/google_reviews/session_manager.py
@@ -0,0 +1,266 @@
+"""
+Session Manager for Google Reviews Scraper
+
+Manages browser sessions between validation and scraping phases.
+Allows reusing the same browser instance to avoid duplicate navigation.
+
+Usage:
+    # During validation
+    session_id = session_manager.create_session(driver, url, business_info, total_reviews)
+    return {"session_id": session_id, "business_info": business_info}
+
+    # During scraping (with session_id from validation)
+    session = session_manager.get_session(session_id)
+    if session:
+        driver = session.driver
+        # Continue from where validation left off
+"""
+
+import uuid
+import time
+import threading
+from typing import Optional, Dict, Any
+from dataclasses import dataclass, field
+from datetime import datetime
+
+
+@dataclass
+class BrowserSession:
+    """Represents a validated browser session ready for scraping."""
+    session_id: str
+    driver: Any  # WebDriver instance
+    url: str
+    business_info: Dict[str, Any]
+    total_reviews: int
+    created_at: float  # time.time() at creation
+    expires_at: float  # time.time() after which the session counts as expired
+    browser_fingerprint: Optional[Dict[str, Any]] = None
+    log_capture: Any = None  # LogCapture instance
+    # Track session state
+    state: str = "validated"  # validated -> scraping -> completed/expired
+
+
+class SessionManager:
+    """
+    Manages browser sessions between validation and scraping.
+
+    Sessions have a TTL (default 5 minutes) after which they're automatically
+    cleaned up and the browser is closed. Cleanup runs on a daemon thread
+    started by ``start()``; the session table is guarded by a re-entrant lock.
+    """
+
+    DEFAULT_TTL_SECONDS = 300  # 5 minutes
+    CLEANUP_INTERVAL_SECONDS = 30  # Check for expired sessions every 30s
+
+    def __init__(self, ttl_seconds: Optional[int] = None):
+        """
+        Args:
+            ttl_seconds: Default lifetime of new sessions; ``None`` or 0
+                selects ``DEFAULT_TTL_SECONDS``.
+        """
+        self.ttl_seconds = ttl_seconds or self.DEFAULT_TTL_SECONDS
+        self._sessions: Dict[str, BrowserSession] = {}
+        self._lock = threading.RLock()
+        self._cleanup_thread: Optional[threading.Thread] = None
+        self._running = False
+        # Set by stop() to wake the cleanup thread out of its wait immediately.
+        self._stop_event = threading.Event()
+
+    def start(self):
+        """Start the background cleanup thread (no-op if already running)."""
+        with self._lock:
+            if self._running:
+                return
+            self._running = True
+            # A fresh event per thread: a previous loop that has not finished
+            # yet keeps seeing its own (set) event and still terminates.
+            self._stop_event = threading.Event()
+            self._cleanup_thread = threading.Thread(
+                target=self._cleanup_loop,
+                args=(self._stop_event,),
+                daemon=True,
+            )
+            self._cleanup_thread.start()
+
+    def stop(self):
+        """Stop the background cleanup thread and wait briefly for it to exit."""
+        with self._lock:
+            self._running = False
+            stop_event = self._stop_event
+            thread = self._cleanup_thread
+
+        # Wake the loop right away instead of waiting out its sleep interval.
+        stop_event.set()
+        # Join outside the lock so a cleanup pass in progress can finish.
+        if thread and thread is not threading.current_thread():
+            thread.join(timeout=5)
+
+    def _cleanup_loop(self, stop_event: Optional[threading.Event] = None):
+        """Background loop to clean up expired sessions until stopped."""
+        if stop_event is None:
+            stop_event = self._stop_event
+        while not stop_event.is_set():
+            try:
+                self._cleanup_expired()
+            except Exception as e:
+                print(f"[SessionManager] Cleanup error: {e}")
+            # Interruptible sleep: returns early as soon as stop() sets the event.
+            stop_event.wait(self.CLEANUP_INTERVAL_SECONDS)
+
+    def _cleanup_expired(self):
+        """Remove expired sessions and close their browsers."""
+        now = time.time()
+
+        # Find and remove in one critical section, so a session whose TTL is
+        # extended concurrently cannot be released on a stale expiry check.
+        with self._lock:
+            expired_ids = [
+                session_id
+                for session_id, session in self._sessions.items()
+                if now > session.expires_at
+            ]
+            expired_sessions = [self._sessions.pop(session_id) for session_id in expired_ids]
+
+        # Browsers are closed outside the lock; quitting a driver can be slow.
+        for session in expired_sessions:
+            self._close_session(session, reason="expired")
+
+    def _close_session(self, session: BrowserSession, reason: str):
+        """Log the release of an already-removed session and quit its driver."""
+        print(f"[SessionManager] Releasing session {session.session_id} ({reason})")
+        try:
+            if session.driver:
+                session.driver.quit()
+        except Exception as e:
+            # Best effort: a browser that fails to close must not break cleanup.
+            print(f"[SessionManager] Error closing driver for {session.session_id}: {e}")
+
+    def create_session(
+        self,
+        driver: Any,
+        url: str,
+        business_info: Dict[str, Any],
+        total_reviews: int,
+        browser_fingerprint: Optional[Dict[str, Any]] = None,
+        log_capture: Any = None,
+        ttl_seconds: Optional[int] = None
+    ) -> str:
+        """
+        Create a new browser session after validation.
+
+        Args:
+            driver: WebDriver instance (positioned on Google Maps page)
+            url: The validated Google Maps URL
+            business_info: Extracted business information
+            total_reviews: Total review count from page
+            browser_fingerprint: Browser fingerprint settings used
+            log_capture: LogCapture instance for logging
+            ttl_seconds: Custom TTL for this session (default: the manager's TTL)
+
+        Returns:
+            session_id: Unique identifier for this session
+        """
+        ttl = ttl_seconds or self.ttl_seconds
+        now = time.time()
+
+        with self._lock:
+            # Short ID for easier use. Eight hex characters can collide, and a
+            # collision would overwrite a live session and leak its browser,
+            # so retry until the id is unused.
+            session_id = str(uuid.uuid4())[:8]
+            while session_id in self._sessions:
+                session_id = str(uuid.uuid4())[:8]
+
+            self._sessions[session_id] = BrowserSession(
+                session_id=session_id,
+                driver=driver,
+                url=url,
+                business_info=business_info,
+                total_reviews=total_reviews,
+                created_at=now,
+                expires_at=now + ttl,
+                browser_fingerprint=browser_fingerprint,
+                log_capture=log_capture,
+                state="validated"
+            )
+
+        print(f"[SessionManager] Created session {session_id} for {business_info.get('name', 'unknown')} (TTL: {ttl}s)")
+        return session_id
+
+    def get_session(self, session_id: str) -> Optional[BrowserSession]:
+        """
+        Retrieve a session by ID.
+
+        An expired session is released (its browser is closed) as a side effect.
+
+        Args:
+            session_id: The session identifier
+
+        Returns:
+            BrowserSession if found and not expired, None otherwise
+        """
+        with self._lock:
+            session = self._sessions.get(session_id)
+            if not session:
+                print(f"[SessionManager] Session {session_id} not found")
+                return None
+
+            # Check if expired
+            if time.time() > session.expires_at:
+                print(f"[SessionManager] Session {session_id} expired")
+                self.release_session(session_id, reason="expired")
+                return None
+
+            return session
+
+    def claim_session(self, session_id: str) -> Optional[BrowserSession]:
+        """
+        Claim a session for scraping (marks it as in-use).
+
+        Only a session in state ``validated`` can be claimed; claiming moves
+        it to ``scraping`` and resets its expiry to one hour from now.
+
+        Args:
+            session_id: The session identifier
+
+        Returns:
+            BrowserSession if successfully claimed, None otherwise
+        """
+        with self._lock:
+            session = self.get_session(session_id)
+            if not session:
+                return None
+
+            if session.state != "validated":
+                print(f"[SessionManager] Session {session_id} already in state: {session.state}")
+                return None
+
+            session.state = "scraping"
+            # Extend TTL during scraping (1 hour max)
+            session.expires_at = time.time() + 3600
+
+            print(f"[SessionManager] Claimed session {session_id} for scraping")
+            return session
+
+    def release_session(self, session_id: str, reason: str = "completed"):
+        """
+        Release a session and close the browser.
+
+        Unknown session ids are ignored.
+
+        Args:
+            session_id: The session identifier
+            reason: Why the session is being released
+        """
+        with self._lock:
+            session = self._sessions.pop(session_id, None)
+
+        if session:
+            self._close_session(session, reason)
+
+    def extend_session(self, session_id: str, additional_seconds: int = 300) -> bool:
+        """
+        Extend a session's TTL.
+
+        Guarantees the session lives at least ``additional_seconds`` from now.
+        The expiry is never moved earlier, so extending a claimed session
+        cannot cut its remaining time short.
+
+        Args:
+            session_id: The session identifier
+            additional_seconds: Minimum remaining lifetime, in seconds
+
+        Returns:
+            True if extended, False if session not found
+        """
+        with self._lock:
+            session = self._sessions.get(session_id)
+            if not session:
+                return False
+            session.expires_at = max(session.expires_at, time.time() + additional_seconds)
+            return True
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get session manager statistics (count plus per-session age and TTL left)."""
+        with self._lock:
+            now = time.time()
+            sessions = []
+            for sid, s in self._sessions.items():
+                sessions.append({
+                    "session_id": sid,
+                    "business": s.business_info.get("name", "unknown"),
+                    "state": s.state,
+                    "age_seconds": int(now - s.created_at),
+                    "ttl_remaining": int(s.expires_at - now)
+                })
+            return {
+                "total_sessions": len(self._sessions),
+                "sessions": sessions
+            }
+
+    def list_sessions(self) -> list:
+        """List the ids of all sessions currently held."""
+        with self._lock:
+            return list(self._sessions.keys())
+
+
+# Process-wide manager, built lazily by get_session_manager()
+_session_manager: Optional[SessionManager] = None
+
+
+def get_session_manager() -> SessionManager:
+    """
+    Return the shared SessionManager, creating it on first use.
+
+    The first call constructs the manager with default settings and starts
+    its background cleanup thread; later calls return that same instance.
+    """
+    global _session_manager
+    if _session_manager is not None:
+        return _session_manager
+
+    _session_manager = SessionManager()
+    _session_manager.start()
+    return _session_manager
--- a/scrapers/google_reviews/v1_1_0.py
+++ b/scrapers/google_reviews/v1_1_0.py
@@ -732,7 +732,7 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
                   progress_callback=None, validation_only: bool = False,
                   sort_strategy: str = SORT_AUTO, sort_order: List[str] = None,
                   multi_sort_threshold: int = MULTI_SORT_THRESHOLD,
-                   close_enough_pct: float = 95.0) -> dict:
+                   close_enough_pct: float = 95.0, initial_sort: str = None) -> dict:
    """
    Scrape Google Maps reviews with optional multi-sort strategy.

@@ -754,6 +754,7 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
        sort_order: Custom sort order for multi-sort (default: newest, lowest, highest, relevant)
        multi_sort_threshold: Auto-enable multi-sort if total reviews > this (default: 1000)
        close_enough_pct: Stop retrying if we have this % of total reviews (default: 95.0)
+        initial_sort: Initial sort order to use (default: newest). Used for retry with different sort

    Returns:
        dict with reviews list and metadata
@@ -1381,8 +1382,10 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
                log.info('browser', "Sort button found")
                break

+        # Track bot detection - if sort button hidden, Google likely detected bot
+        bot_detected = not sort_found
        if not sort_found:
-            log.warn('browser', "Sort button not found after waiting, continuing without sorting")
+            log.warn('browser', "Sort button not found after waiting, continuing without sorting (bot detection likely)")

        # Sort by specified order (default: newest)
        target_sort = initial_sort or SORT_NEWEST
@@ -1815,6 +1818,71 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
                    }
                    text = longestText;

+                    // OWNER RESPONSE: Find by "Response from the owner" text anchor
+                    var ownerResponse = null;
+                    var ownerSpan = null;
+                    var cardSpans = card.querySelectorAll('span');
+                    for (var k = 0; k < cardSpans.length; k++) {
+                        if (cardSpans[k].textContent.trim() === 'Response from the owner') {
+                            ownerSpan = cardSpans[k];
+                            break;
+                        }
+                    }
+
+                    if (ownerSpan) {
+                        // Navigate: span -> header div -> container div
+                        var headerDiv = ownerSpan.closest('div');
+                        var respContainer = headerDiv ? headerDiv.parentElement : null;
+
+                        if (respContainer) {
+                            // Click expand button if exists and not expanded
+                            var expandBtn = respContainer.querySelector('button[aria-label="See more"]');
+                            if (expandBtn && expandBtn.getAttribute('aria-expanded') !== 'true') {
+                                expandBtn.click();
+                            }
+
+                            // Get timestamp from header spans
+                            var respTimestamp = '';
+                            var headerSpans = headerDiv.querySelectorAll('span');
+                            for (var m = 0; m < headerSpans.length; m++) {
+                                var spanTxt = headerSpans[m].textContent.trim();
+                                if (spanTxt.match(/ago$/i)) {
+                                    respTimestamp = spanTxt;
+                                    break;
+                                }
+                            }
+
+                            // Get response text from direct child div[lang]
+                            var respText = '';
+                            var langDivs = respContainer.children;
+                            for (var m = 0; m < langDivs.length; m++) {
+                                if (langDivs[m].tagName === 'DIV' && langDivs[m].hasAttribute('lang')) {
+                                    respText = langDivs[m].textContent.trim();
+                                    respText = respText.replace(/(More|Less)$/, '').trim();
+                                    break;
+                                }
+                            }
+
+                            // Fallback: find longest text div that's not the header
+                            if (!respText) {
+                                for (var m = 0; m < langDivs.length; m++) {
+                                    if (langDivs[m].tagName === 'DIV') {
+                                        var divTxt = langDivs[m].textContent.trim();
+                                        if (divTxt.includes('Response from the owner')) continue;
+                                        divTxt = divTxt.replace(/(More|Less)$/, '').trim();
+                                        if (divTxt.length > respText.length) {
+                                            respText = divTxt;
+                                        }
+                                    }
+                                }
+                            }
+
+                            if (respText) {
+                                ownerResponse = {text: respText, timestamp: respTimestamp};
+                            }
+                        }
+                    }
+
                    if (author && rating >= 1 && rating <= 5) {
                        results.push({
                            id: rid,
@@ -1823,6 +1891,7 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
                            text: text,
                            rating: rating,
                            timestamp: timestamp,
+                            owner_response: ownerResponse,
                            source: 'dom'
                        });
                    }
@@ -2198,6 +2267,9 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
                topics_inferred_count += 1
        log.info('scraper', f"Topics inferred for {topics_inferred_count}/{len(review_list)} reviews", metrics={'topics_inferred_count': topics_inferred_count, 'reviews_count': len(review_list)})

+    # Include business info captured from Overview page
+    business_info = business_info_cache[0] or {}
+
    return {
        "reviews": review_list,  # Only unflushed reviews (flushed already sent to callback)
        "total": grand_total,
@@ -2209,10 +2281,19 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
        "metrics_history": metrics_history,  # For crash detection
        "start_time": start_time,  # For crash report elapsed time
        "session_fingerprint": session_fingerprint,  # Browser fingerprint for bot detection analysis
+        "bot_detected": bot_detected if 'bot_detected' in dir() else False,  # True if sort button was hidden
+        "initial_sort_used": target_sort if 'target_sort' in dir() else SORT_NEWEST,  # Sort order used for first pass
        "multi_sort": {
            "enabled": should_multi_sort if 'should_multi_sort' in dir() else False,
            "completed_sorts": completed_sorts if 'completed_sorts' in dir() else [SORT_NEWEST],
            "first_pass_count": first_pass_count if 'first_pass_count' in dir() else grand_total
+        },
+        # Business info captured from Google Maps page
+        "business_info": {
+            "name": business_info.get("name"),
+            "category": business_info.get("category"),
+            "address": business_info.get("address"),
+            "rating": business_info.get("rating")
        }
    }

@@ -2220,7 +2301,8 @@ def scrape_reviews(driver, url: str, max_reviews: int = 5000, timeout_no_new: in
 def fast_scrape_reviews(url: str, headless: bool = False, max_scrolls: int = 999999,
                        progress_callback=None, driver=None, return_driver: bool = False,
                        log_capture: LogCapture = None, flush_callback=None, validation_only: bool = False,
-                        browser_fingerprint: dict = None):
+                        browser_fingerprint: dict = None, initial_sort: str = None,
+                        sort_strategy: str = SORT_AUTO, max_reviews: int = None):
    """
    Production-compatible wrapper for scrape_reviews.
    Matches the API expected by job_manager.py.
@@ -2240,6 +2322,10 @@ def fast_scrape_reviews(url: str, headless: bool = False, max_scrolls: int = 999
            - timezone: string (e.g., "Europe/Madrid")
            - language: string (e.g., "en-US")
            - platform: string (e.g., "MacIntel")
+        initial_sort: Initial sort order to use ("newest", "lowest", "highest", "relevant")
+                     Used for retry with different sort strategy
+        sort_strategy: Sort strategy ("auto", "multi", "single", or specific sort)
+        max_reviews: Maximum reviews to collect (for testing). None = unlimited (default: 5000)

    Returns:
        Dictionary with: reviews, count, total_reviews, time, success, error, driver, logs
@@ -2329,13 +2415,15 @@ def fast_scrape_reviews(url: str, headless: bool = False, max_scrolls: int = 999
        result = scrape_reviews(
            driver=driver,
            url=url,
-            max_reviews=999999,  # Effectively unlimited
+            max_reviews=max_reviews if max_reviews else 999999,  # Unlimited by default, or custom limit for testing
            timeout_no_new=15,
            flush_callback=internal_flush,
            flush_batch_size=100,  # Smaller batches for more frequent progress
            log_capture=log_capture,
            progress_callback=progress_callback,  # Pass through for real-time log updates
-            validation_only=validation_only  # Return early if just validating
+            validation_only=validation_only,  # Return early if just validating
+            sort_strategy=sort_strategy,  # Sort strategy (auto, multi, single)
+            initial_sort=initial_sort  # Initial sort order for retry with different sort
        )

        elapsed = time.time() - start_time
@@ -2350,7 +2438,13 @@ def fast_scrape_reviews(url: str, headless: bool = False, max_scrolls: int = 999
            "error": None,
            "logs": result.get("logs", []),
            "review_topics": result.get("review_topics", []),  # Topic filters with mention counts
-            "session_fingerprint": result.get("session_fingerprint")  # Browser fingerprint for bot detection
+            "session_fingerprint": result.get("session_fingerprint"),  # Browser fingerprint for bot detection
+            # Tracking info for retry strategy
+            "bot_detected": result.get("bot_detected", False),  # True if sort button was hidden by Google
+            "initial_sort_used": result.get("initial_sort_used", "newest"),  # Sort order used
+            "multi_sort": result.get("multi_sort", {}),  # Multi-sort completion info
+            # Business info captured from Google Maps page
+            "business_info": result.get("business_info", {})
        }

        # Include validation_info if in validation_only mode
--- a/scrapers/google_reviews/v1_2_0.py
+++ b/scrapers/google_reviews/v1_2_0.py
--- a/scripts/backfill_business_category.py
+++ b/scripts/backfill_business_category.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+"""
+Backfill missing business_category for existing jobs.
+Uses validation_only mode to quickly capture business info without re-scraping reviews.
+"""
+import asyncio
+import asyncpg
+import os
+import sys
+
+# Add project root to path
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from scrapers.google_reviews.v1_1_0 import fast_scrape_reviews
+
+# Connection string for the jobs database, taken from the DATABASE_URL environment variable.
+# NOTE(review): the fallback embeds a default username/password - confirm it is meant for local development only.
+DATABASE_URL = os.getenv('DATABASE_URL', 'postgresql://scraper:scraper123@localhost:5437/scraper')
+
+
+async def backfill_categories():
+    """Fetch and store the missing business category for completed jobs.
+
+    Selects every completed job whose ``business_category`` is NULL, runs a
+    validation-only scrape of the job URL in a worker thread, and writes the
+    captured category back to the ``jobs`` table. Progress lines and a final
+    updated/failed summary are printed to stdout.
+
+    A failure on one job (scrape or database error) is reported and counted as
+    failed; it does not stop the remaining jobs from being processed.
+    """
+
+    # Connect to database
+    conn = await asyncpg.connect(DATABASE_URL)
+
+    try:
+        # Get jobs missing business_category
+        rows = await conn.fetch("""
+            SELECT job_id, url, business_name
+            FROM jobs
+            WHERE business_category IS NULL
+              AND status = 'completed'
+            ORDER BY created_at DESC
+        """)
+
+        print(f"Found {len(rows)} jobs missing business_category\n")
+
+        updated = 0
+        failed = 0
+
+        for row in rows:
+            job_id = row['job_id']
+            url = row['url']
+            name = row['business_name'] or 'Unknown'
+
+            print(f"Processing: {name[:50]}...")
+
+            try:
+                # Run validation-only scrape (fast - just captures business info)
+                result = await asyncio.to_thread(
+                    fast_scrape_reviews,
+                    url=url,
+                    headless=True,
+                    validation_only=True
+                )
+
+                # Either key may be absent or explicitly None, so normalise to
+                # dicts before reading the category out of them.
+                validation_info = result.get('validation_info') or {}
+                business_info = result.get('business_info') or {}
+                category = validation_info.get('category') or business_info.get('category')
+
+                if category:
+                    # Update the database
+                    await conn.execute("""
+                        UPDATE jobs
+                        SET business_category = $2,
+                            updated_at = NOW()
+                        WHERE job_id = $1
+                    """, job_id, category)
+
+                    print(f"  ✓ Category: {category}")
+                    updated += 1
+                else:
+                    # Surface the scraper's own error when there is one, so a
+                    # failed scrape is not mistaken for a page without a category.
+                    scrape_error = result.get('error')
+                    if scrape_error:
+                        print(f"  ✗ Error: {scrape_error}")
+                    else:
+                        print(f"  ✗ No category found")
+                    failed += 1
+
+            except Exception as e:
+                print(f"  ✗ Error: {e}")
+                failed += 1
+
+        print(f"\n{'='*50}")
+        print(f"Done! Updated: {updated}, Failed: {failed}")
+
+    finally:
+        await conn.close()
+
+
+if __name__ == '__main__':
+    asyncio.run(backfill_categories())
--- a/scripts/register_reputation_pipeline.py
+++ b/scripts/register_reputation_pipeline.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+"""
+Register the Reputation Pipeline in the pipeline registry.
+
+Usage:
+    python scripts/register_reputation_pipeline.py
+"""
+
+import asyncio
+import os
+import sys
+
+import asyncpg
+
+# Database URL, taken from the DATABASE_URL environment variable.
+# NOTE(review): the fallback embeds a default username/password - confirm it is meant for local development only.
+DB_URL = os.environ.get(
+    "DATABASE_URL",
+    "postgresql://scraper:scraper123@localhost:5437/scraper"
+)
+
+
+async def register_pipeline():
+    """Register the Reputation and ReviewIQ pipelines in the database.
+
+    Ensures the ``pipeline`` schema and ``pipeline.registry`` table exist,
+    upserts one registry row per pipeline (an existing row with the same
+    ``pipeline_id`` is overwritten), then prints every registered pipeline.
+    """
+    # (pipeline_id, name, description, version, module_path, stages, input_type)
+    pipelines = [
+        (
+            'reputation',
+            'Reputation Analytics Pipeline',
+            'Primitives-based classification and reputation scoring. Generates business-facing analytics reports with domain breakdown, key drivers, and actionable insights.',
+            '2.0.0',
+            'reviewiq_pipeline.reputation_pipeline:ReputationPipeline',
+            ['classify', 'report'],
+            'BusinessInput',
+        ),
+        (
+            'reviewiq',
+            'ReviewIQ Classification Pipeline',
+            'Classifies reviews using URT taxonomy, detects issues, and aggregates metrics for dashboards.',
+            '1.0.0',
+            'reviewiq_pipeline.pipeline:ReviewIQPipeline',
+            ['normalize', 'classify', 'route', 'aggregate', 'synthesize'],
+            'ScraperV1Output',
+        ),
+    ]
+
+    # One parameterized upsert shared by every pipeline definition.
+    upsert_sql = """
+        INSERT INTO pipeline.registry (
+            pipeline_id,
+            name,
+            description,
+            version,
+            module_path,
+            stages,
+            input_type,
+            is_enabled,
+            updated_at
+        )
+        VALUES ($1, $2, $3, $4, $5, $6::text[], $7, TRUE, NOW())
+        ON CONFLICT (pipeline_id) DO UPDATE SET
+            name = EXCLUDED.name,
+            description = EXCLUDED.description,
+            version = EXCLUDED.version,
+            module_path = EXCLUDED.module_path,
+            stages = EXCLUDED.stages,
+            input_type = EXCLUDED.input_type,
+            is_enabled = EXCLUDED.is_enabled,
+            updated_at = NOW()
+    """
+
+    print("Connecting to database...")
+
+    conn = await asyncpg.connect(DB_URL)
+
+    try:
+        # CREATE TABLE pipeline.registry fails if the schema itself is missing.
+        # Only issue CREATE SCHEMA when it is really absent, so a role without
+        # CREATE privilege still works against an already-provisioned database.
+        schema_missing = await conn.fetchval("SELECT to_regnamespace('pipeline') IS NULL")
+        if schema_missing:
+            await conn.execute("CREATE SCHEMA IF NOT EXISTS pipeline")
+
+        # Ensure the registry table exists
+        await conn.execute("""
+            CREATE TABLE IF NOT EXISTS pipeline.registry (
+                pipeline_id VARCHAR(50) PRIMARY KEY,
+                name VARCHAR(255) NOT NULL,
+                description TEXT,
+                version VARCHAR(50) NOT NULL,
+                module_path VARCHAR(500) NOT NULL,
+                stages TEXT[] NOT NULL DEFAULT '{}',
+                input_type VARCHAR(100),
+                config JSONB,
+                is_enabled BOOLEAN NOT NULL DEFAULT TRUE,
+                created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+            )
+        """)
+
+        # Register (or refresh) each pipeline definition
+        for pipeline in pipelines:
+            await conn.execute(upsert_sql, *pipeline)
+            print(f"✓ Registered '{pipeline[0]}' pipeline")
+
+        # List all registered pipelines
+        rows = await conn.fetch("""
+            SELECT pipeline_id, name, version, is_enabled, stages
+            FROM pipeline.registry
+            ORDER BY name
+        """)
+
+        print("\n📋 Registered Pipelines:")
+        print("-" * 80)
+        for row in rows:
+            status = "✓ enabled" if row["is_enabled"] else "✗ disabled"
+            stages = ", ".join(row["stages"]) if row["stages"] else "none"
+            print(f"  {row['pipeline_id']:20} v{row['version']:10} {status}")
+            print(f"    → {row['name']}")
+            print(f"    → Stages: {stages}")
+            print()
+
+    finally:
+        await conn.close()
+
+
+if __name__ == "__main__":
+    asyncio.run(register_pipeline())
--- a/scripts/resolve_job_categories.py
+++ b/scripts/resolve_job_categories.py
@@ -0,0 +1,414 @@
+#!/usr/bin/env python3
+"""
+Resolve GBP taxonomy categories for all jobs.
+Uses exact match, LLM match, or hierarchical classification.
+
+Usage: source .env && python scripts/resolve_job_categories.py
+"""
+import asyncio
+import os
+import sys
+from dataclasses import dataclass
+from typing import Optional
+
+import asyncpg
+from openai import OpenAI
+
+# Connection string for the jobs/taxonomy database, taken from the DATABASE_URL environment variable.
+# NOTE(review): the fallback embeds a default username/password - confirm it is meant for local development only.
+DATABASE_URL = os.getenv('DATABASE_URL', 'postgresql://scraper:scraper123@localhost:5437/scraper')
+
+
+@dataclass
+class ResolvedCategory:
+    """Result of category resolution: the taxonomy node chosen and how it was chosen."""
+    category_id: int  # gbp_categories.id of the selected node
+    path: str  # taxonomy path of the node, as text
+    name: str  # gbp_categories.name of the selected node
+    level: int  # taxonomy level of the node
+    method: str  # 'exact', 'fuzzy', 'llm', 'hierarchical'
+    confidence: float  # 1.0 for exact, trigram similarity for fuzzy, fixed values for LLM-based methods
+
+
+class SimpleLLM:
+    """Simple OpenAI wrapper for category resolution."""
+
+    def __init__(self):
+        # No arguments are passed; the client library resolves its own configuration.
+        self.client = OpenAI()
+
+    async def complete(self, prompt: str, max_tokens: int = 50, temperature: float = 0) -> str:
+        """Send a single-user-message chat completion and return the reply text.
+
+        The blocking client call runs in a worker thread so the event loop is
+        not stalled.
+
+        Args:
+            prompt: Text sent as the only user message.
+            max_tokens: Upper bound on generated tokens.
+            temperature: Sampling temperature passed to the API.
+
+        Returns:
+            The first choice's message content with surrounding whitespace removed.
+
+        Raises:
+            RuntimeError: If the first choice carries no text content.
+        """
+        response = await asyncio.to_thread(
+            self.client.chat.completions.create,
+            model="gpt-4o-mini",
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=max_tokens,
+            temperature=temperature
+        )
+        content = response.choices[0].message.content
+        if content is None:
+            # The API may return a message without text; fail with a clear
+            # message instead of an AttributeError on None.strip().
+            raise RuntimeError("LLM returned no text content")
+        return content.strip()
+
+
+class CategoryResolver:
+    """Resolves business categories to GBP taxonomy nodes.
+
+    Resolution is attempted in order: exact / trigram name match, LLM
+    selection among name-similar level-3 candidates, then an LLM-guided walk
+    down the taxonomy (level 1 -> 2 -> 3).
+    """
+
+    # Synonym expansion for common variations (widens the candidate search)
+    _SYNONYMS = {
+        'shop': ['store', 'shop', 'outlet'],
+        'store': ['store', 'shop', 'outlet'],
+        'house': ['house', 'home'],
+        'home': ['house', 'home'],
+        'office': ['office', 'clinic', 'center'],
+        'clinic': ['clinic', 'office', 'center'],
+        'center': ['center', 'centre'],
+        'centre': ['center', 'centre'],
+        'repair': ['repair', 'service', 'maintenance'],
+    }
+
+    def __init__(self, pool: asyncpg.Pool, llm: SimpleLLM):
+        self.pool = pool
+        self.llm = llm
+        # Per-instance caches of taxonomy listings, filled by _get_categories.
+        self._level1_cache: list[dict] = []
+        self._level2_cache: dict[str, list[dict]] = {}
+        self._level3_cache: dict[str, list[dict]] = {}
+
+    async def resolve(
+        self,
+        google_category: Optional[str] = None,
+        business_name: Optional[str] = None,
+        business_address: Optional[str] = None
+    ) -> Optional[ResolvedCategory]:
+        """Resolve to the deepest taxonomy node.
+
+        Args:
+            google_category: Category label captured from Google, if any.
+            business_name: Business name; required for the hierarchical fallback.
+            business_address: Optional address added to the LLM context.
+
+        Returns:
+            The resolved category, or None when no phase produced a match.
+        """
+
+        # Phase 1: Exact match
+        if google_category:
+            result = await self._exact_match(google_category)
+            if result:
+                return result
+
+            # Phase 2: LLM match
+            result = await self._llm_match(google_category)
+            if result:
+                return result
+
+        # Phase 3: Hierarchical classification
+        if business_name:
+            result = await self._hierarchical_classify(
+                business_name=business_name,
+                business_address=business_address,
+                google_category=google_category
+            )
+            if result:
+                return result
+
+        return None
+
+    async def _exact_match(self, google_category: str) -> Optional[ResolvedCategory]:
+        """Try a case-insensitive exact match, then a high-similarity trigram match.
+
+        Only level-3 nodes are considered. Returns None when neither matches.
+        """
+        async with self.pool.acquire() as conn:
+            # Exact match (case-insensitive)
+            row = await conn.fetchrow("""
+                SELECT id, name, path::text as path, level
+                FROM gbp_categories
+                WHERE LOWER(name) = LOWER($1) AND level = 3
+            """, google_category)
+
+            if row:
+                return ResolvedCategory(
+                    category_id=row['id'],
+                    path=row['path'],
+                    name=row['name'],
+                    level=row['level'],
+                    method='exact',
+                    confidence=1.0
+                )
+
+            # Trigram similarity match (handles typos, slight variations)
+            # Threshold 0.7 = high confidence only, else fall through to LLM
+            row = await conn.fetchrow("""
+                SELECT id, name, path::text as path, level,
+                       similarity(LOWER(name), LOWER($1)) as sim
+                FROM gbp_categories
+                WHERE level = 3 AND similarity(LOWER(name), LOWER($1)) > 0.7
+                ORDER BY sim DESC
+                LIMIT 1
+            """, google_category)
+
+            if row:
+                return ResolvedCategory(
+                    category_id=row['id'],
+                    path=row['path'],
+                    name=row['name'],
+                    level=row['level'],
+                    method='fuzzy',
+                    confidence=float(row['sim'])
+                )
+
+        return None
+
+    @staticmethod
+    def _escape_like(term: str) -> str:
+        """Escape LIKE metacharacters so ``term`` is matched literally."""
+        return term.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
+
+    async def _llm_match(self, google_category: str) -> Optional[ResolvedCategory]:
+        """Use LLM to match Google category to taxonomy.
+
+        Up to 20 level-3 candidates are gathered by word match (with synonym
+        expansion) or trigram similarity; the LLM then picks one by name.
+        Returns None when there are no candidates, the LLM answers "NONE" or
+        nothing, or the answer matches no candidate.
+        """
+        words = google_category.lower().split()
+        if not words:
+            # Whitespace-only category: there is nothing to search on.
+            return None
+
+        expanded_words = set()
+        for w in words:
+            if len(w) > 2:
+                expanded_words.add(w)
+                expanded_words.update(self._SYNONYMS.get(w, ()))
+
+        # The category text is scraped, i.e. untrusted: pass the patterns as
+        # query parameters instead of interpolating them into the SQL string.
+        like_patterns = [f"%{self._escape_like(w)}%" for w in sorted(expanded_words)]
+        # First word is usually most important
+        prefix_pattern = f"{self._escape_like(words[0])}%"
+
+        async with self.pool.acquire() as conn:
+            # Order by: starts with primary word, then by similarity.
+            # An empty pattern array makes the LIKE ANY(...) branch false.
+            candidates = await conn.fetch("""
+                SELECT DISTINCT id, name, path::text as path, level,
+                       CASE WHEN LOWER(name) LIKE $2 THEN 1 ELSE 0 END as starts_with,
+                       similarity(LOWER(name), LOWER($1)) as sim
+                FROM gbp_categories
+                WHERE level = 3 AND (
+                    LOWER(name) LIKE ANY($3::text[])
+                    OR similarity(LOWER(name), LOWER($1)) > 0.3
+                )
+                ORDER BY starts_with DESC, sim DESC
+                LIMIT 20
+            """, google_category, prefix_pattern, like_patterns)
+
+            if not candidates:
+                return None
+
+        candidate_list = "\n".join([f"- {c['name']}" for c in candidates])
+
+        prompt = f"""Match business category "{google_category}" to the closest option.
+Synonyms: shop=store, house=cafe/home, office=clinic/center
+
+Options:
+{candidate_list}
+
+Reply with ONLY the exact category name from the list."""
+
+        response = await self.llm.complete(prompt, max_tokens=30)
+        selected = response.strip().strip('"').strip("'")
+
+        # An empty answer would be a substring of every candidate name below,
+        # so treat it like an explicit "NONE".
+        if not selected or selected.upper() == "NONE":
+            return None
+
+        selected_lower = selected.lower()
+
+        for c in candidates:
+            if c['name'].lower() == selected_lower:
+                return ResolvedCategory(
+                    category_id=c['id'],
+                    path=c['path'],
+                    name=c['name'],
+                    level=c['level'],
+                    method='llm',
+                    confidence=0.85
+                )
+
+        # Fuzzy match selected name to candidates (substring either way)
+        for c in candidates:
+            candidate_lower = c['name'].lower()
+            if selected_lower in candidate_lower or candidate_lower in selected_lower:
+                return ResolvedCategory(
+                    category_id=c['id'],
+                    path=c['path'],
+                    name=c['name'],
+                    level=c['level'],
+                    method='llm',
+                    confidence=0.75
+                )
+
+        return None
+
+    async def _hierarchical_classify(
+        self,
+        business_name: str,
+        business_address: Optional[str] = None,
+        google_category: Optional[str] = None
+    ) -> Optional[ResolvedCategory]:
+        """Walk down taxonomy tree using LLM.
+
+        Selects a level-1 node, then a level-2 node under it, then a level-3
+        node under that. Returns None as soon as a level offers no options.
+        """
+        context = f"Business: {business_name}"
+        if business_address:
+            context += f"\nAddress: {business_address}"
+        if google_category:
+            context += f"\nHint: {google_category}"
+
+        # Level 1
+        level1 = await self._get_categories(1)
+        sector = await self._llm_select(context, level1, "sector")
+        if not sector:
+            return None
+
+        # Level 2
+        level2 = await self._get_categories(2, sector['path'])
+        biz_type = await self._llm_select(context, level2, "business type", sector['name'])
+        if not biz_type:
+            return None
+
+        # Level 3
+        level3 = await self._get_categories(3, biz_type['path'])
+        specific = await self._llm_select(context, level3, "specific category", biz_type['name'])
+        if not specific:
+            return None
+
+        return ResolvedCategory(
+            category_id=specific['id'],
+            path=specific['path'],
+            name=specific['name'],
+            level=specific['level'],
+            method='hierarchical',
+            confidence=0.7
+        )
+
+    async def _get_categories(self, level: int, parent_path: str = None) -> list[dict]:
+        """Get categories at level, optionally under parent.
+
+        Results for the listings used by the hierarchical walk (level 1 without
+        a parent, levels 2 and 3 under a parent) are cached on the instance so
+        repeated jobs do not re-query the same listing. Callers must treat the
+        returned list as read-only.
+        """
+        is_root_listing = level == 1 and not parent_path
+        if is_root_listing and self._level1_cache:
+            return self._level1_cache
+
+        # Cache dict for parent-scoped listings; None means "do not cache".
+        tier_cache = None
+        if parent_path:
+            tier_cache = {2: self._level2_cache, 3: self._level3_cache}.get(level)
+        if tier_cache is not None and parent_path in tier_cache:
+            return tier_cache[parent_path]
+
+        async with self.pool.acquire() as conn:
+            if parent_path:
+                rows = await conn.fetch("""
+                    SELECT id, name, path::text as path, level
+                    FROM gbp_categories
+                    WHERE level = $1 AND path <@ $2::ltree
+                    ORDER BY name
+                """, level, parent_path)
+            else:
+                rows = await conn.fetch("""
+                    SELECT id, name, path::text as path, level
+                    FROM gbp_categories
+                    WHERE level = $1
+                    ORDER BY name
+                """, level)
+        categories = [dict(r) for r in rows]
+
+        if is_root_listing:
+            self._level1_cache = categories
+        elif tier_cache is not None:
+            tier_cache[parent_path] = categories
+        return categories
+
+    async def _llm_select(
+        self,
+        context: str,
+        categories: list[dict],
+        level_name: str,
+        parent: str = None
+    ) -> Optional[dict]:
+        """Ask LLM to select best category.
+
+        Returns None for an empty list and the sole entry for a one-item list
+        (no LLM call). Otherwise the LLM answer is matched by exact name, then
+        by substring; if neither matches, the first category is returned as a
+        best-effort default.
+        """
+        if not categories:
+            return None
+        if len(categories) == 1:
+            return categories[0]
+
+        cat_list = "\n".join([f"- {c['name']}" for c in categories])
+        parent_ctx = f" within {parent}" if parent else ""
+
+        prompt = f"""{context}
+
+Select the most appropriate {level_name}{parent_ctx}.
+
+Options:
+{cat_list}
+
+Respond with ONLY the exact name from the list."""
+
+        response = await self.llm.complete(prompt)
+        selected = response.strip().strip('"').strip("'")
+        selected_lower = selected.lower()
+
+        for c in categories:
+            if c['name'].lower() == selected_lower:
+                return c
+
+        # Fuzzy fallback
+        for c in categories:
+            if selected_lower in c['name'].lower():
+                return c
+
+        # Best-effort default: categories is known to be non-empty here.
+        return categories[0]
+
+async def main():
+    """Resolve and persist GBP taxonomy categories for completed jobs.
+
+    Loads every completed job without a ``gbp_category_path``, resolves each
+    one through ``CategoryResolver``, stores the result on the job row, and
+    prints per-job progress, a resolved/failed summary and the ten most
+    recently created resolved jobs. A failure on one job is reported and
+    counted; it does not stop the run.
+    """
+    # Initialize LLM client first, so that a failure while constructing it
+    # cannot leave an already-opened connection pool unclosed.
+    llm = SimpleLLM()
+
+    # Connect to database
+    pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=5)
+
+    try:
+        # Get jobs needing category resolution
+        async with pool.acquire() as conn:
+            jobs = await conn.fetch("""
+                SELECT job_id, business_name, business_category, business_address
+                FROM jobs
+                WHERE status = 'completed'
+                  AND gbp_category_path IS NULL
+                ORDER BY created_at DESC
+            """)
+
+        print(f"Found {len(jobs)} jobs needing category resolution\n")
+
+        resolver = CategoryResolver(pool, llm)
+
+        resolved = 0
+        failed = 0
+
+        for job in jobs:
+            job_id = str(job['job_id'])
+            business_name = job['business_name']
+            # Placeholder used for display only; it must not reach the
+            # resolver, which would otherwise classify the literal "Unknown".
+            display_name = business_name or 'Unknown'
+            google_cat = job['business_category']
+            address = job['business_address']
+
+            print(f"Processing: {display_name[:50]}...")
+            if google_cat:
+                print(f"  Google category: {google_cat}")
+
+            try:
+                result = await resolver.resolve(
+                    google_category=google_cat,
+                    business_name=business_name,
+                    business_address=address
+                )
+
+                if result:
+                    # Determine source: google if they had a category, inferred if we used business name
+                    category_source = 'google' if google_cat else 'inferred'
+
+                    # Save to database
+                    async with pool.acquire() as conn:
+                        await conn.execute("""
+                            UPDATE jobs
+                            SET gbp_category_id = $2,
+                                gbp_category_path = $3::ltree,
+                                category_resolution_method = $4,
+                                business_category_source = $5,
+                                updated_at = NOW()
+                            WHERE job_id = $1::uuid
+                        """, job_id, result.category_id, result.path, result.method, category_source)
+
+                    print(f"  ✓ Resolved: {result.path} ({result.method}, source={category_source})")
+                    resolved += 1
+                else:
+                    print(f"  ✗ Could not resolve")
+                    failed += 1
+
+            except Exception as e:
+                print(f"  ✗ Error: {e}")
+                failed += 1
+
+        print(f"\n{'='*50}")
+        print(f"Done! Resolved: {resolved}, Failed: {failed}")
+
+        # Show results
+        async with pool.acquire() as conn:
+            results = await conn.fetch("""
+                SELECT business_name, business_category,
+                       gbp_category_path::text as resolved_path,
+                       category_resolution_method,
+                       business_category_source
+                FROM jobs
+                WHERE status = 'completed' AND gbp_category_path IS NOT NULL
+                ORDER BY created_at DESC
+                LIMIT 10
+            """)
+
+        print(f"\n{'='*50}")
+        print("Recent resolved categories:")
+        for r in results:
+            # business_name may be NULL; slicing None would raise TypeError.
+            shown_name = (r['business_name'] or 'Unknown')[:30]
+            source = r['business_category_source'] or '-'
+            print(f"  {shown_name:30} | {r['business_category'] or '-':20} | {source:8} -> {r['resolved_path']} ({r['category_resolution_method']})")
+
+    finally:
+        await pool.close()
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
--- a/web/app/analytics/[id]/page.tsx
+++ b/web/app/analytics/[id]/page.tsx
@@ -68,8 +68,11 @@ export default function AnalyticsDetailPage() {
      : `/api/jobs/${jobId}/reviews?limit=10000`;

    fetch(url)
-      .then(res => {
-        if (!res.ok) throw new Error('Failed to fetch reviews');
+      .then(async res => {
+        if (!res.ok) {
+          const errorData = await res.json().catch(() => ({}));
+          throw new Error(errorData.error || `Failed to fetch reviews (${res.status})`);
+        }
        return res.json();
      })
      .then(data => {
--- a/web/app/api/categories/route.ts
+++ b/web/app/api/categories/route.ts
@@ -0,0 +1,63 @@
+import { NextRequest, NextResponse } from 'next/server';
+
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';
+const DB_URL = process.env.DATABASE_URL; // Unused in this file; deliberately no hard-coded credential fallback.
+
+// Fetches the category tree from the backend API (`/categories/tree`); despite the name, no direct DB query is made.
+async function fetchCategoriesFromDB() {
+  // Goes through the API server, which has DB access. Throws if the backend responds with a non-OK status.
+  // NOTE(review): the GET handler in this file does not call this helper — confirm it is still needed.
+  const response = await fetch(`${API_BASE_URL}/categories/tree`, {
+    cache: 'no-store',
+  });
+
+  if (!response.ok) {
+    throw new Error('Failed to fetch categories from API');
+  }
+
+  return response.json();
+}
+
+export async function GET(request: NextRequest) { // Proxies category lookups to the backend `/categories` endpoint
+  try {
+    const searchParams = request.nextUrl.searchParams;
+    const search = searchParams.get('search');
+    const parentPath = searchParams.get('parent');
+    const level = searchParams.get('level');
+
+    // Forward only the filters that were supplied; URLSearchParams handles the encoding
+    const params = new URLSearchParams();
+    if (search) params.set('search', search);
+    if (parentPath) params.set('parent', parentPath);
+    if (level) params.set('level', level);
+
+    const url = `${API_BASE_URL}/categories?${params.toString()}`;
+
+    const response = await fetch(url, {
+      cache: 'no-store',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+    });
+
+    if (!response.ok) {
+      // Fallback: a backend error is answered with HTTP 200 and an empty list (no actual mock data)
+      console.error('API not available, returning mock data');
+      return NextResponse.json({
+        categories: [],
+        total: 0,
+        message: 'API not available'
+      });
+    }
+
+    const data = await response.json();
+    return NextResponse.json(data);
+
+  } catch (error) {
+    console.error('Error fetching categories:', error);
+    return NextResponse.json(
+      { error: 'Failed to fetch categories', categories: [], total: 0 },
+      { status: 500 }
+    );
+  }
+}
--- a/web/app/api/check-reviews/route.ts
+++ b/web/app/api/check-reviews/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function POST(request: NextRequest) {
  try {
--- a/web/app/api/jobs/[jobId]/compare/route.ts
+++ b/web/app/api/jobs/[jobId]/compare/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 // GET /api/jobs/[jobId]/compare?previous=<previousJobId>
 // Returns reviews from current job with a flag indicating if they're new
@@ -16,8 +16,10 @@ export async function GET(
    // Fetch current job reviews
    const currentResponse = await fetch(`${API_BASE_URL}/jobs/${jobId}/reviews?limit=10000`);
    if (!currentResponse.ok) {
+      const errorText = await currentResponse.text().catch(() => '');
+      console.error(`Failed to get current job reviews: ${currentResponse.status} - ${errorText}`);
      return NextResponse.json(
-        { error: 'Failed to get current job reviews' },
+        { error: `Failed to get reviews for job ${jobId} (${currentResponse.status})` },
        { status: currentResponse.status }
      );
    }
--- a/web/app/api/jobs/[jobId]/crash-report/route.ts
+++ b/web/app/api/jobs/[jobId]/crash-report/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 /**
 * GET /api/jobs/[jobId]/crash-report
--- a/web/app/api/jobs/[jobId]/logs/route.ts
+++ b/web/app/api/jobs/[jobId]/logs/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(
  request: NextRequest,
--- a/web/app/api/jobs/[jobId]/retry/route.ts
+++ b/web/app/api/jobs/[jobId]/retry/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 /**
 * POST /api/jobs/[jobId]/retry
--- a/web/app/api/jobs/[jobId]/reviews/route.ts
+++ b/web/app/api/jobs/[jobId]/reviews/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(
  request: NextRequest,
--- a/web/app/api/jobs/[jobId]/route.ts
+++ b/web/app/api/jobs/[jobId]/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(
  request: NextRequest,
--- a/web/app/api/jobs/[jobId]/stream/route.ts
+++ b/web/app/api/jobs/[jobId]/stream/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export const dynamic = 'force-dynamic';

--- a/web/app/api/jobs/route.ts
+++ b/web/app/api/jobs/route.ts
@@ -1,13 +1,19 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(request: NextRequest) {
  try {
    const { searchParams } = new URL(request.url);
    const limit = searchParams.get('limit') || '100';
+    const status = searchParams.get('status');

-    const response = await fetch(`${API_BASE_URL}/jobs?limit=${limit}`);
+    let url = `${API_BASE_URL}/jobs?limit=${encodeURIComponent(limit)}`; // encode: value comes from the caller's query string
+    if (status) {
+      url += `&status=${encodeURIComponent(status)}`; // raw interpolation would let `&` or `#` alter the backend query
+    }
+
+    const response = await fetch(url);

    if (!response.ok) {
      return NextResponse.json(
--- a/web/app/api/jobs/stream/route.ts
+++ b/web/app/api/jobs/stream/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export const dynamic = 'force-dynamic';

--- a/web/app/api/pipelines/[pipelineId]/dashboard/route.ts
+++ b/web/app/api/pipelines/[pipelineId]/dashboard/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(
  request: NextRequest,
--- a/web/app/api/pipelines/[pipelineId]/execute/route.ts
+++ b/web/app/api/pipelines/[pipelineId]/execute/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function POST(
  request: NextRequest,
--- a/web/app/api/pipelines/[pipelineId]/executions/[executionId]/route.ts
+++ b/web/app/api/pipelines/[pipelineId]/executions/[executionId]/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(
  request: NextRequest,
--- a/web/app/api/pipelines/[pipelineId]/executions/route.ts
+++ b/web/app/api/pipelines/[pipelineId]/executions/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(
  request: NextRequest,
--- a/web/app/api/pipelines/[pipelineId]/route.ts
+++ b/web/app/api/pipelines/[pipelineId]/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(
  request: NextRequest,
--- a/web/app/api/pipelines/[pipelineId]/widgets/[widgetId]/route.ts
+++ b/web/app/api/pipelines/[pipelineId]/widgets/[widgetId]/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(
  request: NextRequest,
--- a/web/app/api/pipelines/reviewiq/analytics/route.ts
+++ b/web/app/api/pipelines/reviewiq/analytics/route.ts
@@ -0,0 +1,42 @@
+import { NextRequest, NextResponse } from 'next/server';
+
+const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:8001';
+
+/**
+ * GET /api/pipelines/reviewiq/analytics — proxy route for the ReviewIQ analytics endpoint.
+ * Returns the backend JSON; failures become `{ detail }` with the backend status (500 if the call itself throws).
+ */
+export async function GET(request: NextRequest) {
+  try {
+    // Forward the incoming query parameters (re-serialized) to the backend
+    const searchParams = request.nextUrl.searchParams;
+    const queryString = searchParams.toString();
+
+    const url = `${API_BASE_URL}/api/pipelines/reviewiq/analytics${queryString ? `?${queryString}` : ''}`;
+
+    const response = await fetch(url, {
+      method: 'GET',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      cache: 'no-store',
+    });
+
+    if (!response.ok) {
+      const errorData = await response.json().catch(() => ({})); // a non-JSON error body falls back to {}
+      return NextResponse.json(
+        { detail: errorData.detail || `Backend error: ${response.status}` },
+        { status: response.status }
+      );
+    }
+
+    const data = await response.json();
+    return NextResponse.json(data);
+  } catch (error) {
+    console.error('ReviewIQ analytics proxy error:', error);
+    return NextResponse.json(
+      { detail: 'Failed to fetch analytics data' },
+      { status: 500 }
+    );
+  }
+}
--- a/web/app/api/pipelines/reviewiq/issues/[issueId]/spans/route.ts
+++ b/web/app/api/pipelines/reviewiq/issues/[issueId]/spans/route.ts
@@ -0,0 +1,42 @@
+import { NextRequest, NextResponse } from 'next/server';
+
+const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:8001';
+
+/**
+ * GET /api/pipelines/reviewiq/issues/[issueId]/spans — proxy route for fetching spans related to an issue.
+ * Returns the backend JSON; failures become `{ detail }` with the backend status (500 if the call itself throws).
+ */
+export async function GET(
+  request: NextRequest,
+  { params }: { params: Promise<{ issueId: string }> }
+) {
+  try {
+    const { issueId } = await params;
+    const url = `${API_BASE_URL}/api/pipelines/reviewiq/issues/${encodeURIComponent(issueId)}/spans`; // keep the id a single path segment
+
+    const response = await fetch(url, {
+      method: 'GET',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      cache: 'no-store',
+    });
+
+    if (!response.ok) {
+      const errorData = await response.json().catch(() => ({})); // a non-JSON error body falls back to {}
+      return NextResponse.json(
+        { detail: errorData.detail || `Backend error: ${response.status}` },
+        { status: response.status }
+      );
+    }
+
+    const data = await response.json();
+    return NextResponse.json(data);
+  } catch (error) {
+    console.error('Issue spans proxy error:', error);
+    return NextResponse.json(
+      { detail: 'Failed to fetch issue spans' },
+      { status: 500 }
+    );
+  }
+}
--- a/web/app/api/pipelines/reviewiq/reviews/[reviewId]/route.ts
+++ b/web/app/api/pipelines/reviewiq/reviews/[reviewId]/route.ts
@@ -0,0 +1,43 @@
+import { NextRequest, NextResponse } from 'next/server';
+
+const API_BASE = process.env.API_BASE_URL || process.env.API_URL || 'http://localhost:8001'; // API_BASE_URL is what the sibling reviewiq proxies read; API_URL kept for compatibility
+
+/**
+ * GET /api/pipelines/reviewiq/reviews/[reviewId]?source=... (source defaults to 'google')
+ * Proxy to backend for fetching a full review with all its spans; failures are returned as `{ error }`.
+ */
+export async function GET(
+  request: NextRequest,
+  { params }: { params: Promise<{ reviewId: string }> }
+) {
+  const { reviewId } = await params;
+  const { searchParams } = new URL(request.url);
+  const source = searchParams.get('source') || 'google';
+
+  try {
+    const url = `${API_BASE}/api/pipelines/reviewiq/reviews/${encodeURIComponent(reviewId)}?source=${encodeURIComponent(source)}`;
+
+    const response = await fetch(url, {
+      headers: {
+        'Content-Type': 'application/json',
+      },
+    });
+
+    if (!response.ok) {
+      const error = await response.text(); // raw backend body, embedded verbatim in the error message below
+      return NextResponse.json(
+        { error: `Backend error: ${error}` },
+        { status: response.status }
+      );
+    }
+
+    const data = await response.json();
+    return NextResponse.json(data);
+  } catch (error) {
+    console.error('Error fetching review:', error);
+    return NextResponse.json(
+      { error: 'Failed to fetch review' },
+      { status: 500 }
+    );
+  }
+}
--- a/web/app/api/pipelines/reviewiq/trends/route.ts
+++ b/web/app/api/pipelines/reviewiq/trends/route.ts
@@ -0,0 +1,42 @@
+import { NextRequest, NextResponse } from 'next/server';
+
+const API_BASE_URL = process.env.API_BASE_URL || 'http://localhost:8001';
+
+/**
+ * GET /api/pipelines/reviewiq/trends — proxy route for the ReviewIQ trends endpoint.
+ * Returns the backend JSON; failures become `{ detail }` with the backend status (500 if the call itself throws).
+ */
+export async function GET(request: NextRequest) {
+  try {
+    // Forward the incoming query parameters (re-serialized) to the backend
+    const searchParams = request.nextUrl.searchParams;
+    const queryString = searchParams.toString();
+
+    const url = `${API_BASE_URL}/api/pipelines/reviewiq/trends${queryString ? `?${queryString}` : ''}`;
+
+    const response = await fetch(url, {
+      method: 'GET',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      cache: 'no-store',
+    });
+
+    if (!response.ok) {
+      const errorData = await response.json().catch(() => ({})); // a non-JSON error body falls back to {}
+      return NextResponse.json(
+        { detail: errorData.detail || `Backend error: ${response.status}` },
+        { status: response.status }
+      );
+    }
+
+    const data = await response.json();
+    return NextResponse.json(data);
+  } catch (error) {
+    console.error('ReviewIQ trends proxy error:', error);
+    return NextResponse.json(
+      { detail: 'Failed to fetch trends data' },
+      { status: 500 }
+    );
+  }
+}
--- a/web/app/api/pipelines/route.ts
+++ b/web/app/api/pipelines/route.ts
@@ -1,6 +1,6 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function GET(request: NextRequest) {
  try {
--- a/web/app/api/scrape/route.ts
+++ b/web/app/api/scrape/route.ts
@@ -1,16 +1,38 @@
 import { NextRequest, NextResponse } from 'next/server';

-const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000';
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';

 export async function POST(request: NextRequest) {
  try {
    const body = await request.json();
-    const { url, business_name, business_address, rating_snapshot, total_reviews_snapshot, scraper_version } = body;
+    const { url, business_name, business_address, rating_snapshot, total_reviews_snapshot, scraper_version, session_id, browser_fingerprint, geolocation } = body;

    if (!url) {
      return NextResponse.json({ error: 'URL is required' }, { status: 400 });
    }

+    // Build metadata object
+    const metadata: Record<string, unknown> = {
+      business_name,
+      business_address,
+      rating_snapshot,
+      total_reviews_snapshot,
+      scraper_version,  // Store in metadata for job tracking
+    };
+
+    // Include session_id for browser reuse (session handoff from validation)
+    if (session_id) {
+      metadata.session_id = session_id;
+    }
+
+    // Include browser fingerprint if provided
+    if (browser_fingerprint) {
+      metadata.browser_fingerprint = browser_fingerprint;
+    }
+    if (geolocation) {
+      metadata.geolocation = geolocation;
+    }
+
    // Call the containerized scraper API with business metadata and version
    const response = await fetch(`${API_BASE_URL}/scrape`, {
      method: 'POST',
@@ -18,13 +40,8 @@ export async function POST(request: NextRequest) {
      body: JSON.stringify({
        url,
        scraper_version,  // Pass version to backend for routing
-        metadata: {
-          business_name,
-          business_address,
-          rating_snapshot,
-          total_reviews_snapshot,
-          scraper_version,  // Also store in metadata for job tracking
-        },
+        session_id,  // Pass session_id for browser reuse
+        metadata,
      }),
    });

--- a/web/app/api/sessions/validate/route.ts
+++ b/web/app/api/sessions/validate/route.ts
@@ -0,0 +1,37 @@
+import { NextRequest, NextResponse } from 'next/server';
+
+const API_BASE_URL = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';
+
+export async function POST(request: NextRequest) { // Proxies session validation to the backend `/sessions/validate`
+  try {
+    const body = await request.json();
+
+    if (!body?.url) { // `?.` so a JSON `null` body is a 400, not a thrown TypeError
+      return NextResponse.json({ error: 'URL is required' }, { status: 400 });
+    }
+
+    // Call the backend session validation endpoint
+    const response = await fetch(`${API_BASE_URL}/sessions/validate`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+    });
+
+    if (!response.ok) {
+      // Error bodies are not guaranteed to be JSON; parse leniently so the backend status is preserved
+      const errorData = await response.json().catch(() => ({}));
+      return NextResponse.json(
+        { error: errorData.detail || 'Failed to validate session' },
+        { status: response.status }
+      );
+    }
+
+    return NextResponse.json(await response.json());
+  } catch (error) {
+    console.error('Session validation API error:', error);
+    return NextResponse.json(
+      { error: 'Failed to connect to scraper API' },
+      { status: 500 }
+    );
+  }
+}
--- a/web/app/categories/page.tsx
+++ b/web/app/categories/page.tsx
@@ -0,0 +1,435 @@
+'use client';
+
+import { useState, useEffect, useMemo, useCallback } from 'react';
+import { Search, TreePine, Network, ChevronRight, ChevronDown, Folder, FolderOpen, Tag, Loader2 } from 'lucide-react';
+import dynamic from 'next/dynamic';
+import {
+  Category,
+  CategoryTreeNode,
+  buildCategoryTree,
+  toD3Tree,
+  getLevelName,
+  getLevelColor,
+  searchCategories,
+  getCategoryBreadcrumb,
+} from '@/lib/categories';
+
+// Dynamic import for react-d3-tree (SSR issues)
+const Tree = dynamic(() => import('react-d3-tree').then((mod) => mod.default), {
+  ssr: false,
+  loading: () => <div className="flex items-center justify-center h-full"><Loader2 className="animate-spin" /></div>,
+});
+
+// API base URL
+const API_BASE = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8001';
+
+export default function CategoriesPage() {
+  const [categories, setCategories] = useState<Category[]>([]);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState<string | null>(null);
+  const [searchQuery, setSearchQuery] = useState('');
+  const [viewMode, setViewMode] = useState<'explorer' | 'diagram'>('explorer');
+  const [expandedPaths, setExpandedPaths] = useState<Set<string>>(new Set());
+  const [selectedCategory, setSelectedCategory] = useState<Category | null>(null);
+  const [stats, setStats] = useState({ total: 0, sectors: 0, types: 0, subs: 0, leaves: 0 });
+
+  // Fetch categories from API
+  useEffect(() => {
+    async function fetchCategories() {
+      try {
+        setLoading(true);
+        const response = await fetch(`${API_BASE}/categories`);
+
+        if (!response.ok) {
+          throw new Error('Failed to fetch categories');
+        }
+
+        const data = await response.json();
+        setCategories(data.categories || []);
+        setStats({
+          total: data.total || 0,
+          sectors: data.categories?.filter((c: Category) => c.level === 1).length || 0,
+          types: data.categories?.filter((c: Category) => c.level === 2).length || 0,
+          subs: data.categories?.filter((c: Category) => c.level === 3).length || 0,
+          leaves: data.categories?.filter((c: Category) => c.level === 4).length || 0,
+        });
+
+        // Expand level 1 by default
+        const level1Paths = new Set<string>(
+          data.categories
+            ?.filter((c: Category) => c.level === 1)
+            .map((c: Category) => c.path) || []
+        );
+        setExpandedPaths(level1Paths);
+      } catch (err) {
+        console.error('Error fetching categories:', err);
+        setError(err instanceof Error ? err.message : 'Failed to load categories');
+      } finally {
+        setLoading(false);
+      }
+    }
+
+    fetchCategories();
+  }, []);
+
+  // Filter categories based on search
+  const filteredCategories = useMemo(() => {
+    if (!searchQuery.trim()) return categories;
+    return searchCategories(categories, searchQuery);
+  }, [categories, searchQuery]);
+
+  // Build tree structure
+  const tree = useMemo(() => buildCategoryTree(filteredCategories), [filteredCategories]);
+
+  // D3 tree data
+  const d3TreeData = useMemo(() => {
+    if (tree.length === 0) return null;
+    return {
+      name: 'GBP Categories',
+      children: toD3Tree(tree),
+    };
+  }, [tree]);
+
+  // Toggle expand/collapse
+  const toggleExpand = useCallback((path: string) => {
+    setExpandedPaths((prev) => {
+      const next = new Set(prev);
+      if (next.has(path)) {
+        next.delete(path);
+      } else {
+        next.add(path);
+      }
+      return next;
+    });
+  }, []);
+
+  // Expand all ancestors when searching
+  useEffect(() => {
+    if (searchQuery.trim()) {
+      const pathsToExpand = new Set<string>();
+      for (const cat of filteredCategories) {
+        const parts = cat.path.split('.');
+        for (let i = 1; i < parts.length; i++) {
+          pathsToExpand.add(parts.slice(0, i).join('.'));
+        }
+      }
+      setExpandedPaths(pathsToExpand);
+    }
+  }, [searchQuery, filteredCategories]);
+
+  // Get breadcrumb for selected category
+  const breadcrumb = useMemo(() => {
+    if (!selectedCategory) return [];
+    return getCategoryBreadcrumb(selectedCategory.path, categories);
+  }, [selectedCategory, categories]);
+
+  // Render tree node (recursive)
+  const renderTreeNode = (node: CategoryTreeNode, depth: number = 0) => {
+    const isExpanded = expandedPaths.has(node.id);
+    const hasChildren = node.children && node.children.length > 0;
+    const isSelected = selectedCategory?.path === node.id;
+    const level = node.data?.level || 1;
+
+    return (
+      <div key={node.id} className="select-none">
+        <div
+          className={`flex items-center gap-2 py-1.5 px-2 rounded cursor-pointer hover:bg-gray-100 dark:hover:bg-gray-800 ${
+            isSelected ? 'bg-blue-50 dark:bg-blue-900/30 border-l-2 border-blue-500' : ''
+          }`}
+          style={{ paddingLeft: `${depth * 20 + 8}px` }}
+          onClick={() => {
+            setSelectedCategory(node.data || null);
+            if (hasChildren) {
+              toggleExpand(node.id);
+            }
+          }}
+        >
+          {/* Expand/Collapse Icon */}
+          <span className="w-4 h-4 flex items-center justify-center">
+            {hasChildren ? (
+              isExpanded ? (
+                <ChevronDown className="w-4 h-4 text-gray-500" />
+              ) : (
+                <ChevronRight className="w-4 h-4 text-gray-500" />
+              )
+            ) : (
+              <span className="w-4" />
+            )}
+          </span>
+
+          {/* Folder/Tag Icon */}
+          {hasChildren ? (
+            isExpanded ? (
+              <FolderOpen className="w-4 h-4 text-yellow-500" />
+            ) : (
+              <Folder className="w-4 h-4 text-yellow-600" />
+            )
+          ) : (
+            <Tag className="w-4 h-4 text-purple-500" />
+          )}
+
+          {/* Name */}
+          <span className="flex-1 truncate text-sm">{node.name}</span>
+
+          {/* Level Badge */}
+          <span
+            className={`text-xs px-1.5 py-0.5 rounded ${getLevelColor(level)} text-white`}
+          >
+            L{level}
+          </span>
+
+          {/* Count */}
+          {node.data && node.data.category_count > 0 && (
+            <span className="text-xs text-gray-400">
+              ({node.data.category_count})
+            </span>
+          )}
+        </div>
+
+        {/* Children */}
+        {hasChildren && isExpanded && (
+          <div>
+            {node.children!.map((child) => renderTreeNode(child, depth + 1))}
+          </div>
+        )}
+      </div>
+    );
+  };
+
+  if (loading) {
+    return (
+      <div className="flex items-center justify-center min-h-screen">
+        <Loader2 className="w-8 h-8 animate-spin text-blue-500" />
+        <span className="ml-2">Loading categories...</span>
+      </div>
+    );
+  }
+
+  if (error) {
+    return (
+      <div className="flex flex-col items-center justify-center min-h-screen">
+        <p className="text-red-500 mb-4">{error}</p>
+        <button
+          onClick={() => window.location.reload()}
+          className="px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600"
+        >
+          Retry
+        </button>
+      </div>
+    );
+  }
+
+  return (
+    <div className="min-h-screen bg-gray-50 dark:bg-gray-900">
+      {/* Header */}
+      <header className="bg-white dark:bg-gray-800 shadow-sm border-b">
+        <div className="max-w-7xl mx-auto px-4 py-4">
+          <div className="flex items-center justify-between">
+            <div>
+              <h1 className="text-2xl font-bold text-gray-900 dark:text-white">
+                GBP Category Explorer
+              </h1>
+              <p className="text-sm text-gray-500 mt-1">
+                Browse {stats.total.toLocaleString()} Google Business Profile categories
+              </p>
+            </div>
+
+            {/* Stats */}
+            <div className="flex gap-4 text-sm">
+              <div className="text-center">
+                <div className="font-bold text-blue-600">{stats.sectors}</div>
+                <div className="text-gray-500">Sectors</div>
+              </div>
+              <div className="text-center">
+                <div className="font-bold text-green-600">{stats.types}</div>
+                <div className="text-gray-500">Types</div>
+              </div>
+              <div className="text-center">
+                <div className="font-bold text-yellow-600">{stats.subs}</div>
+                <div className="text-gray-500">Sub-cats</div>
+              </div>
+              <div className="text-center">
+                <div className="font-bold text-purple-600">{stats.leaves}</div>
+                <div className="text-gray-500">Categories</div>
+              </div>
+            </div>
+          </div>
+        </div>
+      </header>
+
+      {/* Toolbar */}
+      <div className="bg-white dark:bg-gray-800 border-b px-4 py-3">
+        <div className="max-w-7xl mx-auto flex items-center gap-4">
+          {/* Search */}
+          <div className="relative flex-1 max-w-md">
+            <Search className="absolute left-3 top-1/2 -translate-y-1/2 w-4 h-4 text-gray-400" />
+            <input
+              type="text"
+              placeholder="Search categories..."
+              value={searchQuery}
+              onChange={(e) => setSearchQuery(e.target.value)}
+              className="w-full pl-10 pr-4 py-2 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 dark:bg-gray-700 dark:border-gray-600"
+            />
+          </div>
+
+          {/* View Toggle */}
+          <div className="flex border rounded-lg overflow-hidden">
+            <button
+              onClick={() => setViewMode('explorer')}
+              className={`px-4 py-2 flex items-center gap-2 ${
+                viewMode === 'explorer'
+                  ? 'bg-blue-500 text-white'
+                  : 'bg-white dark:bg-gray-700 hover:bg-gray-50'
+              }`}
+            >
+              <TreePine className="w-4 h-4" />
+              Explorer
+            </button>
+            <button
+              onClick={() => setViewMode('diagram')}
+              className={`px-4 py-2 flex items-center gap-2 ${
+                viewMode === 'diagram'
+                  ? 'bg-blue-500 text-white'
+                  : 'bg-white dark:bg-gray-700 hover:bg-gray-50'
+              }`}
+            >
+              <Network className="w-4 h-4" />
+              Diagram
+            </button>
+          </div>
+
+          {/* Results count */}
+          {searchQuery && (
+            <span className="text-sm text-gray-500">
+              {filteredCategories.length} results
+            </span>
+          )}
+        </div>
+      </div>
+
+      {/* Main Content */}
+      <div className="px-4 py-6">
+        {viewMode === 'explorer' ? (
+          /* Explorer View - Full Width */
+          <div className="bg-white dark:bg-gray-800 rounded-lg shadow p-4">
+            {tree.length > 0 ? (
+              tree.map((node) => renderTreeNode(node))
+            ) : (
+              <div className="text-center text-gray-500 py-8">
+                No categories found
+              </div>
+            )}
+          </div>
+        ) : (
+          /* Diagram View with Detail Panel */
+          <div className="flex gap-6 h-[calc(100vh-180px)]">
+            <div className="flex-1 bg-white dark:bg-gray-800 rounded-lg shadow overflow-hidden">
+              <div className="h-full w-full">
+                {d3TreeData ? (
+                  <Tree
+                    data={d3TreeData}
+                    orientation="vertical"
+                    pathFunc="step"
+                    translate={{ x: 400, y: 50 }}
+                    separation={{ siblings: 1, nonSiblings: 2 }}
+                    nodeSize={{ x: 200, y: 80 }}
+                    renderCustomNodeElement={({ nodeDatum, toggleNode }) => (
+                      <g onClick={toggleNode}>
+                        <circle r={15} fill="#3b82f6" />
+                        <text
+                          fill="#1f2937"
+                          strokeWidth="0"
+                          x={20}
+                          dy=".35em"
+                          fontSize={12}
+                          fontFamily="sans-serif"
+                        >
+                          {nodeDatum.name.length > 25
+                            ? nodeDatum.name.slice(0, 25) + '...'
+                            : nodeDatum.name}
+                        </text>
+                      </g>
+                    )}
+                  />
+                ) : (
+                  <div className="flex items-center justify-center h-full text-gray-500">
+                    No data to display
+                  </div>
+                )}
+              </div>
+            </div>
+
+            {/* Detail Panel - Only in Diagram Mode */}
+            {selectedCategory && (
+              <div className="w-80 bg-white dark:bg-gray-800 rounded-lg shadow p-4">
+                <h3 className="font-bold text-lg mb-4">{selectedCategory.name}</h3>
+
+                {/* Breadcrumb */}
+                <div className="mb-4">
+                  <span className="text-xs text-gray-500 uppercase">Path</span>
+                  <div className="flex flex-wrap gap-1 mt-1">
+                    {breadcrumb.map((cat, i) => (
+                      <span key={cat.path} className="flex items-center">
+                        <span
+                          className={`text-xs px-2 py-1 rounded ${getLevelColor(cat.level)} text-white cursor-pointer hover:opacity-80`}
+                          onClick={() => setSelectedCategory(cat)}
+                        >
+                          {cat.name}
+                        </span>
+                        {i < breadcrumb.length - 1 && (
+                          <ChevronRight className="w-3 h-3 text-gray-400 mx-1" />
+                        )}
+                      </span>
+                    ))}
+                  </div>
+                </div>
+
+                {/* Details */}
+                <div className="space-y-3 text-sm">
+                  <div>
+                    <span className="text-gray-500">Level:</span>
+                    <span className="ml-2 font-medium">
+                      {getLevelName(selectedCategory.level)} (L{selectedCategory.level})
+                    </span>
+                  </div>
+                  <div>
+                    <span className="text-gray-500">Path:</span>
+                    <code className="ml-2 text-xs bg-gray-100 dark:bg-gray-700 px-2 py-1 rounded">
+                      {selectedCategory.path}
+                    </code>
+                  </div>
+                  <div>
+                    <span className="text-gray-500">Children:</span>
+                    <span className="ml-2 font-medium">
+                      {selectedCategory.category_count}
+                    </span>
+                  </div>
+                  <div>
+                    <span className="text-gray-500">Slug:</span>
+                    <code className="ml-2 text-xs bg-gray-100 dark:bg-gray-700 px-2 py-1 rounded">
+                      {selectedCategory.slug}
+                    </code>
+                  </div>
+                </div>
+
+                {/* Use in search */}
+                <div className="mt-6 pt-4 border-t">
+                  <button
+                    className="w-full py-2 px-4 bg-blue-500 text-white rounded hover:bg-blue-600 text-sm"
+                    onClick={() => {
+                      // Copy ltree path for use in queries
+                      navigator.clipboard.writeText(selectedCategory.path);
+                      alert('Path copied to clipboard!');
+                    }}
+                  >
+                    Copy Path for Query
+                  </button>
+                </div>
+              </div>
+            )}
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}
--- a/web/app/globals.css
+++ b/web/app/globals.css
@@ -29,11 +29,11 @@ body {
@keyframes fade-in {
  from {
    opacity: 0;
-    transform: translateX(-50%) translateY(4px);
+    transform: translateY(4px);
  }
  to {
    opacity: 1;
-    transform: translateX(-50%) translateY(0);
+    transform: translateY(0);
  }
 }

--- a/web/app/layout.tsx
+++ b/web/app/layout.tsx
@@ -32,10 +32,12 @@ export default function RootLayout({
        <JobsProvider>
          <div className="h-screen w-screen overflow-hidden flex">
            <Sidebar />
-            <div className="flex-1 bg-gray-50 overflow-hidden">
+            <div className="flex-1 bg-gray-50 overflow-auto">
              {children}
            </div>
          </div>
+          {/* Portal target for modals - outside overflow-hidden container */}
+          <div id="modal-root" />
        </JobsProvider>
      </body>
    </html>
--- a/web/app/pipelines/[pipelineId]/analytics/page.tsx
+++ b/web/app/pipelines/[pipelineId]/analytics/page.tsx
@@ -0,0 +1,210 @@
+'use client';
+
+import { useEffect, useState } from 'react';
+import { useParams, useSearchParams, useRouter } from 'next/navigation';
+import Link from 'next/link';
+import { ArrowLeft, Loader2, FileText, BarChart3 } from 'lucide-react';
+import { DynamicDashboard } from '@/components/dashboard/DynamicDashboard';
+import { ReviewIQDashboard } from '@/components/reviewiq';
+import { getDashboardConfig } from '@/lib/pipeline-api';
+import type { DashboardConfig } from '@/lib/pipeline-types';
+
+// Lazy load Report tab
+import dynamic from 'next/dynamic';
+const ReportTab = dynamic(() => import('@/components/reviewiq/ReportTab').then(m => m.ReportTab), {
+  loading: () => <div className="flex items-center justify-center min-h-[400px]"><Loader2 className="w-8 h-8 animate-spin text-blue-600" /></div>
+});
+
+type ReviewIQTab = 'report' | 'dashboard';
+
+/**
+ * Analytics page for a single pipeline.
+ *
+ * The `reviewiq` pipeline renders the handcrafted Report / Dashboard tabs; every
+ * other pipeline fetches a dashboard config and renders `DynamicDashboard`.
+ * Optional `job_id` and `business_id` query params are forwarded to the
+ * dashboards, and `view` selects the active ReviewIQ tab.
+ */
+export default function PipelineAnalyticsPage() {
+  const params = useParams();
+  const searchParams = useSearchParams();
+  const router = useRouter();
+
+  const pipelineId = params.pipelineId as string;
+  const jobId = searchParams.get('job_id') || undefined;
+  const businessId = searchParams.get('business_id') || undefined;
+
+  const [config, setConfig] = useState<DashboardConfig | null>(null);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState<string | null>(null);
+
+  // Use the handcrafted ReviewIQ dashboard for the reviewiq pipeline
+  const useReviewIQDashboard = pipelineId === 'reviewiq';
+
+  // `view` comes from the URL, so validate it: an unknown value would otherwise
+  // match neither tab and leave the content area empty.
+  const viewParam = searchParams.get('view');
+  const tabFromUrl: ReviewIQTab = viewParam === 'dashboard' ? 'dashboard' : 'report';
+  const [activeTab, setActiveTab] = useState<ReviewIQTab>(tabFromUrl);
+
+  // Keep the tab in sync when the URL changes without a click (back/forward).
+  useEffect(() => {
+    setActiveTab(tabFromUrl);
+  }, [tabFromUrl]);
+
+  // Switch tab immediately and mirror the choice into the URL
+  const handleTabChange = (tab: ReviewIQTab) => {
+    setActiveTab(tab);
+    const nextParams = new URLSearchParams(searchParams.toString());
+    if (tab === 'report') {
+      nextParams.delete('view'); // 'report' is the default, keep the URL clean
+    } else {
+      nextParams.set('view', tab);
+    }
+    const query = nextParams.toString();
+    const basePath = `/pipelines/${pipelineId}/analytics`;
+    // Avoid a dangling "?" when no query params remain
+    router.push(query ? `${basePath}?${query}` : basePath, { scroll: false });
+  };
+
+  useEffect(() => {
+    // Skip config fetch for ReviewIQ - it uses its own optimized endpoint
+    if (useReviewIQDashboard) {
+      setLoading(false);
+      return;
+    }
+
+    // Guards against a stale response (pipeline changed or page unmounted)
+    let cancelled = false;
+
+    async function fetchConfig() {
+      try {
+        setLoading(true);
+        setError(null); // clear any error left over from a previous pipeline
+        const dashboardConfig = await getDashboardConfig(pipelineId);
+        if (!cancelled) setConfig(dashboardConfig);
+      } catch (err) {
+        if (!cancelled) {
+          setError(err instanceof Error ? err.message : 'Failed to load dashboard config');
+        }
+      } finally {
+        if (!cancelled) setLoading(false);
+      }
+    }
+
+    fetchConfig();
+    return () => {
+      cancelled = true;
+    };
+  }, [pipelineId, useReviewIQDashboard]);
+
+  if (loading) {
+    return (
+      <div className="flex items-center justify-center min-h-[400px]">
+        <Loader2 className="w-8 h-8 animate-spin text-blue-600" />
+      </div>
+    );
+  }
+
+  // Breadcrumb and job banner shared by both dashboard variants
+  const renderHeader = (pipelineLabel: string) => (
+    <>
+      {/* Navigation breadcrumb */}
+      <div className="mb-4">
+        <Link
+          href={`/pipelines/${pipelineId}`}
+          className="inline-flex items-center text-sm text-gray-600 hover:text-gray-900"
+        >
+          <ArrowLeft className="w-4 h-4 mr-1" />
+          Back to {pipelineLabel} Pipeline
+        </Link>
+      </div>
+
+      {/* Job context indicator */}
+      {jobId && (
+        <div className="mb-4 bg-blue-50 border border-blue-200 rounded-lg p-3 text-sm text-blue-700">
+          Showing results for job: <code className="bg-blue-100 px-1 rounded">{jobId}</code>
+        </div>
+      )}
+    </>
+  );
+
+  // Use handcrafted ReviewIQ Dashboard with tabs
+  if (useReviewIQDashboard) {
+    const tabs = [
+      { id: 'report' as const, label: 'Report', icon: FileText },
+      { id: 'dashboard' as const, label: 'Dashboard', icon: BarChart3 },
+    ];
+
+    return (
+      <div className="h-full overflow-y-auto p-6">
+        {renderHeader('ReviewIQ')}
+
+        {/* Tab Navigation */}
+        <div className="mb-6 border-b border-gray-200">
+          <nav className="flex gap-2" aria-label="Tabs">
+            {tabs.map((tab) => {
+              const Icon = tab.icon;
+              const isActive = activeTab === tab.id;
+              return (
+                <button
+                  key={tab.id}
+                  type="button"
+                  aria-current={isActive ? 'page' : undefined}
+                  onClick={() => handleTabChange(tab.id)}
+                  className={`
+                    relative px-4 py-2.5 flex items-center gap-2 text-sm font-medium transition-colors
+                    ${isActive
+                      ? 'text-blue-600'
+                      : 'text-gray-500 hover:text-gray-700'
+                    }
+                  `}
+                >
+                  <Icon className={`w-4 h-4 ${isActive ? 'text-blue-600' : 'text-gray-400'}`} />
+                  <span>{tab.label}</span>
+                  {/* Active indicator bar */}
+                  <span
+                    className={`absolute bottom-0 left-0 right-0 h-0.5 bg-blue-600 transition-opacity ${isActive ? 'opacity-100' : 'opacity-0'}`}
+                  />
+                </button>
+              );
+            })}
+          </nav>
+        </div>
+
+        {/* Tab Content */}
+        {activeTab === 'report' && (
+          <ReportTab jobId={jobId} businessId={businessId} />
+        )}
+        {activeTab === 'dashboard' && (
+          <ReviewIQDashboard jobId={jobId} businessId={businessId} />
+        )}
+      </div>
+    );
+  }
+
+  // Fallback for other pipelines using dynamic dashboard
+  if (error || !config) {
+    return (
+      <div className="p-6">
+        <div className="bg-red-50 border border-red-200 rounded-lg p-4 text-red-700">
+          {error || 'Failed to load dashboard configuration'}
+        </div>
+      </div>
+    );
+  }
+
+  return (
+    <div className="h-full overflow-y-auto p-6">
+      {renderHeader(pipelineId)}
+
+      {/* Dynamic Dashboard for other pipelines */}
+      <DynamicDashboard
+        pipelineId={pipelineId}
+        config={config}
+        businessId={businessId}
+        jobId={jobId}
+      />
+    </div>
+  );
+}
--- a/web/app/pipelines/[pipelineId]/executions/[executionId]/page.tsx
+++ b/web/app/pipelines/[pipelineId]/executions/[executionId]/page.tsx
@@ -16,6 +16,7 @@ import {
  ExternalLink,
  Timer,
  ArrowRightLeft,
+  BarChart3,
 } from 'lucide-react';
 import type { ExecutionStatus, StageMetrics } from '@/lib/pipeline-types';
 import { getExecution } from '@/lib/pipeline-api';
@@ -432,6 +433,22 @@ export default function ExecutionDetailPage() {
            </span>
          </div>
        )}
+
+        {/* View Results Dashboard Button */}
+        {execution?.status === 'completed' && execution?.job_id && (
+          <div className="mt-6 pt-4 border-t border-gray-200">
+            <Link
+              href={`/pipelines/${pipelineId}/analytics?job_id=${execution.job_id}`}
+              className="inline-flex items-center px-4 py-2.5 bg-blue-600 text-white rounded-lg hover:bg-blue-700 transition-colors font-medium"
+            >
+              <BarChart3 className="w-5 h-5 mr-2" />
+              View Results Dashboard
+            </Link>
+            <p className="mt-2 text-sm text-gray-500">
+              See classification results, sentiment analysis, and identified issues
+            </p>
+          </div>
+        )}
      </div>

      {/* Error Message */}
--- a/Show More
+++ b/Show More