feat(pipeline): Add Stage 5 Synthesis for AI-generated narratives

- Add Stage5Synthesizer class that generates AI narratives and action plans
- Add generate() method to LLMClient for synthesis generation
- Integrate Stage 5 into pipeline runner after route stage
- Add synthesis JSONB column to pipeline.executions table
- Update reviewiq_analytics API to return synthesis data
- Synthesis includes: executive narrative, sentiment/category/timeline insights, action plan, marketing angles, and priority recommendations

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 03:12:53 +00:00
parent c8ecb4b98f
commit 9b667e69a7
5 changed files with 3129 additions and 67 deletions
--- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/pipeline.py
+++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/pipeline.py
@@ -7,9 +7,11 @@ the BasePipeline interface for the extensible pipeline system.

 from __future__ import annotations

+import json
 import logging
+import re
 import time
-from datetime import date
+from datetime import date, datetime, timedelta
 from typing import TYPE_CHECKING, Any

 from pipeline_core import (
@@ -51,6 +53,8 @@ from reviewiq_pipeline.stages.stage1_normalize import Stage1Normalizer
 from reviewiq_pipeline.stages.stage2_classify import Stage2Classifier
 from reviewiq_pipeline.stages.stage3_route import Stage3Router
 from reviewiq_pipeline.stages.stage4_aggregate import Stage4Aggregator
+from reviewiq_pipeline.stages.stage5_synthesize import Stage5Synthesizer
+from reviewiq_pipeline.services.llm_client import LLMClient
 from reviewiq_pipeline.validation.validators import (
    validate_stage1_output,
    validate_stage2_output,
@@ -64,9 +68,65 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)

 # Stage name to number mapping
-STAGE_NAMES = ["normalize", "classify", "route", "aggregate"]
-STAGE_NAME_TO_NUM = {"normalize": 1, "classify": 2, "route": 3, "aggregate": 4}
-STAGE_NUM_TO_NAME = {1: "normalize", 2: "classify", 3: "route", 4: "aggregate"}
+STAGE_NAMES = ["normalize", "classify", "route", "aggregate", "synthesize"]
+STAGE_NAME_TO_NUM = {"normalize": 1, "classify": 2, "route": 3, "aggregate": 4, "synthesize": 5}
+STAGE_NUM_TO_NAME = {1: "normalize", 2: "classify", 3: "route", 4: "aggregate", 5: "synthesize"}
+
+
+def _parse_relative_date(date_str: str | None, default_to_now: bool = True) -> datetime | None:
+    """Parse an ISO or relative date string (e.g. '10 months ago') into a datetime.
+
+    Args:
+        date_str: An ISO date string, a relative date string (e.g. "2 weeks ago",
+                  "a day ago", "Edited 3 days ago"), or None.
+        default_to_now: If True, returns current datetime when parsing fails.
+
+    Returns:
+        A datetime object, or None if parsing fails and default_to_now is False.
+        ISO inputs keep their own offset (a trailing 'Z' is read as +00:00);
+        relative inputs are resolved against the naive local datetime.now().
+    """
+    now = datetime.now()
+    fallback = now if default_to_now else None
+
+    if not date_str:
+        return fallback
+
+    # Values may come straight from JSON payloads: a non-string must not crash the load
+    if not isinstance(date_str, str):
+        logger.warning(f"Could not parse relative date: {date_str!r}")
+        return fallback
+
+    # Try to parse as ISO date first
+    try:
+        return datetime.fromisoformat(date_str.strip().replace('Z', '+00:00'))
+    except ValueError:
+        pass
+
+    # Parse relative dates like "10 months ago", "2 weeks ago", "a day ago"
+    text = date_str.lower().strip()
+
+    # Handle "a/an" as 1
+    text = re.sub(r'\b(a|an)\s+', '1 ', text)
+
+    # search (not match) so prefixed forms such as "edited 2 weeks ago" still parse
+    match = re.search(r'(\d+)\s*(second|minute|hour|day|week|month|year)s?\s*ago', text)
+    if match:
+        amount = int(match.group(1))
+        unit = match.group(2)
+        # Months and years are approximated as 30 and 365 days
+        days_per_unit = {'day': 1, 'week': 7, 'month': 30, 'year': 365}
+        try:
+            if unit in days_per_unit:
+                return now - timedelta(days=amount * days_per_unit[unit])
+            return now - timedelta(**{f"{unit}s": amount})
+        except OverflowError:
+            # Absurd amounts ("999999999 years ago") are treated as unparseable
+            pass
+
+    # If we can't parse it, return now or None
+    logger.warning(f"Could not parse relative date: {date_str}")
+    return fallback


 class PipelineResult:
@@ -228,8 +288,11 @@ class ReviewIQPipeline(BasePipeline):
        stages_run: list[str] = []
        stage_results: dict[str, StageResult] = {}

-        # Convert input to ScraperOutput if needed
-        scraper_output = self._ensure_scraper_output(input_data)
+        # Convert input to ScraperOutput if needed (may fetch from DB)
+        scraper_output = await self._ensure_scraper_output(input_data)
+
+        # Extract job_id for linking issues to pipeline executions
+        job_id = scraper_output.get("job_id")

        # Track intermediate results for stage dependencies
        stage1_result: Stage1Output | None = None
@@ -270,6 +333,20 @@ class ReviewIQPipeline(BasePipeline):
                    )

            # Stage 2: Classify
+            # If classify is requested but we don't have stage1_result, try to fetch from DB
+            if "classify" in stages and not stage1_result and job_id:
+                logger.info("No stage1_result, fetching existing normalized reviews from database")
+                stage1_result = await self._fetch_normalized_reviews_from_db(job_id)
+                if stage1_result:
+                    logger.info(f"Loaded {len(stage1_result.get('reviews_normalized', []))} reviews from DB for reclassification")
+                    # Clean up old spans and issues before reclassification
+                    if self._span_repo:
+                        deactivated = await self._span_repo.deactivate_spans_for_job(job_id)
+                        logger.info(f"Deactivated {deactivated} existing spans for job {job_id}")
+                    if self._issue_repo:
+                        deleted = await self._issue_repo.delete_issues_for_job(job_id)
+                        logger.info(f"Deleted {deleted} existing issues for job {job_id}")
+
            if "classify" in stages and stage1_result:
                start = time.time()
                logger.info("Running Stage 2: Classification")
@@ -308,7 +385,7 @@ class ReviewIQPipeline(BasePipeline):
                logger.info("Running Stage 3: Issue Routing")

                try:
-                    stage3_result = await self._run_route(stage2_result)
+                    stage3_result = await self._run_route(stage2_result, job_id=job_id)
                    duration_ms = int((time.time() - start) * 1000)
                    stages_run.append("route")
                    stage_results["route"] = StageResult(
@@ -371,6 +448,43 @@ class ReviewIQPipeline(BasePipeline):
                        error=f"aggregate failed: {e}",
                    )

+            # Stage 5: Synthesize (AI-generated narratives)
+            # Requires job_id and execution_id from pipeline execution tracking
+            if "synthesize" in stages and job_id:
+                start = time.time()
+                logger.info("Running Stage 5: Synthesis")
+
+                try:
+                    # Get the execution_id for this pipeline run
+                    execution_id = input_data.get("execution_id")
+                    if execution_id:
+                        stage5_result = await self._run_synthesize(job_id, execution_id)
+                        duration_ms = int((time.time() - start) * 1000)
+                        stages_run.append("synthesize")
+                        stage_results["synthesize"] = StageResult(
+                            stage="synthesize",
+                            success=True,
+                            data={
+                                "actions_generated": len(stage5_result.action_plan) if stage5_result else 0,
+                                "has_narrative": bool(stage5_result and stage5_result.executive_narrative),
+                            },
+                            error=None,
+                            duration_ms=duration_ms,
+                        )
+                    else:
+                        logger.warning("No execution_id provided, skipping synthesis")
+                except Exception as e:
+                    logger.exception("Stage 5 failed")
+                    stage_results["synthesize"] = StageResult(
+                        stage="synthesize",
+                        success=False,
+                        data={},
+                        error=str(e),
+                        duration_ms=int((time.time() - start) * 1000),
+                    )
+                    # Synthesis failure is non-fatal - pipeline still succeeds
+                    logger.warning(f"Synthesis failed but continuing: {e}")
+
            return BasePipelineResult(
                pipeline_id="reviewiq",
                stages_run=stages_run,
@@ -558,6 +672,34 @@ class ReviewIQPipeline(BasePipeline):
                    ],
                    collapsed=False,
                ),
+                DashboardSection(
+                    id="classified_reviews",
+                    title="Classified Reviews",
+                    description="All reviews with URT classification codes and human-readable meanings",
+                    widgets=[
+                        WidgetConfig(
+                            id="classified_reviews_table",
+                            type="table",
+                            title="Reviews with URT Codes",
+                            grid={"x": 0, "y": 0, "w": 12, "h": 3},
+                            config={
+                                "columns": [
+                                    {"key": "span_text", "header": "Review Excerpt", "width": 300},
+                                    {"key": "urt_code", "header": "Code", "width": 80},
+                                    {"key": "code_name", "header": "Category", "width": 150},
+                                    {"key": "domain_name", "header": "Domain", "width": 100},
+                                    {"key": "valence", "header": "Sentiment", "width": 80},
+                                    {"key": "intensity", "header": "Intensity", "width": 80},
+                                    {"key": "rating", "header": "Stars", "width": 60, "align": "center"},
+                                ],
+                                "row_key": "span_id",
+                                "page_size": 15,
+                                "sortable": True,
+                            },
+                        ),
+                    ],
+                    collapsed=False,
+                ),
            ],
            default_time_range="30d",
            refresh_interval=300,
@@ -573,7 +715,7 @@ class ReviewIQPipeline(BasePipeline):

        Args:
            widget_id: Widget identifier
-            params: Query parameters (business_id, time_range, etc.)
+            params: Query parameters (business_id, job_id, time_range, etc.)

        Returns:
            Widget data dictionary
@@ -581,36 +723,41 @@ class ReviewIQPipeline(BasePipeline):
        await self.initialize()

        business_id = params.get("business_id")
+        job_id = params.get("job_id")
        time_range = params.get("time_range", "30d")

        match widget_id:
            # Overview stats
            case "total_reviews":
-                return await self._get_review_count(business_id)
+                return await self._get_review_count(business_id, job_id)
            case "reviews_processed":
-                return await self._get_processed_count(business_id, time_range)
+                return await self._get_processed_count(business_id, job_id, time_range)
            case "issues_found":
-                return await self._get_issues_count(business_id)
+                return await self._get_issues_count(business_id, job_id)
            case "avg_rating":
-                return await self._get_avg_rating(business_id, time_range)
+                return await self._get_avg_rating(business_id, job_id, time_range)

            # Sentiment
            case "sentiment_distribution":
-                return await self._get_sentiment_distribution(business_id)
+                return await self._get_sentiment_distribution(business_id, job_id)
            case "sentiment_trend":
-                return await self._get_sentiment_trend(business_id, time_range)
+                return await self._get_sentiment_trend(business_id, job_id, time_range)

            # Classification
            case "urt_distribution":
-                return await self._get_urt_distribution(business_id)
+                return await self._get_urt_distribution(business_id, job_id)
            case "intensity_heatmap":
-                return await self._get_intensity_heatmap(business_id)
+                return await self._get_intensity_heatmap(business_id, job_id)

            # Issues
            case "issues_table":
-                return await self._get_issues_table(business_id, params)
+                return await self._get_issues_table(business_id, job_id, params)
            case "issues_by_domain":
-                return await self._get_issues_by_domain(business_id)
+                return await self._get_issues_by_domain(business_id, job_id)
+
+            # Classified Reviews
+            case "classified_reviews_table":
+                return await self._get_classified_reviews(business_id, job_id, params)

            case _:
                logger.warning(f"Unknown widget: {widget_id}")
@@ -643,6 +790,9 @@ class ReviewIQPipeline(BasePipeline):
        result = PipelineResult()
        validation_results: dict[str, ValidationResult] = {}

+        # Extract job_id for linking issues
+        job_id = scraper_output.get("job_id")
+
        # Stage 1: Normalize
        if 1 in stages:
            logger.info("Running Stage 1: Normalization")
@@ -668,7 +818,7 @@ class ReviewIQPipeline(BasePipeline):
        # Stage 3: Route
        if 3 in stages and result.stage2:
            logger.info("Running Stage 3: Issue Routing")
-            result.stage3 = await self._run_route(result.stage2)
+            result.stage3 = await self._run_route(result.stage2, job_id=job_id)

            if validate:
                validation_results["stage3"] = await validate_stage3_output(
@@ -700,10 +850,10 @@ class ReviewIQPipeline(BasePipeline):
        await self.initialize()
        return await self._run_classify(stage1_output)

-    async def route(self, stage2_output: Stage2Output) -> Stage3Output:
+    async def route(self, stage2_output: Stage2Output, job_id: str | None = None) -> Stage3Output:
        """Run Stage 3: Issue Routing (legacy method)."""
        await self.initialize()
-        return await self._run_route(stage2_output)
+        return await self._run_route(stage2_output, job_id=job_id)

    async def aggregate(
        self,
@@ -719,14 +869,91 @@ class ReviewIQPipeline(BasePipeline):
    # Internal Stage Implementations
    # =========================================================================

-    def _ensure_scraper_output(self, input_data: dict[str, Any]) -> ScraperOutput:
-        """Ensure input data is in ScraperOutput format."""
+    async def _ensure_scraper_output(self, input_data: dict[str, Any]) -> ScraperOutput:
+        """Ensure input data is in ScraperOutput format.
+
+        If only job_id is provided, fetches job data from the database.
+        """
        # If it has all required fields, use as-is
        required = ["job_id", "business_id", "place_id", "reviews"]
        if all(k in input_data for k in required):
            return input_data  # type: ignore

-        # Otherwise, wrap it
+        # If we have a job_id but missing reviews, fetch from database
+        job_id = input_data.get("job_id")
+        if job_id and not input_data.get("reviews") and self._db:
+            logger.info(f"Fetching job data from database for job_id: {job_id}")
+            async with self._db.pool.acquire() as conn:
+                row = await conn.fetchrow(
+                    """
+                    SELECT job_id, status, reviews_data, reviews_count,
+                           metadata->>'business_name' as business_name,
+                           metadata->>'place_id' as place_id,
+                           metadata->>'address' as address,
+                           metadata->>'category' as category,
+                           metadata->>'total_reviews' as total_reviews,
+                           metadata->>'average_rating' as average_rating,
+                           scraper_version
+                    FROM public.jobs
+                    WHERE job_id = $1::uuid
+                    """,
+                    str(job_id),
+                )
+
+            if row and row["reviews_data"]:
+                reviews_data = row["reviews_data"]
+                # asyncpg may return JSONB as a string - parse it if needed
+                if isinstance(reviews_data, str):
+                    logger.info("Parsing reviews_data JSON string")
+                    reviews_data = json.loads(reviews_data)
+                # Convert reviews_data to RawReview format
+                # Handle both API format (review_id, author, rating) and scraper format (reviewId, name, stars)
+                reviews = []
+                for i, review in enumerate(reviews_data):
+                    if isinstance(review, str):
+                        # Skip if review is somehow a string
+                        logger.warning(f"Skipping review {i}: got string instead of dict")
+                        continue
+                    # Parse the review time (may be relative like "10 months ago")
+                    raw_time = review.get("timestamp") or review.get("publishedAtDate") or ""
+                    parsed_time = _parse_relative_date(raw_time)
+
+                    reviews.append({
+                        "review_id": review.get("review_id") or review.get("reviewId") or f"review_{i}",
+                        "author_name": review.get("author") or review.get("name") or "Anonymous",
+                        "author_id": review.get("reviewerId"),
+                        "rating": review.get("rating") or review.get("stars") or 0,
+                        "text": review.get("text"),
+                        "review_time": parsed_time,
+                        "response_text": review.get("responseFromOwner", {}).get("text") if review.get("responseFromOwner") else None,
+                        "response_time": review.get("responseFromOwner", {}).get("publishedAtDate") if review.get("responseFromOwner") else None,
+                        "photos": review.get("reviewImageUrls"),
+                        "raw_payload": review,
+                    })
+
+                logger.info(f"Loaded {len(reviews)} reviews from job {job_id}")
+
+                return ScraperOutput(
+                    job_id=str(row["job_id"]),
+                    status=row["status"] or "completed",
+                    business_id=row["business_name"] or "unknown",
+                    place_id=row["place_id"] or "unknown",
+                    business_info={
+                        "name": row["business_name"] or "",
+                        "address": row["address"] or "",
+                        "category": row["category"] or "",
+                        "total_reviews": int(row["total_reviews"]) if row["total_reviews"] else 0,
+                        "average_rating": float(row["average_rating"]) if row["average_rating"] else 0.0,
+                    },
+                    reviews=reviews,
+                    scrape_time_ms=0,
+                    reviews_scraped=len(reviews),
+                    scraper_version=row["scraper_version"] or "unknown",
+                )
+            else:
+                logger.warning(f"No reviews found in database for job_id: {job_id}")
+
+        # Otherwise, wrap it with empty/default values
        return ScraperOutput(
            job_id=input_data.get("job_id", "unknown"),
            status=input_data.get("status", "completed"),
@@ -739,6 +966,70 @@ class ReviewIQPipeline(BasePipeline):
            scraper_version=input_data.get("scraper_version", "unknown"),
        )

+    async def _fetch_normalized_reviews_from_db(self, job_id: str) -> Stage1Output | None:
+        """Rebuild a Stage 1 output from reviews already stored for a job.
+
+        Lets the classify stage run on its own, without a preceding normalize
+        stage. Returns None when no database is configured or no rows match.
+        """
+        if not self._db:
+            return None
+
+        async with self._db.pool.acquire() as conn:
+            records = await conn.fetch(
+                """
+                SELECT
+                    source,
+                    review_id,
+                    review_version,
+                    business_id,
+                    place_id,
+                    text,
+                    text_normalized,
+                    rating,
+                    review_time
+                FROM pipeline.reviews_enriched
+                WHERE job_id = $1::uuid
+                AND is_latest = TRUE
+                ORDER BY review_time DESC
+                """,
+                job_id,
+            )
+
+        if not records:
+            logger.warning(f"No normalized reviews found in DB for job_id: {job_id}")
+            return None
+
+        # Each selected column maps one-to-one onto a NormalizedReview field
+        columns = (
+            "source", "review_id", "review_version", "business_id", "place_id",
+            "text", "text_normalized", "rating", "review_time",
+        )
+        reviews_normalized = [
+            NormalizedReview(**{column: record[column] for column in columns})
+            for record in records
+        ]
+
+        total = len(reviews_normalized)
+        logger.info(f"Fetched {total} normalized reviews from DB for job {job_id}")
+
+        # Nothing is skipped or deduplicated on this path; "from_db" marks the origin
+        stats = {
+            "total_input": total,
+            "processed": total,
+            "skipped": 0,
+            "duplicates": 0,
+            "from_db": True,
+        }
+
+        return Stage1Output(
+            job_id=job_id,
+            reviews_normalized=reviews_normalized,
+            reviews_skipped=[],
+            duplicates_found=[],
+            stats=stats,
+        )
+
    async def _run_normalize(self, scraper_output: ScraperOutput) -> Stage1Output:
        """Run normalization stage."""
        stage1 = Stage1Normalizer(
@@ -788,6 +1079,7 @@ class ReviewIQPipeline(BasePipeline):
                taxonomy_version=self._config.taxonomy_version,
                profile=self._config.classification_profile,
                max_spans_per_review=self._config.max_spans_per_review,
+                job_id=stage1_output.get("job_id"),
            ),
        )

@@ -796,7 +1088,7 @@ class ReviewIQPipeline(BasePipeline):
        finally:
            await stage2.close()

-    async def _run_route(self, stage2_output: Stage2Output) -> Stage3Output:
+    async def _run_route(self, stage2_output: Stage2Output, job_id: str | None = None) -> Stage3Output:
        """Run issue routing stage."""
        stage3 = Stage3Router(
            self._config,
@@ -806,9 +1098,12 @@ class ReviewIQPipeline(BasePipeline):
        )

        spans_to_route = []
+        now = datetime.now()
        for review in stage2_output["reviews_classified"]:
            for span in review.get("spans", []):
                if span["valence"] in ("V-", "V±"):
+                    # Use current datetime as fallback for missing review_time
+                    review_time = review.get("review_time") or now
                    spans_to_route.append(
                        SpanToRoute(
                            span_id=span["span_id"],
@@ -818,13 +1113,13 @@ class ReviewIQPipeline(BasePipeline):
                            valence=span["valence"],
                            intensity=span["intensity"],
                            entity_normalized=span.get("entity_normalized"),
-                            review_time=review.get("review_time", ""),
+                            review_time=review_time,
                            confidence=span.get("confidence", "medium"),
                            trust_score=review.get("trust_score", 0.5),
                        )
                    )

-        return await stage3.process(Stage3Input(spans=spans_to_route))
+        return await stage3.process(Stage3Input(spans=spans_to_route, job_id=job_id))

    async def _run_aggregate(
        self,
@@ -848,17 +1143,39 @@ class ReviewIQPipeline(BasePipeline):

        return await stage4.process(input_data)

+    async def _run_synthesize(self, job_id: str, execution_id: str):
+        """Run Stage 5: build AI narratives and action plans for one execution.
+
+        Raises RuntimeError up front when no database is configured, so the
+        failure is reported with a clear message instead of an AttributeError.
+        """
+        if not self._db:
+            raise RuntimeError("Synthesis requires a database connection")
+
+        # The client is created per run; always close it, even if the stage raises
+        llm_client = LLMClient.create(self._config)
+        try:
+            stage5 = Stage5Synthesizer(pool=self._db.pool, llm_client=llm_client)
+            return await stage5.run(job_id, execution_id)
+        finally:
+            await llm_client.close()
+
    # =========================================================================
    # Widget Data Methods
    # =========================================================================

-    async def _get_review_count(self, business_id: str | None) -> dict[str, Any]:
+    async def _get_review_count(self, business_id: str | None, job_id: str | None = None) -> dict[str, Any]:
        """Get total review count."""
        if not self._db:
            return {"total_reviews": 0}

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                count = await conn.fetchval(
+                    "SELECT COUNT(*) FROM pipeline.reviews_enriched WHERE job_id = $1::uuid",
+                    job_id,
+                )
+            elif business_id:
                count = await conn.fetchval(
                    "SELECT COUNT(*) FROM pipeline.reviews_raw WHERE business_id = $1",
                    business_id,
@@ -871,7 +1188,7 @@ class ReviewIQPipeline(BasePipeline):
        return {"total_reviews": count or 0}

    async def _get_processed_count(
-        self, business_id: str | None, time_range: str
+        self, business_id: str | None, job_id: str | None, time_range: str
    ) -> dict[str, Any]:
        """Get processed review count with trend."""
        if not self._db:
@@ -881,7 +1198,14 @@ class ReviewIQPipeline(BasePipeline):
        days = self._parse_time_range(time_range)

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                # When filtering by job_id, just return count for that job
+                current = await conn.fetchval(
+                    "SELECT COUNT(*) FROM pipeline.reviews_enriched WHERE job_id = $1::uuid",
+                    job_id,
+                )
+                return {"reviews_processed": current or 0, "processed_change": 0}
+            elif business_id:
                current = await conn.fetchval(
                    """
                    SELECT COUNT(*) FROM pipeline.reviews_enriched
@@ -929,13 +1253,21 @@ class ReviewIQPipeline(BasePipeline):
            "processed_change": round(change, 1),
        }

-    async def _get_issues_count(self, business_id: str | None) -> dict[str, Any]:
+    async def _get_issues_count(self, business_id: str | None, job_id: str | None = None) -> dict[str, Any]:
        """Get open issues count."""
        if not self._db:
            return {"issues_count": 0}

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                count = await conn.fetchval(
+                    """
+                    SELECT COUNT(*) FROM pipeline.issues
+                    WHERE job_id = $1::uuid AND state = 'open'
+                    """,
+                    job_id,
+                )
+            elif business_id:
                count = await conn.fetchval(
                    """
                    SELECT COUNT(*) FROM pipeline.issues
@@ -951,7 +1283,7 @@ class ReviewIQPipeline(BasePipeline):
        return {"issues_count": count or 0}

    async def _get_avg_rating(
-        self, business_id: str | None, time_range: str
+        self, business_id: str | None, job_id: str | None, time_range: str
    ) -> dict[str, Any]:
        """Get average rating with trend."""
        if not self._db:
@@ -960,7 +1292,13 @@ class ReviewIQPipeline(BasePipeline):
        days = self._parse_time_range(time_range)

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                current = await conn.fetchval(
+                    "SELECT AVG(rating) FROM pipeline.reviews_enriched WHERE job_id = $1::uuid",
+                    job_id,
+                )
+                return {"avg_rating": round(float(current), 2) if current else 0, "rating_change": 0}
+            elif business_id:
                current = await conn.fetchval(
                    """
                    SELECT AVG(rating) FROM pipeline.reviews_enriched
@@ -1009,14 +1347,26 @@ class ReviewIQPipeline(BasePipeline):
        }

    async def _get_sentiment_distribution(
-        self, business_id: str | None
+        self, business_id: str | None, job_id: str | None = None
    ) -> dict[str, Any]:
        """Get sentiment distribution for pie chart."""
        if not self._db:
            return {"data": []}

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                rows = await conn.fetch(
+                    """
+                    SELECT
+                        valence,
+                        COUNT(*) as count
+                    FROM pipeline.review_spans
+                    WHERE job_id = $1::uuid AND is_active = TRUE
+                    GROUP BY valence
+                    """,
+                    job_id,
+                )
+            elif business_id:
                rows = await conn.fetch(
                    """
                    SELECT
@@ -1059,7 +1409,7 @@ class ReviewIQPipeline(BasePipeline):
        return {"data": data}

    async def _get_sentiment_trend(
-        self, business_id: str | None, time_range: str
+        self, business_id: str | None, job_id: str | None, time_range: str
    ) -> dict[str, Any]:
        """Get sentiment trend over time for line chart."""
        if not self._db:
@@ -1068,7 +1418,23 @@ class ReviewIQPipeline(BasePipeline):
        days = self._parse_time_range(time_range)

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                rows = await conn.fetch(
+                    """
+                    SELECT
+                        DATE(review_time) as date,
+                        COUNT(*) FILTER (WHERE valence = 'V+') as positive,
+                        COUNT(*) FILTER (WHERE valence = 'V-') as negative,
+                        COUNT(*) FILTER (WHERE valence = 'V0') as neutral
+                    FROM pipeline.review_spans
+                    WHERE job_id = $1::uuid
+                    AND is_active = TRUE
+                    GROUP BY DATE(review_time)
+                    ORDER BY date
+                    """,
+                    job_id,
+                )
+            elif business_id:
                rows = await conn.fetch(
                    """
                    SELECT
@@ -1115,13 +1481,26 @@ class ReviewIQPipeline(BasePipeline):

        return {"data": data}

-    async def _get_urt_distribution(self, business_id: str | None) -> dict[str, Any]:
+    async def _get_urt_distribution(self, business_id: str | None, job_id: str | None = None) -> dict[str, Any]:
        """Get URT domain distribution for bar chart."""
        if not self._db:
            return {"data": []}

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                rows = await conn.fetch(
+                    """
+                    SELECT
+                        SUBSTRING(urt_primary, 1, 1) as domain,
+                        COUNT(*) as count
+                    FROM pipeline.review_spans
+                    WHERE job_id = $1::uuid AND is_active = TRUE
+                    GROUP BY SUBSTRING(urt_primary, 1, 1)
+                    ORDER BY count DESC
+                    """,
+                    job_id,
+                )
+            elif business_id:
                rows = await conn.fetch(
                    """
                    SELECT
@@ -1168,13 +1547,26 @@ class ReviewIQPipeline(BasePipeline):

        return {"data": data}

-    async def _get_intensity_heatmap(self, business_id: str | None) -> dict[str, Any]:
+    async def _get_intensity_heatmap(self, business_id: str | None, job_id: str | None = None) -> dict[str, Any]:
        """Get domain x intensity heatmap data."""
        if not self._db:
            return {"data": []}

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                rows = await conn.fetch(
+                    """
+                    SELECT
+                        SUBSTRING(urt_primary, 1, 1) as domain,
+                        intensity,
+                        COUNT(*) as count
+                    FROM pipeline.review_spans
+                    WHERE job_id = $1::uuid AND is_active = TRUE
+                    GROUP BY SUBSTRING(urt_primary, 1, 1), intensity
+                    """,
+                    job_id,
+                )
+            elif business_id:
                rows = await conn.fetch(
                    """
                    SELECT
@@ -1222,7 +1614,7 @@ class ReviewIQPipeline(BasePipeline):
        return {"data": data}

    async def _get_issues_table(
-        self, business_id: str | None, params: dict[str, Any]
+        self, business_id: str | None, job_id: str | None, params: dict[str, Any]
    ) -> dict[str, Any]:
        """Get issues table data."""
        if not self._db:
@@ -1233,7 +1625,30 @@ class ReviewIQPipeline(BasePipeline):
        offset = (page - 1) * page_size

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                rows = await conn.fetch(
+                    """
+                    SELECT
+                        issue_id,
+                        domain,
+                        primary_subcode as subcode,
+                        span_count,
+                        max_intensity,
+                        state
+                    FROM pipeline.issues
+                    WHERE job_id = $1::uuid
+                    ORDER BY span_count DESC, created_at DESC
+                    LIMIT $2 OFFSET $3
+                    """,
+                    job_id,
+                    page_size,
+                    offset,
+                )
+                total = await conn.fetchval(
+                    "SELECT COUNT(*) FROM pipeline.issues WHERE job_id = $1::uuid",
+                    job_id,
+                )
+            elif business_id:
                rows = await conn.fetch(
                    """
                    SELECT
@@ -1279,13 +1694,24 @@ class ReviewIQPipeline(BasePipeline):

        return {"data": data, "total": total or 0}

-    async def _get_issues_by_domain(self, business_id: str | None) -> dict[str, Any]:
+    async def _get_issues_by_domain(self, business_id: str | None, job_id: str | None = None) -> dict[str, Any]:
        """Get issues grouped by domain for pie chart."""
        if not self._db:
            return {"data": []}

        async with self._db._pool.acquire() as conn:
-            if business_id:
+            if job_id:
+                rows = await conn.fetch(
+                    """
+                    SELECT domain, COUNT(*) as count
+                    FROM pipeline.issues
+                    WHERE job_id = $1::uuid
+                    GROUP BY domain
+                    ORDER BY count DESC
+                    """,
+                    job_id,
+                )
+            elif business_id:
                rows = await conn.fetch(
                    """
                    SELECT domain, COUNT(*) as count
@@ -1310,6 +1736,89 @@ class ReviewIQPipeline(BasePipeline):

        return {"data": data}

+    async def _get_classified_reviews(
+        self, business_id: str | None, job_id: str | None, params: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Get one page of active classified spans with human-readable URT names.
+
+        Scope is ``job_id`` if given, else ``business_id``, else every active span.
+        ``params`` may hold ``page`` (default 1) and ``page_size`` (default 15).
+        Returns ``{"data": [...], "total": n}``; empty when no database is set.
+        """
+        if not self._db:
+            return {"data": [], "total": 0}
+
+        # Coerce and clamp the paging inputs so they can be bound as query arguments.
+        page = max(int(params.get("page", 1)), 1)
+        page_size = max(int(params.get("page_size", 15)), 0)
+        offset = (page - 1) * page_size
+
+        async with self._db._pool.acquire() as conn:
+            # Build the query with JOINs to get human-readable code names
+            base_query = """
+                SELECT
+                    s.span_id,
+                    s.span_text,
+                    s.urt_primary as urt_code,
+                    COALESCE(sub.name, cat.name, dom.name) as code_name,
+                    COALESCE(sub.definition, dom.description) as code_definition,
+                    dom.name as domain_name,
+                    CASE s.valence
+                        WHEN 'V+' THEN 'Positive'
+                        WHEN 'V-' THEN 'Negative'
+                        WHEN 'V0' THEN 'Neutral'
+                        WHEN 'V±' THEN 'Mixed'
+                        ELSE s.valence
+                    END as valence,
+                    CASE s.intensity
+                        WHEN 'I1' THEN 'Mild'
+                        WHEN 'I2' THEN 'Moderate'
+                        WHEN 'I3' THEN 'Strong'
+                        ELSE s.intensity
+                    END as intensity,
+                    e.rating,
+                    s.review_time
+                FROM pipeline.review_spans s
+                LEFT JOIN pipeline.reviews_enriched e ON s.review_id = e.review_id AND s.review_version = e.review_version
+                LEFT JOIN pipeline.urt_domains dom ON SUBSTRING(s.urt_primary, 1, 1) = dom.code
+                LEFT JOIN pipeline.urt_categories cat ON SUBSTRING(s.urt_primary, 1, 2) = cat.code
+                LEFT JOIN pipeline.urt_subcodes sub ON s.urt_primary = sub.code
+                WHERE s.is_active = TRUE
+            """
+            count_query = """
+                SELECT COUNT(*) FROM pipeline.review_spans s
+                WHERE s.is_active = TRUE
+            """
+
+            # At most one scope filter applies; job_id takes precedence over business_id.
+            scope, args = "", []
+            if job_id:
+                scope, args = " AND s.job_id = $1::uuid", [job_id]
+            elif business_id:
+                scope, args = " AND s.business_id = $1", [business_id]
+            # LIMIT/OFFSET are bound as $n placeholders after the optional scope argument.
+            paging = f" ORDER BY s.review_time DESC LIMIT ${len(args) + 1} OFFSET ${len(args) + 2}"
+            rows = await conn.fetch(base_query + scope + paging, *args, page_size, offset)
+            total = await conn.fetchval(count_query + scope, *args)
+
+        data = [
+            {
+                "span_id": row["span_id"],
+                "span_text": row["span_text"],
+                "urt_code": row["urt_code"],
+                "code_name": row["code_name"] or "Unknown",
+                "code_definition": row["code_definition"] or "",
+                "domain_name": row["domain_name"] or "Unknown",
+                "valence": row["valence"],
+                "intensity": row["intensity"],
+                "rating": row["rating"],
+                "review_time": row["review_time"].isoformat() if row["review_time"] else None,
+            }
+            for row in rows
+        ]
+
+        return {"data": data, "total": total or 0}
+
    def _parse_time_range(self, time_range: str) -> int:
        """Parse time range string to days."""
        if time_range.endswith("d"):