#!/usr/bin/env python3
"""
Optimized ReviewIQ Analytics endpoint.

Provides a single API endpoint returning all dashboard data with optimized SQL queries.
Replaces multiple per-widget requests with one request that runs a fixed set of query
steps on a single pooled connection.
"""

import logging
from datetime import datetime, timedelta
from typing import Any

import asyncpg
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Router for the ReviewIQ analytics routes; every route registered on it is
# served under the /api/pipelines/reviewiq prefix.
router = APIRouter(prefix="/api/pipelines/reviewiq", tags=["reviewiq-analytics"])

# Database pool (set by main server through set_database()).
# Stays None until then; the /analytics endpoint answers 503 in that state.
_pool: asyncpg.Pool | None = None


def set_database(pool: asyncpg.Pool) -> None:
    """Set the database pool for analytics operations.

    Stores ``pool`` in the module-level ``_pool``, replacing any pool that was
    set earlier. The ``/analytics`` endpoint acquires its connection from this
    pool and responds with HTTP 503 while no pool has been set.

    Args:
        pool: Pool the analytics endpoint will acquire connections from.
    """
    global _pool
    _pool = pool


# ==================== Pydantic Models ====================


class OverviewStats(BaseModel):
    """Overview statistics for the dashboard."""

    # Built by _get_overview_stats. As computed there:
    #   - negative_count covers both 'V-' and 'V±' spans, while mixed_count
    #     covers 'V±' alone, so mixed spans appear in both figures;
    #   - open_issues is counted from pipeline.issues (state = 'open') and is
    #     restricted by job/business only, not by the time range;
    #   - avg_rating is None when the query yields no rating.
    total_reviews: int = Field(0, description="Total reviews processed")
    total_spans: int = Field(0, description="Total classified spans")
    open_issues: int = Field(0, description="Open issues count")
    avg_rating: float | None = Field(None, description="Average review rating")
    positive_count: int = Field(0, description="Positive sentiment count")
    negative_count: int = Field(0, description="Negative sentiment count")
    neutral_count: int = Field(0, description="Neutral sentiment count")
    mixed_count: int = Field(0, description="Mixed sentiment count")


class SentimentDataPoint(BaseModel):
    """Single data point for sentiment distribution."""

    # Built by _get_distributions, one point per valence value found.
    # percentage = review_count / (sum of review_count over all returned
    # valence rows) * 100, or 0 when that sum is 0. A review whose spans carry
    # several valences contributes to each of those rows.
    valence: str = Field(..., description="Valence label (V+, V-, V0, V±)")
    count: int = Field(..., description="Count of spans (mentions)")
    review_count: int = Field(0, description="Count of distinct reviews")
    percentage: float = Field(..., description="Percentage of total reviews")


class SentimentTrendPoint(BaseModel):
    """Single data point for sentiment trend over time."""

    # Built by _get_distributions, one point per week of review_time.
    # period is rendered by PostgreSQL with the pattern 'IYYY-"W"IW'.
    # All four counters are span counts; negative includes 'V±' spans, which
    # are also reported on their own as mixed.
    period: str = Field(..., description="Time period (e.g., '2024-W01')")
    positive: int = Field(0, description="Positive count")
    negative: int = Field(0, description="Negative count")
    neutral: int = Field(0, description="Neutral count")
    mixed: int = Field(0, description="Mixed count")


class SentimentData(BaseModel):
    """Sentiment distribution and trend data."""

    # Both lists default to empty and are filled by _get_distributions:
    # distribution is ordered by review count (descending), trend by week
    # (ascending).
    distribution: list[SentimentDataPoint] = Field(default_factory=list)
    trend: list[SentimentTrendPoint] = Field(default_factory=list)


class URTDomainPoint(BaseModel):
    """URT domain distribution point with sentiment breakdown."""

    # Built by _get_distributions, one point per domain found.
    #   - domain is the first character of a span's urt_primary code; the
    #     Field description lists P, J, O, A, but DOMAIN_CONFIG in this module
    #     defines seven codes (O, P, J, E, A, V, R).
    #   - domain_name comes from DOMAIN_NAMES and falls back to the raw code.
    #   - percentage = review_count / (sum of review_count over all returned
    #     domain rows) * 100, or 0 when that sum is 0.
    #   - negative_count / negative_reviews include 'V±' as well as 'V-'.
    domain: str = Field(..., description="Domain code (P, J, O, A)")
    domain_name: str = Field(..., description="Domain display name")
    count: int = Field(..., description="Count of spans (mentions)")
    review_count: int = Field(0, description="Count of distinct reviews affected")
    percentage: float = Field(..., description="Percentage of total reviews")
    positive_count: int = Field(0, description="Positive sentiment spans")
    negative_count: int = Field(0, description="Negative sentiment spans")
    neutral_count: int = Field(0, description="Neutral sentiment spans")
    positive_reviews: int = Field(0, description="Reviews with positive sentiment")
    negative_reviews: int = Field(0, description="Reviews with negative sentiment")


class IntensityPoint(BaseModel):
    """Intensity distribution by domain."""

    # One heatmap cell: the number of spans for a (domain, intensity) pair.
    # _get_distributions emits cells only for spans where both urt_primary and
    # intensity are set; domain is the first character of urt_primary.
    domain: str = Field(..., description="Domain code")
    intensity: str = Field(..., description="Intensity level (I1, I2, I3)")
    count: int = Field(..., description="Count of spans")


class URTData(BaseModel):
    """URT domain distribution and heatmap data."""

    # Both lists default to empty and are filled by _get_distributions:
    # domains is ordered by review count (descending), intensity_heatmap by
    # domain and then intensity.
    domains: list[URTDomainPoint] = Field(default_factory=list)
    intensity_heatmap: list[IntensityPoint] = Field(default_factory=list)


class IssueItem(BaseModel):
    """Single issue item with enriched URT metadata."""

    # Required (no default): issue_id, primary_subcode, domain, state,
    # priority_score, span_count. Every other field is optional and defaults
    # to None.
    # NOTE(review): the code that fills this model lies outside this section;
    # the meaning of each field is taken from its Field description only.
    issue_id: str = Field(..., description="Issue identifier")
    primary_subcode: str = Field(..., description="URT subcode")
    subcode_name: str | None = Field(None, description="Subcode display name")
    subcode_definition: str | None = Field(None, description="Subcode definition")
    solution: str | None = Field(None, description="Recommended action")
    solution_complexity: str | None = Field(None, description="Solution complexity (simple/medium/complex)")
    domain: str = Field(..., description="Domain code")
    domain_name: str | None = Field(None, description="Domain display name")
    category_name: str | None = Field(None, description="Category display name")
    default_owner: str | None = Field(None, description="Default owner team")
    negative_example: str | None = Field(None, description="Example negative review text")
    entity: str | None = Field(None, description="Related entity")
    state: str = Field(..., description="Issue state")
    priority_score: float = Field(..., description="Priority score")
    span_count: int = Field(..., description="Number of related spans")
    max_intensity: str | None = Field(None, description="Maximum intensity")
    # Timestamp is carried as a string, not a datetime.
    created_at: str | None = Field(None, description="Creation timestamp")


class PaginatedIssues(BaseModel):
    """Paginated issues list."""

    # The defaults describe an empty first page (no items, total 0, page 1,
    # page size 10). The endpoint's issues_page query parameter is 1-based
    # (ge=1) and issues_page_size is limited to 1..100.
    items: list[IssueItem] = Field(default_factory=list)
    total: int = Field(0, description="Total count")
    page: int = Field(1, description="Current page")
    page_size: int = Field(10, description="Items per page")


class SpanItem(BaseModel):
    """Single classified span."""

    # Required (no default): span_id and span_text. The classification and
    # provenance fields are optional and default to None.
    span_id: str = Field(..., description="Span identifier")
    span_text: str = Field(..., description="Span text content")
    urt_primary: str | None = Field(None, description="Primary URT code")
    valence: str | None = Field(None, description="Valence")
    intensity: str | None = Field(None, description="Intensity")
    # Timestamp is carried as a string, not a datetime.
    review_time: str | None = Field(None, description="Review timestamp")
    source_review_id: str | None = Field(None, description="Source review ID")
    entity: str | None = Field(None, description="Entity mentioned")


class PaginatedSpans(BaseModel):
    """Paginated spans list."""

    # The defaults describe an empty first page (no items, total 0, page 1,
    # page size 10). The endpoint's spans_page query parameter is 1-based
    # (ge=1) and spans_page_size is limited to 1..100.
    items: list[SpanItem] = Field(default_factory=list)
    total: int = Field(0, description="Total count")
    page: int = Field(1, description="Current page")
    page_size: int = Field(10, description="Items per page")


class TimelinePoint(BaseModel):
    """Single point on the timeline chart."""

    # Built by _get_timeline_data, one point per week of review_time.
    #   - date: that function only emits the weekly form, rendered by
    #     PostgreSQL with the pattern 'IYYY-"W"IW'.
    #   - review_count: distinct (source, review_id) pairs in the week.
    #   - negative_count: includes 'V±' spans as well as 'V-'.
    #   - avg_rating: None when the week has no rating.
    date: str = Field(..., description="Date string (YYYY-MM-DD or YYYY-WXX)")
    review_count: int = Field(0, description="Number of reviews")
    span_count: int = Field(0, description="Number of spans")
    avg_rating: float | None = Field(None, description="Average rating")
    positive_count: int = Field(0, description="Positive sentiment count")
    negative_count: int = Field(0, description="Negative sentiment count")


# ==================== Domain Scores & Insights ====================


class DomainScore(BaseModel):
    """Domain-level KPI score."""

    # Required (no default): domain, name, score, status. trend is a
    # preformatted string (not a number) and may be None.
    # NOTE(review): the scoring code lies outside this section; the status
    # bands presumably relate to the green/yellow values in DOMAIN_CONFIG —
    # confirm against _get_domain_scores.
    domain: str = Field(..., description="Domain code")
    name: str = Field(..., description="Domain display name")
    score: float = Field(..., description="Score 0-100")
    status: str = Field(..., description="Status: good/warning/critical")
    trend: str | None = Field(None, description="Trend vs previous period (e.g., '+3.2')")
    positive_count: int = Field(0, description="Positive spans")
    negative_count: int = Field(0, description="Negative spans")
    total_count: int = Field(0, description="Total spans")


class StrengthItem(BaseModel):
    """A strength (highly positive subcode)."""

    # Every field except marketing_angle is required.
    # NOTE(review): ranking and percentage computation happen outside this
    # section; field meanings are taken from the Field descriptions only.
    rank: int = Field(..., description="Rank order")
    subcode: str = Field(..., description="URT subcode")
    subcode_name: str = Field(..., description="Subcode display name")
    domain: str = Field(..., description="Domain code")
    domain_name: str = Field(..., description="Domain display name")
    positive_percentage: float = Field(..., description="% positive sentiment")
    span_count: int = Field(..., description="Total mentions")
    marketing_angle: str | None = Field(None, description="Marketing suggestion")


class WeaknessItem(BaseModel):
    """A weakness (negative issue to fix)."""

    # Required (no default): rank, subcode, subcode_name, domain, domain_name,
    # negative_percentage, span_count. issue_id and the remediation fields are
    # optional and default to None.
    # NOTE(review): the code that fills this model lies outside this section.
    rank: int = Field(..., description="Rank order")
    issue_id: str | None = Field(None, description="Related issue ID if exists")
    subcode: str = Field(..., description="URT subcode")
    subcode_name: str = Field(..., description="Subcode display name")
    domain: str = Field(..., description="Domain code")
    domain_name: str = Field(..., description="Domain display name")
    negative_percentage: float = Field(..., description="% negative sentiment")
    span_count: int = Field(..., description="Total negative mentions")
    intensity: str | None = Field(None, description="Max intensity")
    solution: str | None = Field(None, description="Recommended action")
    solution_complexity: str | None = Field(None, description="Complexity")
    projected_rating_impact: float | None = Field(None, description="Potential rating gain if fixed")
    owner: str | None = Field(None, description="Default owner team")


class RatingSimulator(BaseModel):
    """Rating impact simulation."""

    # current_rating is the only required field; the two projections may be
    # None and potential_gain defaults to 0.
    # NOTE(review): the projection formula lies outside this section.
    current_rating: float = Field(..., description="Current average rating")
    if_fix_top_1: float | None = Field(None, description="Projected rating if top 1 issue fixed")
    if_fix_top_3: float | None = Field(None, description="Projected rating if top 3 issues fixed")
    potential_gain: float = Field(0, description="Maximum potential rating gain")


class OpportunitySpan(BaseModel):
    """A span (customer feedback) related to an opportunity item."""

    # Required (no default): span_id and span_text. The review context fields
    # are optional and default to None; review_date is carried as a string.
    span_id: str = Field(..., description="Span identifier")
    span_text: str = Field(..., description="The classified span text")
    review_text: str | None = Field(None, description="Full review text for context")
    rating: int | None = Field(None, description="Source review rating")
    review_id: str | None = Field(None, description="Source review ID for navigation")
    review_date: str | None = Field(None, description="Review date")


class OpportunityItem(BaseModel):
    """An item in the opportunity matrix with coordinates and detail data."""

    # Placement within a quadrant: x and y are documented as 0-1 positions.
    # NOTE(review): no validator here enforces that range; the producing code
    # lies outside this section.
    subcode: str = Field(..., description="URT subcode")
    name: str = Field(..., description="Human-readable subcode name")
    x: float = Field(..., description="X position (0-1, frequency within quadrant)")
    y: float = Field(..., description="Y position (0-1, effort within quadrant)")
    # Detail data for hover/click
    domain: str = Field(..., description="Domain code (P, J, O, etc.)")
    domain_name: str = Field(..., description="Domain display name")
    negative_pct: float = Field(..., description="Percentage of negative mentions")
    span_count: int = Field(..., description="Number of mentions")
    solution: str | None = Field(None, description="Suggested solution from taxonomy")
    # Unlike the similar fields on IssueItem / WeaknessItem, complexity is required.
    complexity: str = Field(..., description="Solution complexity (simple/medium/complex)")
    rating_impact: float | None = Field(None, description="Projected rating improvement")
    owner: str | None = Field(None, description="Suggested owner/team")
    example: str | None = Field(None, description="Example negative quote")
    spans: list[OpportunitySpan] = Field(default_factory=list, description="Sample customer feedback spans")


class OpportunityMatrix(BaseModel):
    """2x2 opportunity matrix."""

    # One list per quadrant, each defaulting to empty. Per the Field
    # descriptions the axes are mention frequency (high/low) and solution
    # complexity (simple/complex).
    quick_wins: list[OpportunityItem] = Field(default_factory=list, description="High freq + simple")
    critical: list[OpportunityItem] = Field(default_factory=list, description="High freq + complex")
    nice_to_have: list[OpportunityItem] = Field(default_factory=list, description="Low freq + simple")
    strategic: list[OpportunityItem] = Field(default_factory=list, description="Low freq + complex")


class Insights(BaseModel):
    """Business insights including strengths and weaknesses."""

    # Every field has a default, so Insights() is a valid empty payload: empty
    # lists, no simulator, no matrix, empty summary. ReviewIQAnalyticsResponse
    # uses this class as a default_factory.
    strengths: list[StrengthItem] = Field(default_factory=list)
    weaknesses: list[WeaknessItem] = Field(default_factory=list)
    rating_simulator: RatingSimulator | None = Field(None)
    opportunity_matrix: OpportunityMatrix | None = Field(None)
    executive_summary: str = Field("", description="Auto-generated summary")


# ==================== AI Synthesis Models ====================


class ActionItemResponse(BaseModel):
    """A specific action recommendation from AI synthesis."""

    # Only id and title are required. The free-text fields default to "",
    # complexity and priority default to "medium", timeline to "This month".
    # NOTE(review): complexity uses quick/medium/complex here, whereas the
    # issue/opportunity models describe simple/medium/complex — confirm that
    # the difference is intended.
    id: str = Field(..., description="Action identifier")
    title: str = Field(..., description="Clear action title")
    why: str = Field("", description="Root cause from reviews")
    what: str = Field("", description="Specific steps to take")
    who: str = Field("", description="Department or role responsible")
    impact: str = Field("", description="Expected outcome")
    evidence: list[str] = Field(default_factory=list, description="Supporting quotes")
    estimated_rating_lift: float | None = Field(None, description="Projected rating improvement")
    complexity: str = Field("medium", description="quick/medium/complex")
    priority: str = Field("medium", description="critical/high/medium/low")
    timeline: str = Field("This month", description="When to implement")
    related_subcode: str = Field("", description="Related URT subcode")


class TimelineAnnotationResponse(BaseModel):
    """A key event annotation for timeline visualization."""

    # date and label are required; description defaults to "" and type to
    # "neutral". Note that date is documented as a calendar day (YYYY-MM-DD),
    # while the TimelinePoint values produced in this module are ISO-week labels.
    date: str = Field(..., description="Event date (YYYY-MM-DD)")
    label: str = Field(..., description="Short label")
    description: str = Field("", description="What happened")
    type: str = Field("neutral", description="positive/negative/neutral/event")


class SynthesisResponse(BaseModel):
    """AI-generated synthesis with narratives and action plans."""

    # Every field has a default, so a partially populated synthesis is valid:
    # narratives default to "", lists to empty, the remaining fields to None.
    # The domain letters listed for priority_domain match the keys of
    # DOMAIN_CONFIG in this module.
    executive_narrative: str = Field("", description="2-3 paragraph business story")
    sentiment_insight: str = Field("", description="Why sentiment is distributed this way")
    category_insight: str = Field("", description="Pattern in categories")
    timeline_insight: str = Field("", description="Trends over time")
    priority_domain: str | None = Field(None, description="Domain needing most attention (P/V/J/O/A/E/R)")
    priority_issue: str | None = Field(None, description="Subcode to fix first (e.g., V1.03)")
    action_plan: list[ActionItemResponse] = Field(default_factory=list, description="Prioritized actions")
    timeline_annotations: list[TimelineAnnotationResponse] = Field(default_factory=list, description="Key events")
    marketing_angles: list[str] = Field(default_factory=list, description="Ways to promote strengths")
    competitor_context: str | None = Field(None, description="Industry comparison")
    # Timestamp is carried as a string, not a datetime.
    generated_at: str | None = Field(None, description="When synthesis was generated")


class ReviewIQAnalyticsResponse(BaseModel):
    """Complete analytics response for ReviewIQ dashboard."""

    # Top-level payload of GET /analytics. In get_reviewiq_analytics the
    # sections are filled as follows:
    #   overview                              <- _get_overview_stats
    #   sentiment, urt                        <- _get_distributions
    #   timeline                              <- _get_timeline_data
    #   issues                                <- _get_issues
    #   spans                                 <- _get_spans
    #   domain_scores, overall_experience_index <- _get_domain_scores
    #   insights                              <- _get_insights
    #   synthesis                             <- _get_synthesis
    #   filters_applied                       <- echo of the request filters
    # Every field has a default, so an empty response is valid.
    overview: OverviewStats = Field(default_factory=OverviewStats)
    sentiment: SentimentData = Field(default_factory=SentimentData)
    urt: URTData = Field(default_factory=URTData)
    domain_scores: list[DomainScore] = Field(default_factory=list)
    overall_experience_index: float | None = Field(None, description="OEI composite score")
    insights: Insights = Field(default_factory=Insights)
    issues: PaginatedIssues = Field(default_factory=PaginatedIssues)
    spans: PaginatedSpans = Field(default_factory=PaginatedSpans)
    timeline: list[TimelinePoint] = Field(default_factory=list)
    synthesis: SynthesisResponse | None = Field(None, description="AI-generated synthesis")
    filters_applied: dict[str, Any] = Field(default_factory=dict)


# ==================== Helper Functions ====================


def _parse_time_range(time_range: str) -> datetime:
    """Parse time range string to start datetime."""
    now = datetime.now()

    if time_range == "7d":
        return now - timedelta(days=7)
    elif time_range == "14d":
        return now - timedelta(days=14)
    elif time_range == "30d":
        return now - timedelta(days=30)
    elif time_range == "90d":
        return now - timedelta(days=90)
    elif time_range == "1y":
        return now - timedelta(days=365)
    elif time_range == "all":
        return datetime(2000, 1, 1)  # Effectively no time filter
    else:
        # Default to 30 days
        return now - timedelta(days=30)


# Domain configuration, keyed by the single-letter URT domain code.
#   name   - display name (also exposed through DOMAIN_NAMES below)
#   owner  - owning team label
#   green, yellow - NOTE(review): these look like score thresholds on a 0-100
#            scale for status bands; their consumer lies outside this section
#            — confirm before relying on that reading
#   weight - per-domain weight; the seven weights sum to 1.00
DOMAIN_CONFIG = {
    "O": {"name": "Offering", "owner": "Operations / Product", "green": 80, "yellow": 60, "weight": 0.20},
    "P": {"name": "People", "owner": "HR / Training", "green": 85, "yellow": 70, "weight": 0.18},
    "J": {"name": "Journey", "owner": "Operations / Process", "green": 75, "yellow": 55, "weight": 0.15},
    "E": {"name": "Environment", "owner": "Facilities / IT", "green": 80, "yellow": 65, "weight": 0.12},
    "A": {"name": "Access", "owner": "Compliance / Design", "green": 85, "yellow": 70, "weight": 0.10},
    "V": {"name": "Value", "owner": "Finance / Pricing", "green": 70, "yellow": 50, "weight": 0.12},
    "R": {"name": "Relationship", "owner": "Leadership / CX", "green": 80, "yellow": 60, "weight": 0.13},
}

# Intensity weights for scoring, keyed by intensity label (I1 < I2 < I3).
# Not referenced by the query helpers visible in this section.
INTENSITY_WEIGHTS = {"I1": 1.0, "I2": 2.0, "I3": 4.0}

# Legacy mapping for backward compatibility: domain code -> display name,
# derived from DOMAIN_CONFIG. Used by _get_distributions to label domains.
DOMAIN_NAMES = {k: v["name"] for k, v in DOMAIN_CONFIG.items()}


# ==================== API Endpoint ====================


@router.get("/analytics", response_model=ReviewIQAnalyticsResponse)
async def get_reviewiq_analytics(
    job_id: str | None = Query(None, description="Filter by job ID"),
    business_id: str | None = Query(None, description="Filter by business ID"),
    time_range: str = Query("30d", description="Time range (7d, 14d, 30d, 90d, 1y, all)"),
    sentiment: str | None = Query(None, description="Filter by sentiment (comma-separated: positive,negative)"),
    urt_domain: str | None = Query(None, description="Filter by URT domain (P, J, O, A)"),
    intensity: str | None = Query(None, description="Filter by intensity (I1, I2, I3)"),
    issues_page: int = Query(1, ge=1, description="Issues page number"),
    issues_page_size: int = Query(10, ge=1, le=100, description="Issues per page"),
    spans_page: int = Query(1, ge=1, description="Spans page number"),
    spans_page_size: int = Query(10, ge=1, le=100, description="Spans per page"),
) -> ReviewIQAnalyticsResponse:
    """
    Get all analytics data for ReviewIQ dashboard in a single call.

    Returns overview stats, sentiment distribution, URT breakdown, issues, and spans.
    Supports cross-filtering by sentiment, URT domain, and intensity.

    Responds with HTTP 503 when the database pool has not been initialized.
    """
    # Compare against None explicitly: "no pool configured" is the only state
    # this guard is meant to catch.
    if _pool is None:
        raise HTTPException(status_code=503, detail="Database not initialized")

    # Parse filters
    start_date = _parse_time_range(time_range)

    # Normalize the comma-separated sentiment list: surrounding whitespace,
    # letter case and empty entries (e.g. "positive, negative" or "positive,")
    # would otherwise make a token silently fail to match in the helpers.
    sentiment_filter: list[str] | None = None
    if sentiment:
        tokens = [part.strip().lower() for part in sentiment.split(",")]
        sentiment_filter = [token for token in tokens if token] or None

    # Echo the effective filters back to the client. time_range is reported as
    # requested; start_date shows the lower bound that was actually used.
    filters_applied: dict[str, Any] = {
        "time_range": time_range,
        "start_date": start_date.isoformat(),
    }
    if job_id:
        filters_applied["job_id"] = job_id
    if business_id:
        filters_applied["business_id"] = business_id
    if sentiment_filter:
        filters_applied["sentiment"] = sentiment_filter
    if urt_domain:
        filters_applied["urt_domain"] = urt_domain
    if intensity:
        filters_applied["intensity"] = intensity

    # All query steps run sequentially on one connection from the pool.
    async with _pool.acquire() as conn:
        # Query 1: Overview Stats
        overview = await _get_overview_stats(
            conn, job_id, business_id, start_date, sentiment_filter, urt_domain, intensity
        )

        # Query 2: Sentiment Distribution + URT Domain Distribution
        sentiment_data, urt_data = await _get_distributions(
            conn, job_id, business_id, start_date, sentiment_filter, urt_domain, intensity
        )

        # Query 3: Timeline Data
        timeline = await _get_timeline_data(
            conn, job_id, business_id, start_date, sentiment_filter, urt_domain, intensity
        )

        # Query 4: Issues (paginated) - now with enriched URT data
        issues = await _get_issues(
            conn, job_id, business_id, start_date, sentiment_filter, urt_domain, intensity,
            issues_page, issues_page_size
        )

        # Query 5: Spans (paginated)
        spans = await _get_spans(
            conn, job_id, business_id, start_date, sentiment_filter, urt_domain, intensity,
            spans_page, spans_page_size
        )

        # Query 6: Domain KPI Scores (not affected by the sentiment/domain/intensity filters)
        domain_scores, oei = await _get_domain_scores(
            conn, job_id, business_id, start_date
        )

        # Query 7: Insights (strengths, weaknesses, recommendations);
        # reuses the overview figures computed in Query 1.
        insights = await _get_insights(
            conn, job_id, business_id, start_date,
            overview.avg_rating, overview.total_reviews
        )

        # Query 8: AI Synthesis (if available)
        synthesis = await _get_synthesis(conn, job_id)

    return ReviewIQAnalyticsResponse(
        overview=overview,
        sentiment=sentiment_data,
        urt=urt_data,
        domain_scores=domain_scores,
        overall_experience_index=oei,
        insights=insights,
        issues=issues,
        spans=spans,
        timeline=timeline,
        synthesis=synthesis,
        filters_applied=filters_applied,
    )


async def _get_overview_stats(
    conn: asyncpg.Connection,
    job_id: str | None,
    business_id: str | None,
    start_date: datetime,
    sentiment_filter: list[str] | None,
    urt_domain: str | None,
    intensity: str | None,
) -> OverviewStats:
    """Get overview statistics for the dashboard header.

    Runs two statements on ``conn``: one aggregate over
    ``pipeline.review_spans`` (left-joined to ``pipeline.reviews_enriched``)
    with all filters applied, and one count of open rows in
    ``pipeline.issues`` filtered by job and business only.

    Args:
        conn: Connection used for both statements.
        job_id: Restrict to this job (cast to ``uuid`` in SQL) when set.
        business_id: Restrict to this business when set.
        start_date: Lower bound (inclusive) on ``review_time``.
        sentiment_filter: Sentiment names; ``positive`` maps to ``V+``,
            ``negative`` to ``V-`` and ``V±``, ``neutral`` to ``V0``. Names
            outside that set are ignored.
        urt_domain: Restrict to spans whose ``urt_primary`` starts with this
            letter when set.
        intensity: Restrict to this intensity label when set.

    Returns:
        The populated ``OverviewStats``; counters default to 0 and
        ``avg_rating`` to ``None`` when the aggregate yields NULL.
    """

    # Build WHERE conditions for spans; every value is passed as a bound
    # parameter, only the placeholder numbers are formatted into the SQL.
    conditions = ["rs.review_time >= $1"]
    params: list[Any] = [start_date]
    param_idx = 2

    if job_id:
        conditions.append(f"rs.job_id = ${param_idx}::uuid")
        params.append(job_id)
        param_idx += 1

    if business_id:
        conditions.append(f"rs.business_id = ${param_idx}")
        params.append(business_id)
        param_idx += 1

    if urt_domain:
        conditions.append(f"LEFT(rs.urt_primary, 1) = ${param_idx}")
        params.append(urt_domain)
        param_idx += 1

    if intensity:
        conditions.append(f"rs.intensity = ${param_idx}")
        params.append(intensity)
        param_idx += 1

    # Valence filter
    if sentiment_filter:
        valence_codes = []
        if "positive" in sentiment_filter:
            valence_codes.append("V+")
        if "negative" in sentiment_filter:
            # Mixed spans are treated as negative for filtering purposes.
            valence_codes.extend(["V-", "V±"])
        if "neutral" in sentiment_filter:
            valence_codes.append("V0")
        if valence_codes:
            conditions.append(f"rs.valence = ANY(${param_idx}::text[])")
            params.append(valence_codes)
            param_idx += 1

    where_clause = " AND ".join(conditions)

    # NOTE(review): AVG(re.rating) is taken over the joined span rows, so a
    # review contributes once per matching span. Confirm whether a per-review
    # average is what the dashboard should show.
    query = f"""
        SELECT
            COUNT(DISTINCT re.id) as total_reviews,
            COUNT(rs.span_id) as total_spans,
            AVG(re.rating) as avg_rating,
            COUNT(*) FILTER (WHERE rs.valence = 'V+') as positive_count,
            COUNT(*) FILTER (WHERE rs.valence IN ('V-', 'V±')) as negative_count,
            COUNT(*) FILTER (WHERE rs.valence = 'V0') as neutral_count,
            COUNT(*) FILTER (WHERE rs.valence = 'V±') as mixed_count
        FROM pipeline.review_spans rs
        LEFT JOIN pipeline.reviews_enriched re ON (
            re.source = rs.source
            AND re.review_id = rs.review_id
            AND re.review_version = rs.review_version
        )
        WHERE {where_clause}
    """

    row = await conn.fetchrow(query, *params)

    # Get open issues count separately (job/business scope only; the time
    # range and span-level filters are not applied to issues here).
    issue_conditions = ["i.state = 'open'"]
    issue_params: list[Any] = []
    issue_param_idx = 1

    if job_id:
        issue_conditions.append(f"i.job_id = ${issue_param_idx}::uuid")
        issue_params.append(job_id)
        issue_param_idx += 1

    if business_id:
        issue_conditions.append(f"i.business_id = ${issue_param_idx}")
        issue_params.append(business_id)
        issue_param_idx += 1

    issue_where = " AND ".join(issue_conditions)
    issue_count = await conn.fetchval(
        f"SELECT COUNT(*) FROM pipeline.issues i WHERE {issue_where}",
        *issue_params
    )

    # Test against None rather than truthiness so that a genuine average of 0
    # is reported as 0.0 instead of being dropped.
    avg_rating = row["avg_rating"]

    return OverviewStats(
        total_reviews=row["total_reviews"] or 0,
        total_spans=row["total_spans"] or 0,
        open_issues=issue_count or 0,
        avg_rating=float(avg_rating) if avg_rating is not None else None,
        positive_count=row["positive_count"] or 0,
        negative_count=row["negative_count"] or 0,
        neutral_count=row["neutral_count"] or 0,
        mixed_count=row["mixed_count"] or 0,
    )


async def _get_distributions(
    conn: asyncpg.Connection,
    job_id: str | None,
    business_id: str | None,
    start_date: datetime,
    sentiment_filter: list[str] | None,
    urt_domain: str | None,
    intensity: str | None,
) -> tuple[SentimentData, URTData]:
    """Get sentiment and URT distributions with cross-filtering support.

    Runs four statements against ``pipeline.review_spans``. All of them share
    the job / business / time conditions; the remaining filters are applied
    crosswise so that a chart is never narrowed by its own dimension:

    * sentiment distribution and weekly trend: domain + intensity filters
    * URT domain distribution: sentiment + intensity filters
    * intensity heatmap: domain + sentiment filters

    Distinct reviews are identified by ``(source, review_id)``, matching the
    review key used by the timeline query in this module.

    Args:
        conn: Connection used for all statements.
        job_id: Restrict to this job (cast to ``uuid`` in SQL) when set.
        business_id: Restrict to this business when set.
        start_date: Lower bound (inclusive) on ``review_time``.
        sentiment_filter: Sentiment names; ``positive`` maps to ``V+``,
            ``negative`` to ``V-`` and ``V±``, ``neutral`` to ``V0``. Names
            outside that set are ignored.
        urt_domain: Single-letter domain compared with the first character of
            ``urt_primary``.
        intensity: Intensity label to match.

    Returns:
        ``(SentimentData, URTData)``. Percentages are each row's review count
        relative to the sum of review counts over the rows of the same query,
        or 0 when that sum is 0.
    """

    # Build base WHERE conditions (job, business, time)
    base_conditions = ["rs.review_time >= $1"]
    base_params: list[Any] = [start_date]
    param_idx = 2

    if job_id:
        base_conditions.append(f"rs.job_id = ${param_idx}::uuid")
        base_params.append(job_id)
        param_idx += 1

    if business_id:
        base_conditions.append(f"rs.business_id = ${param_idx}")
        base_params.append(business_id)
        param_idx += 1

    # Convert sentiment filter to valence codes
    valence_codes = []
    if sentiment_filter:
        if "positive" in sentiment_filter:
            valence_codes.append("V+")
        if "negative" in sentiment_filter:
            # Mixed spans are treated as negative for filtering purposes.
            valence_codes.extend(["V-", "V±"])
        if "neutral" in sentiment_filter:
            valence_codes.append("V0")

    # ========== Sentiment Distribution (filtered by domain) ==========
    # Each section copies the base conditions and continues numbering its
    # placeholders from param_idx, so the sections stay independent.
    sentiment_conditions = list(base_conditions)
    sentiment_params = list(base_params)
    sentiment_param_idx = param_idx

    # Apply domain filter to sentiment (cross-filter: domain → sentiment)
    if urt_domain:
        sentiment_conditions.append(f"LEFT(rs.urt_primary, 1) = ${sentiment_param_idx}")
        sentiment_params.append(urt_domain)
        sentiment_param_idx += 1

    # Apply intensity filter
    if intensity:
        sentiment_conditions.append(f"rs.intensity = ${sentiment_param_idx}")
        sentiment_params.append(intensity)
        sentiment_param_idx += 1

    sentiment_where = " AND ".join(sentiment_conditions)

    # Review-based counting avoids bias from verbose reviews. A review is
    # keyed by source + review_id so equal ids from different sources are not
    # merged into one review.
    sentiment_query = f"""
        SELECT
            valence,
            COUNT(*) as span_count,
            COUNT(DISTINCT CONCAT(rs.source, ':', rs.review_id)) as review_count
        FROM pipeline.review_spans rs
        WHERE {sentiment_where} AND valence IS NOT NULL
        GROUP BY valence
        ORDER BY review_count DESC
    """

    sentiment_rows = await conn.fetch(sentiment_query, *sentiment_params)
    # Use review_count for percentages to avoid bias from verbose reviews
    sentiment_review_total = sum(r["review_count"] for r in sentiment_rows)

    sentiment_distribution = [
        SentimentDataPoint(
            valence=row["valence"],
            count=row["span_count"],
            review_count=row["review_count"],
            percentage=(
                row["review_count"] / sentiment_review_total * 100
                if sentiment_review_total > 0
                else 0
            ),
        )
        for row in sentiment_rows
    ]

    # ========== Sentiment Trend (filtered by domain) ==========
    # Same conditions and parameters as the distribution above, bucketed by
    # week. 'negative' includes mixed (V±) spans, which are also reported on
    # their own as 'mixed'.
    trend_query = f"""
        SELECT
            TO_CHAR(DATE_TRUNC('week', rs.review_time), 'IYYY-"W"IW') as period,
            COUNT(*) FILTER (WHERE rs.valence = 'V+') as positive,
            COUNT(*) FILTER (WHERE rs.valence IN ('V-', 'V±')) as negative,
            COUNT(*) FILTER (WHERE rs.valence = 'V0') as neutral,
            COUNT(*) FILTER (WHERE rs.valence = 'V±') as mixed
        FROM pipeline.review_spans rs
        WHERE {sentiment_where}
        GROUP BY DATE_TRUNC('week', rs.review_time)
        ORDER BY DATE_TRUNC('week', rs.review_time)
    """

    trend_rows = await conn.fetch(trend_query, *sentiment_params)
    sentiment_trend = [
        SentimentTrendPoint(
            period=row["period"],
            positive=row["positive"] or 0,
            negative=row["negative"] or 0,
            neutral=row["neutral"] or 0,
            mixed=row["mixed"] or 0,
        )
        for row in trend_rows
    ]

    # ========== URT Domain Distribution (filtered by sentiment) ==========
    urt_conditions = list(base_conditions)
    urt_params = list(base_params)
    urt_param_idx = param_idx

    # Apply sentiment filter to URT domains (cross-filter: sentiment → domain)
    if valence_codes:
        urt_conditions.append(f"rs.valence = ANY(${urt_param_idx}::text[])")
        urt_params.append(valence_codes)
        urt_param_idx += 1

    # Apply intensity filter
    if intensity:
        urt_conditions.append(f"rs.intensity = ${urt_param_idx}")
        urt_params.append(intensity)
        urt_param_idx += 1

    urt_where = " AND ".join(urt_conditions)

    # Review-based counting, with the same source + review_id review key as
    # the sentiment distribution above.
    urt_query = f"""
        SELECT
            LEFT(urt_primary, 1) as domain,
            COUNT(*) as span_count,
            COUNT(DISTINCT CONCAT(rs.source, ':', rs.review_id)) as review_count,
            COUNT(*) FILTER (WHERE valence = 'V+') as positive_spans,
            COUNT(*) FILTER (WHERE valence IN ('V-', 'V±')) as negative_spans,
            COUNT(*) FILTER (WHERE valence = 'V0') as neutral_spans,
            COUNT(DISTINCT CONCAT(rs.source, ':', rs.review_id))
                FILTER (WHERE valence = 'V+') as positive_reviews,
            COUNT(DISTINCT CONCAT(rs.source, ':', rs.review_id))
                FILTER (WHERE valence IN ('V-', 'V±')) as negative_reviews
        FROM pipeline.review_spans rs
        WHERE {urt_where} AND urt_primary IS NOT NULL
        GROUP BY LEFT(urt_primary, 1)
        ORDER BY review_count DESC
    """

    urt_rows = await conn.fetch(urt_query, *urt_params)
    # Use review_count for percentages to avoid bias from verbose reviews
    domain_review_total = sum(r["review_count"] for r in urt_rows)

    domains = [
        URTDomainPoint(
            domain=row["domain"],
            # Unknown domain letters are shown as-is.
            domain_name=DOMAIN_NAMES.get(row["domain"], row["domain"]),
            count=row["span_count"],
            review_count=row["review_count"],
            percentage=(
                row["review_count"] / domain_review_total * 100
                if domain_review_total > 0
                else 0
            ),
            positive_count=row["positive_spans"] or 0,
            negative_count=row["negative_spans"] or 0,
            neutral_count=row["neutral_spans"] or 0,
            positive_reviews=row["positive_reviews"] or 0,
            negative_reviews=row["negative_reviews"] or 0,
        )
        for row in urt_rows
    ]

    # ========== Intensity Heatmap (filtered by both sentiment and domain) ==========
    # The intensity filter is deliberately not applied here: intensity is the
    # dimension this chart breaks down by.
    heatmap_conditions = list(base_conditions)
    heatmap_params = list(base_params)
    heatmap_param_idx = param_idx

    # Apply domain filter
    if urt_domain:
        heatmap_conditions.append(f"LEFT(rs.urt_primary, 1) = ${heatmap_param_idx}")
        heatmap_params.append(urt_domain)
        heatmap_param_idx += 1

    # Apply sentiment filter
    if valence_codes:
        heatmap_conditions.append(f"rs.valence = ANY(${heatmap_param_idx}::text[])")
        heatmap_params.append(valence_codes)
        heatmap_param_idx += 1

    heatmap_where = " AND ".join(heatmap_conditions)

    heatmap_query = f"""
        SELECT
            LEFT(urt_primary, 1) as domain,
            intensity,
            COUNT(*) as count
        FROM pipeline.review_spans rs
        WHERE {heatmap_where}
            AND urt_primary IS NOT NULL
            AND intensity IS NOT NULL
        GROUP BY LEFT(urt_primary, 1), intensity
        ORDER BY domain, intensity
    """

    heatmap_rows = await conn.fetch(heatmap_query, *heatmap_params)
    intensity_heatmap = [
        IntensityPoint(
            domain=row["domain"],
            intensity=row["intensity"],
            count=row["count"],
        )
        for row in heatmap_rows
    ]

    return (
        SentimentData(distribution=sentiment_distribution, trend=sentiment_trend),
        URTData(domains=domains, intensity_heatmap=intensity_heatmap),
    )


async def _get_timeline_data(
    conn: asyncpg.Connection,
    job_id: str | None,
    business_id: str | None,
    start_date: datetime,
    sentiment_filter: list[str] | None,
    urt_domain: str | None,
    intensity: str | None,
) -> list[TimelinePoint]:
    """Return weekly span aggregates for the timeline (brush) chart.

    Spans with ``review_time >= start_date`` are grouped by ISO week; the
    optional ``job_id`` and ``business_id`` narrow the set further.

    Note: ``sentiment_filter``, ``urt_domain`` and ``intensity`` are part of
    the signature but are not applied to the query by this function.

    Returns:
        One ``TimelinePoint`` per week, ordered chronologically. ``avg_rating``
        is averaged over span rows joined to their enriched review, and
        ``negative_count`` includes both 'V-' and 'V±' spans.
    """
    # $1 is always the start date; later placeholders are numbered by position.
    filters = ["rs.review_time >= $1"]
    args: list[Any] = [start_date]

    if job_id:
        args.append(job_id)
        filters.append(f"rs.job_id = ${len(args)}::uuid")

    if business_id:
        args.append(business_id)
        filters.append(f"rs.business_id = ${len(args)}")

    where_clause = " AND ".join(filters)

    query = f"""
        SELECT
            TO_CHAR(DATE_TRUNC('week', rs.review_time), 'IYYY-"W"IW') as date,
            COUNT(DISTINCT CONCAT(rs.source, ':', rs.review_id)) as review_count,
            COUNT(*) as span_count,
            AVG(re.rating) as avg_rating,
            COUNT(*) FILTER (WHERE rs.valence = 'V+') as positive_count,
            COUNT(*) FILTER (WHERE rs.valence IN ('V-', 'V±')) as negative_count
        FROM pipeline.review_spans rs
        LEFT JOIN pipeline.reviews_enriched re ON (
            re.source = rs.source
            AND re.review_id = rs.review_id
            AND re.review_version = rs.review_version
        )
        WHERE {where_clause}
        GROUP BY DATE_TRUNC('week', rs.review_time)
        ORDER BY DATE_TRUNC('week', rs.review_time)
    """

    weekly_rows = await conn.fetch(query, *args)

    points: list[TimelinePoint] = []
    for week in weekly_rows:
        mean_rating = week["avg_rating"]
        points.append(
            TimelinePoint(
                date=week["date"],
                review_count=week["review_count"] or 0,
                span_count=week["span_count"] or 0,
                # A missing (or zero) average is reported as None.
                avg_rating=float(mean_rating) if mean_rating else None,
                positive_count=week["positive_count"] or 0,
                negative_count=week["negative_count"] or 0,
            )
        )
    return points


async def _get_issues(
    conn: asyncpg.Connection,
    job_id: str | None,
    business_id: str | None,
    start_date: datetime,
    sentiment_filter: list[str] | None,
    urt_domain: str | None,
    intensity: str | None,
    page: int,
    page_size: int,
) -> PaginatedIssues:
    """Return one page of issues, highest priority first.

    Issues are filtered by ``job_id``, ``business_id``, ``urt_domain``
    (matched against ``i.domain``) and ``intensity`` (matched against
    ``i.max_intensity``) when those are provided. Each item is enriched with
    subcode, domain and category metadata via LEFT JOINs, so metadata fields
    may be ``None`` when no matching row exists.

    Note: ``start_date`` and ``sentiment_filter`` are part of the signature
    but are not applied to the issues query.

    Args:
        page: 1-based page number.
        page_size: Maximum number of items in the page.

    Returns:
        ``PaginatedIssues`` with the page items and the total number of
        issues matching the filters.
    """
    # "1=1" keeps the WHERE clause valid when no filter is supplied.
    conditions = ["1=1"]
    params: list[Any] = []

    if job_id:
        params.append(job_id)
        conditions.append(f"i.job_id = ${len(params)}::uuid")

    if business_id:
        params.append(business_id)
        conditions.append(f"i.business_id = ${len(params)}")

    if urt_domain:
        params.append(urt_domain)
        conditions.append(f"i.domain = ${len(params)}")

    if intensity:
        params.append(intensity)
        conditions.append(f"i.max_intensity = ${len(params)}")

    where_clause = " AND ".join(conditions)

    # Count query
    count_query = f"SELECT COUNT(*) FROM pipeline.issues i WHERE {where_clause}"
    total = await conn.fetchval(count_query, *params)

    # Items query with pagination - enriched with URT metadata
    limit_idx = len(params) + 1
    offset = (page - 1) * page_size
    # i.issue_id is the final sort key so that rows tied on priority_score and
    # created_at keep a stable order; without it LIMIT/OFFSET pages can repeat
    # or skip tied rows between requests.
    items_query = f"""
        SELECT
            i.issue_id,
            i.primary_subcode,
            s.name as subcode_name,
            s.definition as subcode_definition,
            s.solution,
            s.solution_complexity,
            s.negative_example,
            i.domain,
            d.name as domain_name,
            d.default_owner,
            c.name as category_name,
            i.entity,
            i.state,
            i.priority_score,
            i.span_count,
            i.max_intensity,
            i.created_at
        FROM pipeline.issues i
        LEFT JOIN pipeline.urt_subcodes s ON i.primary_subcode = s.code
        LEFT JOIN pipeline.urt_domains d ON i.domain = d.code
        LEFT JOIN pipeline.urt_categories c ON s.category_code = c.code
        WHERE {where_clause}
        ORDER BY i.priority_score DESC, i.created_at DESC, i.issue_id
        LIMIT ${limit_idx} OFFSET ${limit_idx + 1}
    """

    rows = await conn.fetch(items_query, *params, page_size, offset)

    items = [
        IssueItem(
            issue_id=row["issue_id"],
            primary_subcode=row["primary_subcode"],
            subcode_name=row["subcode_name"],
            subcode_definition=row["subcode_definition"],
            solution=row["solution"],
            solution_complexity=row["solution_complexity"],
            domain=row["domain"],
            domain_name=row["domain_name"],
            category_name=row["category_name"],
            default_owner=row["default_owner"],
            negative_example=row["negative_example"],
            entity=row["entity"],
            state=row["state"],
            priority_score=float(row["priority_score"]) if row["priority_score"] else 0,
            span_count=row["span_count"] or 0,
            max_intensity=row["max_intensity"],
            created_at=row["created_at"].isoformat() if row["created_at"] else None,
        )
        for row in rows
    ]

    return PaginatedIssues(
        items=items,
        total=total or 0,
        page=page,
        page_size=page_size,
    )


async def _get_spans(
    conn: asyncpg.Connection,
    job_id: str | None,
    business_id: str | None,
    start_date: datetime,
    sentiment_filter: list[str] | None,
    urt_domain: str | None,
    intensity: str | None,
    page: int,
    page_size: int,
) -> PaginatedSpans:
    """Return one page of classified spans, newest review first.

    Only spans with ``review_time >= start_date`` are considered. The other
    filters are applied when provided:

    * ``job_id`` / ``business_id`` — exact match.
    * ``urt_domain`` — compared with the first character of ``urt_primary``.
    * ``intensity`` — exact match on ``rs.intensity``.
    * ``sentiment_filter`` — labels "positive", "negative" and "neutral" are
      translated to valence codes ("negative" covers both 'V-' and 'V±').
      Unrecognised labels are ignored; if none is recognised no valence
      filter is applied.

    Args:
        page: 1-based page number.
        page_size: Maximum number of items in the page.

    Returns:
        ``PaginatedSpans`` with the page items and the total number of spans
        matching the filters.
    """
    # $1 is always the start date; later placeholders are numbered by position.
    conditions = ["rs.review_time >= $1"]
    params: list[Any] = [start_date]

    if job_id:
        params.append(job_id)
        conditions.append(f"rs.job_id = ${len(params)}::uuid")

    if business_id:
        params.append(business_id)
        conditions.append(f"rs.business_id = ${len(params)}")

    if urt_domain:
        params.append(urt_domain)
        conditions.append(f"LEFT(rs.urt_primary, 1) = ${len(params)}")

    if intensity:
        params.append(intensity)
        conditions.append(f"rs.intensity = ${len(params)}")

    # Valence filter
    if sentiment_filter:
        label_to_codes = (
            ("positive", ["V+"]),
            ("negative", ["V-", "V±"]),
            ("neutral", ["V0"]),
        )
        valence_codes = [
            code
            for label, codes in label_to_codes
            if label in sentiment_filter
            for code in codes
        ]
        if valence_codes:
            params.append(valence_codes)
            conditions.append(f"rs.valence = ANY(${len(params)}::text[])")

    where_clause = " AND ".join(conditions)

    # Count query
    count_query = f"SELECT COUNT(*) FROM pipeline.review_spans rs WHERE {where_clause}"
    total = await conn.fetchval(count_query, *params)

    # Items query with pagination
    limit_idx = len(params) + 1
    offset = (page - 1) * page_size
    # rs.span_id breaks ties between spans with the same review_time;
    # ordering by review_time alone is not a unique order, so LIMIT/OFFSET
    # pages could otherwise repeat or skip spans.
    items_query = f"""
        SELECT
            rs.span_id,
            rs.span_text,
            rs.urt_primary,
            rs.valence,
            rs.intensity,
            rs.review_time,
            rs.review_id as source_review_id,
            rs.entity
        FROM pipeline.review_spans rs
        WHERE {where_clause}
        ORDER BY rs.review_time DESC, rs.span_id
        LIMIT ${limit_idx} OFFSET ${limit_idx + 1}
    """

    rows = await conn.fetch(items_query, *params, page_size, offset)

    items = [
        SpanItem(
            span_id=row["span_id"],
            span_text=row["span_text"],
            urt_primary=row["urt_primary"],
            valence=row["valence"],
            intensity=row["intensity"],
            review_time=row["review_time"].isoformat() if row["review_time"] else None,
            source_review_id=row["source_review_id"],
            entity=row["entity"],
        )
        for row in rows
    ]

    return PaginatedSpans(
        items=items,
        total=total or 0,
        page=page,
        page_size=page_size,
    )


async def _get_domain_scores(
    conn: asyncpg.Connection,
    job_id: str | None,
    business_id: str | None,
    start_date: datetime,
) -> tuple[list[DomainScore], float | None]:
    """Compute an intensity-weighted 0-100 score per configured domain.

    Spans with ``review_time >= start_date`` (optionally narrowed by
    ``job_id`` / ``business_id``) are grouped by domain — the first character
    of ``urt_primary`` — valence and intensity. Each group contributes
    ``INTENSITY_WEIGHTS[intensity] * count`` (weight 1.0 for unknown
    intensities; a NULL intensity is treated as "I1").

    Returns:
        A tuple ``(domain_scores, oei)``: one ``DomainScore`` per entry of
        ``DOMAIN_CONFIG`` (domains with no data score a neutral 50), and the
        overall index, i.e. the average of the domain scores weighted by each
        domain's configured ``weight`` (``None`` when that cannot be computed).
    """

    def _empty_bucket() -> dict[str, float]:
        # Fresh accumulator for one domain.
        return {
            "positive_weight": 0, "negative_weight": 0, "total_weight": 0,
            "positive_count": 0, "negative_count": 0, "total_count": 0
        }

    # Build WHERE conditions ($1 is always the start date)
    filters = ["rs.review_time >= $1"]
    args: list[Any] = [start_date]

    if job_id:
        args.append(job_id)
        filters.append(f"rs.job_id = ${len(args)}::uuid")

    if business_id:
        args.append(business_id)
        filters.append(f"rs.business_id = ${len(args)}")

    where_clause = " AND ".join(filters)

    # Sentiment counts per (domain, valence, intensity)
    query = f"""
        SELECT
            LEFT(rs.urt_primary, 1) as domain,
            rs.valence,
            rs.intensity,
            COUNT(*) as count
        FROM pipeline.review_spans rs
        WHERE {where_clause}
            AND rs.urt_primary IS NOT NULL
            AND rs.valence IS NOT NULL
        GROUP BY LEFT(rs.urt_primary, 1), rs.valence, rs.intensity
        ORDER BY domain
    """

    grouped = await conn.fetch(query, *args)

    # Fold the grouped rows into one accumulator per domain.
    per_domain: dict[str, dict[str, float]] = {}
    for rec in grouped:
        key = rec["domain"]
        if key not in per_domain:
            per_domain[key] = _empty_bucket()
        bucket = per_domain[key]

        level = rec["intensity"] or "I1"
        weight = INTENSITY_WEIGHTS.get(level, 1.0)
        n = rec["count"]

        bucket["total_weight"] += weight * n
        bucket["total_count"] += n

        valence = rec["valence"]
        if valence == "V+":
            bucket["positive_weight"] += weight * n
            bucket["positive_count"] += n
        elif valence in ("V-", "V±"):
            bucket["negative_weight"] += weight * n
            bucket["negative_count"] += n

    # Turn accumulators into scores, one per configured domain.
    domain_scores = []
    for code, cfg in DOMAIN_CONFIG.items():
        data = per_domain[code] if code in per_domain else _empty_bucket()

        total = data["total_weight"]
        if total > 0:
            # 50 is neutral; net positive weight pushes towards 100,
            # net negative weight towards 0.
            score = 50 + ((data["positive_weight"] - data["negative_weight"]) / total) * 50
            score = max(0, min(100, score))
        else:
            score = 50  # Neutral if no data

        # Traffic-light status from the domain's configured thresholds.
        status = (
            "good" if score >= cfg["green"]
            else "warning" if score >= cfg["yellow"]
            else "critical"
        )

        domain_scores.append(DomainScore(
            domain=code,
            name=cfg["name"],
            score=round(score, 1),
            status=status,
            trend=None,  # TODO: Calculate trend vs previous period
            positive_count=int(data["positive_count"]),
            negative_count=int(data["negative_count"]),
            total_count=int(data["total_count"]),
        ))

    # Overall Experience Index (OEI): weight-averaged domain score.
    oei = None
    if domain_scores:
        weighted_sum = 0
        total_weight = 0
        for ds in domain_scores:
            if ds.domain not in DOMAIN_CONFIG:
                continue
            domain_weight = DOMAIN_CONFIG[ds.domain]["weight"]
            weighted_sum += ds.score * domain_weight
            total_weight += domain_weight
        if total_weight > 0:
            oei = round(weighted_sum / total_weight, 1)

    return domain_scores, oei


async def _get_insights(
    conn: asyncpg.Connection,
    job_id: str | None,
    business_id: str | None,
    start_date: datetime,
    avg_rating: float | None,
    total_reviews: int,
) -> Insights:
    """Generate strengths, weaknesses, and business insights.

    Spans with ``review_time >= start_date`` (optionally narrowed by
    ``job_id`` / ``business_id``) are aggregated per URT subcode; subcodes
    with fewer than 2 spans are ignored. From that aggregate this builds:

    * strengths — up to 5 subcodes with >= 70% positive spans;
    * weaknesses — up to 5 subcodes with >= 40% negative spans ('V-' or 'V±');
    * an opportunity matrix — every subcode with >= 40% negative spans is
      placed in a quadrant by frequency (>= median span count) and by whether
      its solution complexity is "simple"; each quadrant returns at most 5
      items, each with up to 15 sample negative spans;
    * a rating simulator and a short executive summary.

    Args:
        avg_rating: Current average rating; when falsy no projected rating
            impact and no rating simulator are produced.
        total_reviews: Denominator used for the projected rating impact.

    Returns:
        The assembled ``Insights`` model. Chart coordinates of opportunity
        items include a small random jitter, so they vary between calls.
    """
    # Function-scope import: ``random`` is only needed for the chart jitter.
    import random

    # Build WHERE conditions
    conditions = ["rs.review_time >= $1"]
    params: list[Any] = [start_date]
    param_idx = 2

    if job_id:
        conditions.append(f"rs.job_id = ${param_idx}::uuid")
        params.append(job_id)
        param_idx += 1

    if business_id:
        conditions.append(f"rs.business_id = ${param_idx}")
        params.append(business_id)
        param_idx += 1

    where_clause = " AND ".join(conditions)

    # Query: Get subcode-level sentiment distribution with URT metadata
    query = f"""
        SELECT
            rs.urt_primary as subcode,
            s.name as subcode_name,
            s.solution,
            s.solution_complexity,
            s.marketing_angle,
            s.negative_example,
            LEFT(rs.urt_primary, 1) as domain,
            d.name as domain_name,
            d.default_owner,
            COUNT(*) as total_count,
            COUNT(*) FILTER (WHERE rs.valence = 'V+') as positive_count,
            COUNT(*) FILTER (WHERE rs.valence IN ('V-', 'V±')) as negative_count,
            MAX(rs.intensity) as max_intensity
        FROM pipeline.review_spans rs
        LEFT JOIN pipeline.urt_subcodes s ON rs.urt_primary = s.code
        LEFT JOIN pipeline.urt_domains d ON LEFT(rs.urt_primary, 1) = d.code
        WHERE {where_clause}
            AND rs.urt_primary IS NOT NULL
        GROUP BY rs.urt_primary, s.name, s.solution, s.solution_complexity,
                 s.marketing_angle, s.negative_example, LEFT(rs.urt_primary, 1),
                 d.name, d.default_owner
        HAVING COUNT(*) >= 2
        ORDER BY COUNT(*) DESC
    """

    rows = await conn.fetch(query, *params)

    # Sample negative spans for one subcode. The same job/business/date
    # filters as the aggregate query are applied so that samples come from
    # the data set the percentages were computed on; the subcode is bound to
    # the next free placeholder after those filters.
    spans_query = f"""
        SELECT
            rs.span_id,
            rs.span_text,
            re.rating,
            rs.review_id,
            re.review_time::text as review_date,
            re.text as review_text
        FROM pipeline.review_spans rs
        LEFT JOIN pipeline.reviews_enriched re ON (
            re.source = rs.source
            AND re.review_id = rs.review_id
            AND re.review_version = rs.review_version
        )
        WHERE {where_clause}
          AND rs.urt_primary = ${param_idx}
          AND rs.valence IN ('V-', 'V±')
        ORDER BY re.review_time DESC NULLS LAST
        LIMIT 15
    """

    # Separate into strengths and weaknesses
    strengths = []
    weaknesses = []
    # Raw opportunity items per quadrant (converted to models further down)
    quick_wins_raw = []
    critical_raw = []
    nice_to_have_raw = []
    strategic_raw = []

    # Median span count splits "high frequency" from "low frequency".
    counts = [r["total_count"] for r in rows]
    median_count = sorted(counts)[len(counts) // 2] if counts else 0

    # Rating points assumed lost per negative span, by strongest intensity seen.
    intensity_loss_by_level = {"I1": 0.5, "I2": 1.0, "I3": 2.0}

    for row in rows:
        total = row["total_count"]
        positive = row["positive_count"]
        negative = row["negative_count"]
        pos_pct = (positive / total * 100) if total > 0 else 0
        neg_pct = (negative / total * 100) if total > 0 else 0

        subcode = row["subcode"]
        complexity = row["solution_complexity"] or "medium"

        # Strengths: >= 70% positive
        if pos_pct >= 70 and len(strengths) < 5:
            strengths.append(StrengthItem(
                rank=len(strengths) + 1,
                subcode=subcode,
                subcode_name=row["subcode_name"] or subcode,
                domain=row["domain"],
                domain_name=row["domain_name"] or row["domain"],
                positive_percentage=round(pos_pct, 1),
                span_count=total,
                marketing_angle=row["marketing_angle"],
            ))

        # Everything below concerns weaknesses: >= 40% negative
        if neg_pct < 40:
            continue

        # Projected rating impact, computed for every weak subcode so that
        # opportunity items beyond the top-5 weaknesses do not inherit the
        # value of a previous row.
        # Simplified model: impact = (negative_spans / total_reviews) * intensity_loss
        impact = None
        if avg_rating and total_reviews > 0:
            intensity_loss = intensity_loss_by_level.get(row["max_intensity"], 0.5)
            impact = round((negative / total_reviews) * intensity_loss, 2)

        if len(weaknesses) < 5:
            weaknesses.append(WeaknessItem(
                rank=len(weaknesses) + 1,
                issue_id=None,  # Could link to issue if exists
                subcode=subcode,
                subcode_name=row["subcode_name"] or subcode,
                domain=row["domain"],
                domain_name=row["domain_name"] or row["domain"],
                negative_percentage=round(neg_pct, 1),
                span_count=negative,
                intensity=row["max_intensity"],
                solution=row["solution"],
                solution_complexity=complexity,
                projected_rating_impact=impact,
                owner=row["default_owner"],
            ))

        # Opportunity matrix
        is_high_freq = total >= median_count
        is_simple = complexity == "simple"

        item_data = {
            "subcode": subcode,
            "name": row["subcode_name"] or subcode,
            "count": total,
            "complexity": complexity,
            "domain": row["domain"],
            "domain_name": row["domain_name"] or row["domain"],
            "negative_pct": round(neg_pct, 1),
            "span_count": negative,
            "solution": row["solution"],
            "rating_impact": impact,
            "owner": row["default_owner"],
            "example": row["negative_example"],
        }

        if is_high_freq and is_simple:
            quick_wins_raw.append(item_data)
        elif is_high_freq:
            critical_raw.append(item_data)
        elif is_simple:
            nice_to_have_raw.append(item_data)
        else:
            strategic_raw.append(item_data)

    # Y position by complexity (simple=0.2, medium=0.5, complex=0.8)
    effort_map = {"simple": 0.2, "medium": 0.5, "complex": 0.8}

    # Helper to compute coordinates for opportunity items and fetch spans
    async def compute_opportunity_items(items: list[dict]) -> list[OpportunityItem]:
        if not items:
            return []
        # Min/max counts over the whole quadrant for x-axis normalization
        quadrant_counts = [item["count"] for item in items]
        q_min = min(quadrant_counts)
        q_max = max(quadrant_counts)
        q_range = q_max - q_min if q_max > q_min else 1

        result = []
        for item in items[:5]:
            complexity = item["complexity"]
            # X: frequency within quadrant (0.1 to 0.9 to keep items away from edges)
            x = 0.1 + 0.8 * ((item["count"] - q_min) / q_range)
            y = effort_map.get(complexity, 0.5)
            # Add small jitter to prevent overlap
            x = max(0.05, min(0.95, x + random.uniform(-0.05, 0.05)))
            y = max(0.05, min(0.95, y + random.uniform(-0.08, 0.08)))

            span_rows = await conn.fetch(spans_query, *params, item["subcode"])
            spans = [
                OpportunitySpan(
                    span_id=row["span_id"],
                    span_text=row["span_text"] or "",
                    # Full review text (re.text), truncated to 500 characters
                    review_text=row["review_text"][:500] if row["review_text"] else None,
                    rating=row["rating"],
                    review_id=row["review_id"],
                    # First 10 characters of the timestamp text, i.e. the date part
                    review_date=row["review_date"][:10] if row["review_date"] else None,
                )
                for row in span_rows
            ]

            result.append(OpportunityItem(
                subcode=item["subcode"],
                name=item["name"],
                x=round(x, 3),
                y=round(y, 3),
                domain=item["domain"],
                domain_name=item["domain_name"],
                negative_pct=item["negative_pct"],
                span_count=item["span_count"],
                solution=item["solution"],
                complexity=complexity,
                rating_impact=item["rating_impact"],
                owner=item["owner"],
                example=item["example"],
                spans=spans,
            ))
        return result

    quick_wins = await compute_opportunity_items(quick_wins_raw)
    critical = await compute_opportunity_items(critical_raw)
    nice_to_have = await compute_opportunity_items(nice_to_have_raw)
    strategic = await compute_opportunity_items(strategic_raw)

    # Generate executive summary
    summary = ""
    if weaknesses:
        top_weakness = weaknesses[0]
        summary = f"Your biggest opportunity is improving {top_weakness.subcode_name} ({top_weakness.domain_name}) - {top_weakness.negative_percentage:.0f}% of mentions are negative. "
    if strengths:
        top_strength = strengths[0]
        summary += f"{top_strength.subcode_name} is your strongest asset with {top_strength.positive_percentage:.0f}% positive sentiment."

    # Rating simulator
    rating_simulator = None
    if avg_rating and weaknesses:
        impacts = [w.projected_rating_impact or 0 for w in weaknesses]
        if_fix_top_1 = round(avg_rating + impacts[0], 2) if len(impacts) >= 1 else None
        # Only reported when at least three weaknesses were found
        if_fix_top_3 = round(avg_rating + sum(impacts[:3]), 2) if len(impacts) >= 3 else None
        potential = round(sum(impacts[:5]), 2)

        rating_simulator = RatingSimulator(
            current_rating=round(avg_rating, 2),
            if_fix_top_1=if_fix_top_1,
            if_fix_top_3=if_fix_top_3,
            potential_gain=potential,
        )

    return Insights(
        strengths=strengths,
        weaknesses=weaknesses,
        rating_simulator=rating_simulator,
        opportunity_matrix=OpportunityMatrix(
            quick_wins=quick_wins,
            critical=critical,
            nice_to_have=nice_to_have,
            strategic=strategic,
        ),
        executive_summary=summary,
    )


async def _get_synthesis(
    conn: asyncpg.Connection,
    job_id: str | None,
) -> SynthesisResponse | None:
    """Load the most recent stored synthesis for a job, if any.

    Reads the ``synthesis`` column of the newest ``pipeline.executions`` row
    for ``job_id`` that has one, and maps it onto ``SynthesisResponse``,
    filling defaults for missing keys.

    Returns:
        The parsed synthesis, or ``None`` when ``job_id`` is empty, no
        synthesis is stored, or anything fails while loading or parsing it
        (failures are logged as warnings, never raised).
    """
    if not job_id:
        return None

    try:
        # Newest execution of this job that carries a synthesis
        record = await conn.fetchrow("""
            SELECT synthesis
            FROM pipeline.executions
            WHERE job_id = $1::uuid
              AND synthesis IS NOT NULL
            ORDER BY created_at DESC
            LIMIT 1
        """, job_id)

        payload = record["synthesis"] if record else None
        if not payload:
            return None

        # The column value may arrive already decoded or as a JSON string
        if isinstance(payload, str):
            import json
            payload = json.loads(payload)

        actions = []
        for position, entry in enumerate(payload.get("action_plan", [])):
            actions.append(
                ActionItemResponse(
                    id=entry.get("id", f"action_{position}"),
                    title=entry.get("title", ""),
                    why=entry.get("why", ""),
                    what=entry.get("what", ""),
                    who=entry.get("who", ""),
                    impact=entry.get("impact", ""),
                    evidence=entry.get("evidence", []),
                    estimated_rating_lift=entry.get("estimated_rating_lift"),
                    complexity=entry.get("complexity", "medium"),
                    priority=entry.get("priority", "medium"),
                    timeline=entry.get("timeline", "This month"),
                    related_subcode=entry.get("related_subcode", ""),
                )
            )

        annotations = []
        for note in payload.get("timeline_annotations", []):
            annotations.append(
                TimelineAnnotationResponse(
                    date=note.get("date", ""),
                    label=note.get("label", ""),
                    description=note.get("description", ""),
                    type=note.get("type", "neutral"),
                )
            )

        return SynthesisResponse(
            executive_narrative=payload.get("executive_narrative", ""),
            sentiment_insight=payload.get("sentiment_insight", ""),
            category_insight=payload.get("category_insight", ""),
            timeline_insight=payload.get("timeline_insight", ""),
            priority_domain=payload.get("priority_domain"),
            priority_issue=payload.get("priority_issue"),
            action_plan=actions,
            timeline_annotations=annotations,
            marketing_angles=payload.get("marketing_angles", []),
            competitor_context=payload.get("competitor_context"),
            generated_at=payload.get("generated_at"),
        )

    except Exception as e:
        # Best effort: the dashboard still renders without a synthesis.
        log.warning(f"Failed to fetch synthesis for job {job_id}: {e}")
        return None


# ==================== Drill-down Endpoints ====================


@router.get("/issues/{issue_id}/spans", response_model=list[SpanItem])
async def get_issue_spans(issue_id: str) -> list[SpanItem]:
    """Return every span linked to the given issue, newest review first.

    Spans are resolved through ``pipeline.issue_spans``; an issue with no
    linked spans yields an empty list.

    Raises:
        HTTPException: 503 when the database pool has not been set.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    async with _pool.acquire() as conn:
        query = """
            SELECT
                rs.span_id,
                rs.span_text,
                rs.urt_primary,
                rs.valence,
                rs.intensity,
                rs.review_time,
                rs.review_id as source_review_id,
                rs.entity
            FROM pipeline.review_spans rs
            JOIN pipeline.issue_spans iss ON rs.span_id = iss.span_id
            WHERE iss.issue_id = $1
            ORDER BY rs.review_time DESC
        """
        records = await conn.fetch(query, issue_id)

    # The connection is already released; only the fetched records are used here.
    result: list[SpanItem] = []
    for rec in records:
        when = rec["review_time"]
        result.append(
            SpanItem(
                span_id=rec["span_id"],
                span_text=rec["span_text"],
                urt_primary=rec["urt_primary"],
                valence=rec["valence"],
                intensity=rec["intensity"],
                review_time=when.isoformat() if when else None,
                source_review_id=rec["source_review_id"],
                entity=rec["entity"],
            )
        )
    return result


# ==================== Full Review Drill-Down ====================


class ReviewSpan(BaseModel):
    """A classified span of a review, as returned by the full-review drill-down.

    Instances are built in ``get_full_review`` from rows of
    ``pipeline.review_spans``.
    """

    span_id: str
    span_text: str
    # Filled from the span_start / span_end columns. get_full_review notes that
    # span offsets are computed against the normalized review text.
    start_offset: int | None = Field(None, description="Character offset in original text")
    end_offset: int | None = Field(None, description="Character end offset")
    # Primary URT code of the span, plus optional secondary codes.
    urt_primary: str | None
    urt_secondary: list[str] | None = None
    # Valence code (e.g. 'V+', 'V-', 'V0', 'V±') and intensity level of the span.
    valence: str | None
    intensity: str | None
    entity: str | None


class FullReview(BaseModel):
    """Complete review with all spans and metadata for drill-down.

    Response model of ``get_full_review``.
    """

    review_id: str
    source: str
    rating: int | None
    review_text: str | None
    text_normalized: str | None = None  # Text used for span offset calculation
    # ISO-8601 string of the review timestamp, or None when it is missing.
    review_time: str | None
    author_name: str | None = None
    # get_full_review always leaves this as None (not stored in the database).
    author_url: str | None = None
    # get_full_review sets this to a Google Maps place link when the review has
    # a place_id and its source is "google"; otherwise None.
    review_url: str | None = None
    # get_full_review currently fills this with the business_id as a fallback.
    business_name: str | None = None
    # Composite URT (derived from spans)
    urt_primary: str | None = None
    urt_secondary: list[str] | None = None
    # All classified spans
    spans: list[ReviewSpan] = Field(default_factory=list)


@router.get("/reviews/{review_id}", response_model=FullReview)
async def get_full_review(
    review_id: str,
    source: str = Query("google", description="Review source (default: google)"),
) -> FullReview:
    """
    Get a full review with all its classified spans.

    This enables drill-down from any aggregate metric to the raw source data.
    Spans are returned with their classifications, allowing the UI to highlight
    them within the original review text.

    The latest ``review_version`` of the review is returned. It is looked up
    by ``review_id`` and ``source`` first; if nothing matches, the lookup is
    retried by ``review_id`` alone. Only spans belonging to the returned
    source and version are included, so that span offsets refer to the text
    that is returned with them.

    Raises:
        HTTPException: 503 when the database pool has not been set, 404 when
            no review with this id exists.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    # Shared SELECT for both lookups. Joined with reviews_raw for author info.
    # Note: span offsets are computed against text_normalized, so both texts
    # are returned.
    review_select = """
        SELECT
            re.review_id,
            re.source,
            re.review_version,
            re.rating,
            re.text as review_text,
            re.text_normalized,
            re.review_time,
            rr.reviewer_name as author_name,
            re.business_id,
            re.place_id,
            re.urt_primary,
            re.urt_secondary
        FROM pipeline.reviews_enriched re
        LEFT JOIN pipeline.reviews_raw rr ON re.raw_id = rr.id
    """
    pick_latest = "ORDER BY re.review_version DESC LIMIT 1"

    async with _pool.acquire() as conn:
        review_row = await conn.fetchrow(
            f"{review_select} WHERE re.review_id = $1 AND re.source = $2 {pick_latest}",
            review_id,
            source,
        )

        if not review_row:
            # Try without source filter in case source is different
            review_row = await conn.fetchrow(
                f"{review_select} WHERE re.review_id = $1 {pick_latest}",
                review_id,
            )

        if not review_row:
            raise HTTPException(status_code=404, detail=f"Review {review_id} not found")

        # Spans of the review that was actually found: same source and same
        # version. Other queries in this module join spans to reviews on
        # (source, review_id, review_version); without the version filter,
        # spans of older versions would be mixed in. IS NOT DISTINCT FROM also
        # matches when the version is NULL.
        spans_query = """
            SELECT
                rs.span_id,
                rs.span_text,
                rs.span_start as start_offset,
                rs.span_end as end_offset,
                rs.urt_primary,
                rs.urt_secondary,
                rs.valence,
                rs.intensity,
                rs.entity
            FROM pipeline.review_spans rs
            WHERE rs.review_id = $1
              AND rs.source = $2
              AND rs.review_version IS NOT DISTINCT FROM $3
            ORDER BY rs.span_start, rs.span_id
        """
        span_rows = await conn.fetch(
            spans_query,
            review_id,
            review_row["source"],
            review_row["review_version"],
        )

    spans = [
        ReviewSpan(
            span_id=row["span_id"],
            span_text=row["span_text"],
            start_offset=row["start_offset"],
            end_offset=row["end_offset"],
            urt_primary=row["urt_primary"],
            urt_secondary=row["urt_secondary"],
            valence=row["valence"],
            intensity=row["intensity"],
            entity=row["entity"],
        )
        for row in span_rows
    ]

    # Construct Google Maps review URL if we have place_id
    place_id = review_row["place_id"]
    review_url = None
    if place_id and review_row["source"] == "google":
        review_url = f"https://www.google.com/maps/place/?q=place_id:{place_id}"

    return FullReview(
        review_id=review_row["review_id"],
        source=review_row["source"],
        rating=review_row["rating"],
        review_text=review_row["review_text"],
        text_normalized=review_row["text_normalized"],
        review_time=review_row["review_time"].isoformat() if review_row["review_time"] else None,
        author_name=review_row["author_name"],
        author_url=None,  # Not stored in DB
        review_url=review_url,
        business_name=review_row["business_id"],  # Use business_id as fallback
        urt_primary=review_row["urt_primary"],
        urt_secondary=review_row["urt_secondary"],
        spans=spans,
    )


@router.get("/reviews", response_model=PaginatedSpans)
async def get_reviews_by_filter(
    job_id: str | None = Query(None, description="Filter by job ID"),
    urt_domain: str | None = Query(None, description="Filter by URT domain"),
    sentiment: str | None = Query(None, description="Filter by sentiment"),
    intensity: str | None = Query(None, description="Filter by intensity"),
    page: int = Query(1, ge=1),
    page_size: int = Query(20, ge=1, le=100),
) -> PaginatedSpans:
    """
    Get the classified spans matching specific filters.

    Used for drilling down from chart segments to see the contributing review
    spans. ``sentiment`` is a comma-separated list of labels (for example
    ``positive,negative``); surrounding whitespace and letter case are ignored.

    Raises:
        HTTPException: 503 when the database pool has not been set.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    # Normalise the labels so that "positive, Negative" selects both filters
    # instead of silently dropping the second one.
    sentiment_filter = None
    if sentiment:
        labels = [label.strip().lower() for label in sentiment.split(",")]
        sentiment_filter = [label for label in labels if label] or None

    # The drill-down has no time window, so a very early start date is passed.
    start_date = datetime(2000, 1, 1)

    # Reuse _get_spans with the filters (no business filter)
    async with _pool.acquire() as conn:
        return await _get_spans(
            conn, job_id, None, start_date, sentiment_filter,
            urt_domain, intensity, page, page_size
        )