whyrating-engine-legacy/packages/reviewiq-pipeline/scripts/validate_l1_configs.py

#!/usr/bin/env python3
"""
Wave 1 L1 Config Validation Script

Validates L1 primitive configs against real review data by analyzing:
1. Coverage: % of spans mapped to enabled primitives
2. Top primitives by frequency
3. Disabled primitives appearing (potential misconfig)
4. Weight effectiveness

Usage:
    python validate_l1_configs.py --sector ENTERTAINMENT --job-url "gokarts"
    python validate_l1_configs.py --sector AUTOMOTIVE --job-url "clickrent"
    python validate_l1_configs.py --all
"""

import argparse
import asyncio
import json
import os
import sys
from collections import Counter, defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any

import asyncpg

# Paths
# All locations are derived from this script's own path: the "data" directory
# sits next to the directory that contains this file.
DATA_DIR = Path(__file__).parent.parent / "data"
# Directory holding the per-sector L1 config files read by load_l1_config().
CONFIGS_DIR = DATA_DIR / "primitive_configs" / "l1"
# Directory holding the per-sector brief files read by load_sector_brief().
BRIEFS_DIR = DATA_DIR / "sector_briefs"

# Primitive to URT domain mapping
# Primitives map to URT domains: O=Offering, P=People, J=Journey, E=Environment, A=Access, V=Value, R=Relationship
# NOTE: only O, P, J, E and V occur as values in this table; no primitive is
# assigned to A (Access) or R (Relationship).
PRIMITIVE_TO_DOMAIN = {
    # Quality -> Offering (O)
    "TASTE": "O", "CRAFT": "O", "FRESHNESS": "O", "TEMPERATURE": "O",
    "EFFECTIVENESS": "O", "ACCURACY": "O", "CONDITION": "O", "CONSISTENCY": "O",
    # Service -> People (P)
    "MANNER": "P", "COMPETENCE": "P", "ATTENTIVENESS": "P", "COMMUNICATION": "P",
    # Process -> Journey (J)
    "SPEED": "J", "FRICTION": "J", "RELIABILITY": "J", "AVAILABILITY": "J",
    # Environment -> Environment (E)
    "CLEANLINESS": "E", "COMFORT": "E", "SAFETY": "E", "AMBIANCE": "E",
    "ACCESSIBILITY": "E", "DIGITAL_UX": "E",
    # Value -> Value (V)
    "PRICE_LEVEL": "V", "PRICE_FAIRNESS": "V", "PRICE_TRANSPARENCY": "V", "VALUE_FOR_MONEY": "V",
}

# URT code to primitive mapping (simplified - maps URT codes to closest primitive)
# Several URT codes may share one primitive. Access (A*) and Relationship (R*)
# codes are folded into primitives that PRIMITIVE_TO_DOMAIN files under other
# domains (e.g. "A1.01" -> AVAILABILITY, which is listed under J).
# Codes that are not keys of this table have no primitive; map_urt_to_primitive()
# returns None for them.
URT_TO_PRIMITIVE = {
    # Offering codes
    "O1.01": "CONSISTENCY", "O1.02": "CRAFT", "O1.03": "FRESHNESS",
    "O1.04": "EFFECTIVENESS", "O1.05": "TASTE", "O1.06": "CONDITION",
    "O2.01": "ACCURACY", "O2.02": "EFFECTIVENESS", "O2.03": "CRAFT",
    "O3.01": "ACCURACY", "O3.02": "CONSISTENCY", "O3.03": "EFFECTIVENESS",
    # People codes
    "P1.01": "MANNER", "P1.02": "MANNER", "P1.03": "ATTENTIVENESS",
    "P1.04": "COMMUNICATION", "P1.05": "ATTENTIVENESS",
    "P2.01": "COMPETENCE", "P2.02": "COMPETENCE", "P2.03": "COMPETENCE",
    "P3.01": "COMMUNICATION", "P3.02": "COMMUNICATION", "P3.03": "COMMUNICATION",
    # Journey codes
    "J1.01": "SPEED", "J1.02": "RELIABILITY", "J1.03": "FRICTION",
    "J1.04": "SPEED", "J1.05": "RELIABILITY",
    "J2.01": "RELIABILITY", "J2.02": "RELIABILITY", "J2.03": "FRICTION",
    "J3.01": "FRICTION", "J3.02": "FRICTION", "J3.03": "FRICTION",
    # Environment codes
    "E1.01": "CLEANLINESS", "E1.02": "COMFORT", "E1.03": "AMBIANCE",
    "E1.04": "AMBIANCE", "E1.05": "COMFORT",
    "E2.01": "AMBIANCE", "E2.02": "COMFORT", "E2.03": "COMFORT",
    "E2.04": "AMBIANCE", "E2.05": "DIGITAL_UX",
    "E3.01": "SAFETY", "E3.02": "SAFETY", "E3.03": "ACCESSIBILITY",
    "E4.01": "ACCESSIBILITY", "E4.02": "ACCESSIBILITY", "E4.03": "DIGITAL_UX",
    # Access codes
    "A1.01": "AVAILABILITY", "A1.02": "AVAILABILITY", "A1.03": "AVAILABILITY",
    "A1.04": "ACCESSIBILITY", "A1.05": "ACCESSIBILITY",
    "A2.01": "ACCESSIBILITY", "A2.02": "ACCESSIBILITY", "A2.03": "DIGITAL_UX",
    "A3.01": "ACCESSIBILITY", "A3.02": "ACCESSIBILITY", "A3.03": "SPEED",
    "A4.01": "ACCESSIBILITY", "A4.02": "ACCESSIBILITY", "A4.03": "AVAILABILITY",
    # Value codes
    "V1.01": "PRICE_LEVEL", "V1.02": "PRICE_FAIRNESS", "V1.03": "PRICE_TRANSPARENCY",
    "V2.01": "PRICE_FAIRNESS", "V2.02": "PRICE_TRANSPARENCY", "V2.03": "VALUE_FOR_MONEY",
    "V3.01": "VALUE_FOR_MONEY", "V3.02": "VALUE_FOR_MONEY", "V3.03": "PRICE_FAIRNESS",
    "V4.01": "VALUE_FOR_MONEY", "V4.02": "VALUE_FOR_MONEY", "V4.03": "VALUE_FOR_MONEY",
    # Relationship codes
    "R1.01": "RELIABILITY", "R1.02": "RELIABILITY", "R1.03": "RELIABILITY",
    "R2.01": "RELIABILITY", "R2.02": "CONSISTENCY", "R2.03": "RELIABILITY",
    "R3.01": "MANNER", "R3.02": "MANNER", "R3.03": "COMMUNICATION",
    "R4.01": "CONSISTENCY", "R4.02": "RELIABILITY", "R4.03": "CONSISTENCY",
}


@dataclass
class ValidationResult:
    """Validation results for a sector.

    Instances are built by analyze_spans() from a list of span rows and one
    L1 config, and rendered by print_validation_report().
    """
    sector_code: str  # taken from the config's "sector_code" entry
    job_count: int  # analyze_spans() stores a placeholder value of 1
    review_count: int  # analyze_spans() stores 0; not tracked at span level
    span_count: int  # number of spans that were analyzed

    # Coverage metrics
    enabled_coverage: float  # fraction (0.0-1.0) of spans using enabled primitives
    disabled_hits: dict[str, int]  # disabled primitives that appeared
    unmapped_count: int  # spans that couldn't be mapped

    # Distribution
    primitive_counts: dict[str, int]  # all primitives by count
    domain_distribution: dict[str, int]  # O, P, J, E, A, V, R
    valence_distribution: dict[str, int]  # V+, V-, V0, V±

    # Top codes
    top_urt_codes: list[tuple[str, int]]  # (URT code, count), most frequent first

    # Recommendations
    recommendations: list[str]  # human-readable suggestions for the config


def load_l1_config(sector_code: str) -> dict[str, Any] | None:
    """Load the L1 config for a sector.

    The file is looked up as ``<sector_code lower-cased>_config.json`` inside
    ``CONFIGS_DIR``.

    Args:
        sector_code: Sector identifier such as ``"ENTERTAINMENT"``.

    Returns:
        The parsed JSON document, or ``None`` when no config file exists for
        the sector.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    config_file = CONFIGS_DIR / f"{sector_code.lower()}_config.json"
    try:
        # Decode as UTF-8 explicitly: without it open() uses the platform's
        # locale encoding, which mis-reads non-ASCII JSON on some systems.
        with open(config_file, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # A missing config is an expected outcome, reported to the caller as
        # None; opening directly also avoids an exists()/open() race.
        return None


def load_sector_brief(sector_code: str) -> dict[str, Any] | None:
    """Load the sector brief for a sector.

    The file is looked up as ``<sector_code lower-cased>_brief.json`` inside
    ``BRIEFS_DIR``.

    Args:
        sector_code: Sector identifier such as ``"ENTERTAINMENT"``.

    Returns:
        The parsed JSON document, or ``None`` when no brief file exists for
        the sector.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    brief_file = BRIEFS_DIR / f"{sector_code.lower()}_brief.json"
    try:
        # Decode as UTF-8 explicitly: without it open() uses the platform's
        # locale encoding, which mis-reads non-ASCII JSON on some systems.
        with open(brief_file, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # A missing brief is an expected outcome, reported to the caller as
        # None; opening directly also avoids an exists()/open() race.
        return None


def map_urt_to_primitive(urt_code: str) -> str | None:
    """Return the primitive listed for ``urt_code`` in URT_TO_PRIMITIVE.

    Codes that are not keys of the table yield ``None``.
    """
    try:
        return URT_TO_PRIMITIVE[urt_code]
    except KeyError:
        return None


async def fetch_spans_for_jobs(pool: asyncpg.Pool, job_url_pattern: str) -> list[dict]:
    """Fetch spans for jobs whose URL contains ``job_url_pattern``.

    The match is a case-insensitive substring test: the URL is lower-cased in
    SQL, the pattern is lower-cased here, and it is wrapped in ``%`` wildcards.
    Characters that LIKE treats specially (``%``, ``_`` and the backslash
    escape character) are escaped first so they match literally.

    Args:
        pool: Connection pool used to run the query.
        job_url_pattern: Text that must occur in the job URL.

    Returns:
        One dict per span row with the keys ``urt_primary``, ``valence``,
        ``intensity``, ``span_text`` and ``url``, ordered by the span's
        ``created_at`` column, newest first.
    """
    query = """
        SELECT
            rs.urt_primary,
            rs.valence,
            rs.intensity,
            rs.span_text,
            j.url
        FROM pipeline.review_spans rs
        JOIN pipeline.reviews_raw rr ON rs.review_id = rr.review_id
        JOIN public.jobs j ON rr.job_id = j.job_id
        WHERE LOWER(j.url) LIKE $1
        ORDER BY rs.created_at DESC
    """
    needle = job_url_pattern.lower()
    # Backslash is PostgreSQL's default LIKE escape character. It must be
    # doubled first, otherwise the escapes added for "%" and "_" would
    # themselves be escaped.
    for meta in ("\\", "%", "_"):
        needle = needle.replace(meta, "\\" + meta)
    rows = await pool.fetch(query, f"%{needle}%")
    return [dict(row) for row in rows]


async def fetch_all_spans(pool: asyncpg.Pool) -> list[dict]:
    """Return every row of pipeline.review_spans as a plain dict.

    Each dict carries ``urt_primary``, ``valence``, ``intensity`` and
    ``span_text``; rows are ordered by ``created_at``, newest first.
    """
    sql = """
        SELECT
            urt_primary,
            valence,
            intensity,
            span_text
        FROM pipeline.review_spans
        ORDER BY created_at DESC
    """
    records = await pool.fetch(sql)
    # Convert the driver's record objects into ordinary dictionaries.
    return list(map(dict, records))


def analyze_spans(
    spans: list[dict],
    config: dict[str, Any],
) -> ValidationResult:
    """Analyze spans against an L1 config.

    Every span's ``urt_primary`` code is mapped to a primitive with
    map_urt_to_primitive(). The function tallies primitives, domains,
    valences and URT codes, measures how many spans land on enabled
    primitives, and derives recommendations for the config.

    Args:
        spans: Span rows as dicts. Only ``urt_primary`` and ``valence`` are
            read. A missing or NULL ``valence`` is counted as ``"V0"``. A
            missing, NULL or empty ``urt_primary`` is counted as unmapped and
            contributes to no URT-code or domain tally.
        config: L1 config. ``sector_code`` is required; ``enabled``,
            ``disabled`` and ``weights`` are optional and may be null.

    Returns:
        A ValidationResult. ``enabled_coverage`` is the fraction of all spans
        whose primitive is enabled (0.0 for an empty input). ``job_count`` and
        ``review_count`` are placeholders (1 and 0).

    Raises:
        KeyError: If ``config`` has no ``sector_code`` entry.
    """
    sector_code = config["sector_code"]
    # "or" also covers keys that are present but null in the JSON config.
    enabled = set(config.get("enabled") or [])
    disabled = set(config.get("disabled") or [])
    weights = config.get("weights") or {}

    # Counters
    primitive_counts: Counter = Counter()
    domain_counts: Counter = Counter()
    valence_counts: Counter = Counter()
    urt_counts: Counter = Counter()
    disabled_hits: Counter = Counter()
    unmapped = 0
    enabled_hits = 0

    for span in spans:
        # Rows converted from DB records always contain the key, so a NULL
        # column arrives as None rather than triggering a .get() default.
        valence = span.get("valence") or "V0"
        valence_counts[valence] += 1

        urt_code = span.get("urt_primary")
        if not urt_code:
            # No code at all: nothing to map and no domain letter to read.
            unmapped += 1
            continue

        urt_counts[urt_code] += 1

        primitive = map_urt_to_primitive(urt_code)
        if not primitive:
            unmapped += 1
            # Still count the domain, taken from the code's first letter.
            domain_counts[urt_code[0]] += 1
            continue

        primitive_counts[primitive] += 1

        # The domain follows the primitive, not the code prefix: e.g. an
        # Access code mapped to AVAILABILITY is tallied under J.
        domain = PRIMITIVE_TO_DOMAIN.get(primitive, urt_code[0])
        domain_counts[domain] += 1

        if primitive in enabled:
            enabled_hits += 1
        elif primitive in disabled:
            disabled_hits[primitive] += 1

    # Calculate coverage
    total = len(spans)
    enabled_coverage = enabled_hits / total if total > 0 else 0.0

    # Generate recommendations
    recommendations = []

    # Disabled primitives that nevertheless appeared frequently.
    for prim, count in disabled_hits.most_common(5):
        if count >= 10:
            pct = count / total * 100
            recommendations.append(
                f"ENABLE {prim}: Disabled but appeared {count} times ({pct:.1f}%)"
            )

    # Weighted, enabled primitives that never appeared. Iterating the dict
    # keeps config order, so the report is identical from run to run.
    for prim, weight in weights.items():
        if prim in enabled and primitive_counts[prim] == 0:
            recommendations.append(
                f"CHECK {prim}: Weighted ({weight}x) but no appearances"
            )

    # Enabled primitives covering at least 10% of spans without a weight.
    for prim, count in primitive_counts.most_common(10):
        if prim in enabled and prim not in weights and count >= total * 0.1:
            pct = count / total * 100
            recommendations.append(
                f"WEIGHT {prim}: High frequency ({count}, {pct:.1f}%) but not weighted"
            )

    return ValidationResult(
        sector_code=sector_code,
        job_count=1,  # Will be updated by caller
        review_count=0,  # Not tracked at span level
        span_count=total,
        enabled_coverage=enabled_coverage,
        disabled_hits=dict(disabled_hits),
        unmapped_count=unmapped,
        primitive_counts=dict(primitive_counts),
        domain_distribution=dict(domain_counts),
        valence_distribution=dict(valence_counts),
        top_urt_codes=urt_counts.most_common(15),
        recommendations=recommendations,
    )


def print_validation_report(result: ValidationResult, config: dict, brief: dict | None):
    """Write a formatted validation report for one sector to stdout.

    Sections, in order: overview, config summary, domain distribution,
    valence distribution, top primitives, top URT codes, disabled primitives
    that appeared (if any), recommendations (if any) and, when a brief is
    given, its first five judgment areas.

    Args:
        result: Analysis outcome to render.
        config: The L1 config the analysis used; ``enabled``, ``disabled``
            and ``weights`` are read from it.
        brief: Optional sector brief; ``what_customers_judge`` is read from it.
    """
    rule = "=" * 70
    total = result.span_count

    def share(count):
        # Percentage of all analyzed spans; 0 when nothing was analyzed.
        return count / total * 100 if total > 0 else 0

    def most_frequent_first(counts):
        # Stable sort, so ties keep the mapping's own order.
        return sorted(counts.items(), key=lambda pair: pair[1], reverse=True)

    print("\n" + rule)
    print(f"VALIDATION REPORT: {result.sector_code}")
    print(rule)

    # Overview
    print("\n📊 OVERVIEW")
    print(f"   Spans analyzed: {total:,}")
    print(f"   Enabled coverage: {result.enabled_coverage:.1%}")
    if total > 0:
        print(f"   Unmapped spans: {result.unmapped_count} ({result.unmapped_count / total * 100:.1f}%)")
    else:
        print("   No spans")

    # Config summary
    enabled_cfg = config.get("enabled", [])
    disabled_cfg = config.get("disabled", [])
    weights = config.get("weights", {})
    print("\n⚙️  CONFIG SUMMARY")
    print(f"   Enabled: {len(enabled_cfg)} primitives")
    print(f"   Disabled: {len(disabled_cfg)} primitives")
    print(f"   Weighted: {len(weights)} primitives")

    # Domain distribution, in the fixed display order O P J E V R A
    print("\n📁 DOMAIN DISTRIBUTION")
    domain_rows = (
        ("O", "Offering"),
        ("P", "People"),
        ("J", "Journey"),
        ("E", "Environment"),
        ("V", "Value"),
        ("R", "Relationship"),
        ("A", "Access"),
    )
    for letter, label in domain_rows:
        count = result.domain_distribution.get(letter, 0)
        pct = share(count)
        # One bar character per two percentage points.
        bar = "█" * int(pct / 2)
        print(f"   {letter} {label:12} {count:4} ({pct:5.1f}%) {bar}")

    # Valence distribution
    print("\n😊 VALENCE DISTRIBUTION")
    for val in ("V+", "V-", "V0", "V±"):
        count = result.valence_distribution.get(val, 0)
        print(f"   {val}: {count:4} ({share(count):5.1f}%)")

    # Top primitives (at most 12), flagged as enabled or not
    print("\n🔝 TOP PRIMITIVES")
    enabled_set = set(enabled_cfg)
    for prim, count in most_frequent_first(result.primitive_counts)[:12]:
        status = "✓" if prim in enabled_set else "✗"
        weight = f"({weights[prim]}x)" if prim in weights else ""
        print(f"   {status} {prim:20} {count:4} ({share(count):5.1f}%) {weight}")

    # Top URT codes (at most 10) with the primitive each maps to
    print("\n📋 TOP URT CODES")
    for code, count in result.top_urt_codes[:10]:
        mapped = URT_TO_PRIMITIVE.get(code, "UNMAPPED")
        print(f"   {code}: {count:4} ({share(count):5.1f}%) → {mapped}")

    # Disabled but appearing
    if result.disabled_hits:
        print("\n⚠️  DISABLED BUT APPEARING")
        for prim, count in most_frequent_first(result.disabled_hits):
            print(f"   {prim}: {count} ({share(count):.1f}%)")

    # Recommendations
    if result.recommendations:
        print("\n💡 RECOMMENDATIONS")
        for rec in result.recommendations:
            print(f"   • {rec}")

    # Brief signals check (if available)
    if brief:
        print("\n📝 BRIEF SIGNALS CHECK")
        judged = brief.get("what_customers_judge", {})
        # The entry may be a dict wrapping an "items" list or a bare list.
        if isinstance(judged, dict):
            items = judged.get("items", [])
        elif isinstance(judged, list):
            items = judged
        else:
            items = []

        print("   Key judgment areas from brief:")
        for item in items[:5]:
            label = item.get("area", item) if isinstance(item, dict) else item
            print(f"   • {label}")

    print("\n" + rule)


async def run_validation(
    sector_code: str,
    job_url_pattern: str | None = None,
    db_url: str | None = None,
):
    """Validate one sector's L1 config against spans from the database.

    Args:
        sector_code: Sector whose config (and optional brief) is loaded.
        job_url_pattern: When given, only spans of jobs matching this URL
            pattern are analyzed; otherwise all spans are fetched.
        db_url: Database URL. When empty, the ``DATABASE_URL`` environment
            variable is used, and failing that a built-in local default.

    Returns:
        The ValidationResult, or ``None`` when the sector has no config or
        the job filter matched no spans. The report is printed as a side
        effect.
    """
    config = load_l1_config(sector_code)
    if not config:
        print(f"❌ No L1 config found for {sector_code}")
        return None

    # The brief is optional; the report skips its section when it is missing.
    brief = load_sector_brief(sector_code)

    if not db_url:
        db_url = os.environ.get(
            "DATABASE_URL",
            "postgresql://scraper:scraper123@localhost:5437/scraper",
        )

    # The pool is closed on every exit path, including the early return.
    async with asyncpg.create_pool(db_url) as pool:
        if not job_url_pattern:
            spans = await fetch_all_spans(pool)
        else:
            spans = await fetch_spans_for_jobs(pool, job_url_pattern)
            if not spans:
                print(f"⚠️  No spans found for jobs matching '{job_url_pattern}'")
                return None

        result = analyze_spans(spans, config)
        print_validation_report(result, config, brief)
        return result


async def run_all_validations(db_url: str | None = None):
    """Validate every known sector/job pair and print a closing summary.

    The sector-to-job table is fixed inside this function. Each pair is run
    through run_validation(); pairs that yield no result are left out of the
    summary.

    Args:
        db_url: Database URL forwarded unchanged to run_validation().
    """
    # Known jobs and their sectors
    sector_jobs = (
        ("ENTERTAINMENT", ("gokarts", "soho")),
        ("AUTOMOTIVE", ("clickrent",)),
        ("PERSONAL_SERVICES", ("fleitas",)),
        ("FOOD_DINING", ("fika",)),
    )
    rule = "=" * 70

    # Keyed by (sector, pattern); insertion order is the run order.
    collected = {}

    for sector, patterns in sector_jobs:
        print(f"\n{rule}")
        print(f"Validating {sector}...")
        print(rule)

        for pattern in patterns:
            outcome = await run_validation(sector, pattern, db_url)
            if outcome:
                collected[(sector, pattern)] = outcome

    # Summary
    print("\n" + rule)
    print("VALIDATION SUMMARY")
    print(rule)

    for (sector, pattern), outcome in collected.items():
        print(f"\n{sector} ({pattern}):")
        print(f"  Coverage: {outcome.enabled_coverage:.1%}")
        print(f"  Spans: {outcome.span_count}")
        if outcome.disabled_hits:
            print(f"  ⚠️ Disabled hits: {sum(outcome.disabled_hits.values())}")
        if outcome.recommendations:
            print(f"  Recommendations: {len(outcome.recommendations)}")


def main():
    """Parse the command line and run the requested validation.

    ``--all`` runs every known sector/job pair and takes precedence over
    ``--sector``. ``--sector`` validates one sector, optionally narrowed by
    ``--job-url``. With neither option the help text is printed and the
    process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Validate L1 primitive configs")
    option_specs = (
        ("--sector", {"help": "Sector code (e.g., ENTERTAINMENT)"}),
        ("--job-url", {"help": "Job URL pattern to filter (e.g., 'gokarts')"}),
        ("--all", {"action": "store_true", "help": "Run all validations"}),
        ("--db-url", {"help": "Database URL"}),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    args = parser.parse_args()

    # Nothing to do without a mode: show usage and signal failure.
    if not (args.all or args.sector):
        parser.print_help()
        sys.exit(1)

    if args.all:
        job = run_all_validations(args.db_url)
    else:
        job = run_validation(args.sector, args.job_url, args.db_url)
    asyncio.run(job)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()