# whyrating-engine-legacy/packages/reviewiq-pipeline/tests/conftest.py

"""
Pytest configuration and fixtures for reviewiq-pipeline tests.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

import pytest

# Sample data fixtures matching the contract examples


@pytest.fixture
def sample_raw_review() -> dict[str, Any]:
    """Provide one raw review record shaped like a Stage 0 output entry.

    The record is a 2-star review with a mixed-sentiment text, no response
    text, no photos and an empty raw payload.
    """
    review_text = "The food was great but the wait was absolutely terrible. We waited 45 minutes just to be seated, and another 30 minutes for our appetizers. The server Mike was rude and dismissive when we complained. However, the steak was cooked perfectly and the dessert was amazing."
    # Ordered (key, value) pairs; dict() keeps this insertion order.
    fields: tuple[tuple[str, Any], ...] = (
        ("review_id", "ChdDSUhNMG9nS0VJQ0FnSURBdWJQX3h3RRAB"),
        ("author_name", "John Smith"),
        ("author_id", "103456789012345678901"),
        ("rating", 2),
        ("text", review_text),
        ("review_time", "2026-01-20T14:30:00Z"),
        ("response_text", None),
        ("photos", []),
        ("raw_payload", {}),
    )
    return dict(fields)


@pytest.fixture
def sample_scraper_output(sample_raw_review: dict) -> dict[str, Any]:
    """Provide a completed Stage 0 (scraper) job payload.

    The payload wraps the ``sample_raw_review`` fixture as its only review,
    together with job identifiers, business info and scrape metadata.
    """
    output: dict[str, Any] = {}
    output["job_id"] = "test-job-001"
    output["status"] = "completed"
    output["business_id"] = "acme-corp"
    output["place_id"] = "ChIJN1t_tDeuEmsRUsoyG83frY4"
    output["business_info"] = dict(
        [
            ("name", "Acme Restaurant"),
            ("address", "123 Main St, Anytown, USA"),
            ("category", "Restaurant"),
            ("total_reviews", 1247),
            ("average_rating", 4.2),
        ]
    )
    # The same review object supplied by the fixture is embedded, not a copy.
    output["reviews"] = [sample_raw_review]
    output["scrape_time_ms"] = 12500
    output["reviews_scraped"] = 1
    output["scraper_version"] = "v1.0.0"
    return output


@pytest.fixture
def sample_normalized_review() -> dict[str, Any]:
    """Provide one normalized review record shaped like a Stage 1 output entry.

    Carries both the original review text and its lower-cased,
    punctuation-free counterpart, plus language, size, rating and hash fields.
    """
    original_text = "The food was great but the wait was absolutely terrible. We waited 45 minutes just to be seated, and another 30 minutes for our appetizers. The server Mike was rude and dismissive when we complained. However, the steak was cooked perfectly and the dessert was amazing."
    normalized_text = "the food was great but the wait was absolutely terrible we waited 45 minutes just to be seated and another 30 minutes for our appetizers the server mike was rude and dismissive when we complained however the steak was cooked perfectly and the dessert was amazing"

    review: dict[str, Any] = {}
    review["source"] = "google"
    review["review_id"] = "ChdDSUhNMG9nS0VJQ0FnSURBdWJQX3h3RRAB"
    review["review_version"] = 1
    review["business_id"] = "acme-corp"
    review["place_id"] = "ChIJN1t_tDeuEmsRUsoyG83frY4"
    review["text"] = original_text
    review["text_normalized"] = normalized_text
    review["text_language"] = "en"
    review["text_length"] = 267
    review["word_count"] = 52
    review["rating"] = 2
    review["review_time"] = "2026-01-20T14:30:00Z"
    review["author_name"] = "John Smith"
    review["content_hash"] = "a1b2c3d4e5f6789012345678901234567890123456789012345678901234abcd"
    review["raw_id"] = 12345
    return review


@pytest.fixture
def sample_stage1_output(sample_normalized_review: dict) -> dict[str, Any]:
    """Provide a Stage 1 result payload.

    Contains the ``sample_normalized_review`` fixture as the single normalized
    review, with counters reporting one review in, one out and nothing skipped.
    """
    counters = dict(
        [
            ("input_count", 1),
            ("output_count", 1),
            ("skipped_empty", 0),
            ("skipped_duplicate", 0),
        ]
    )
    normalized = [sample_normalized_review]
    return dict(
        [
            ("job_id", "test-job-001"),
            ("business_id", "acme-corp"),
            ("place_id", "ChIJN1t_tDeuEmsRUsoyG83frY4"),
            ("reviews_normalized", normalized),
            ("stats", counters),
        ]
    )


@pytest.fixture
def sample_span() -> dict[str, Any]:
    """Provide one extracted span record.

    The span is the negative wait-time passage, flagged as the primary span,
    with its character offsets and classification fields.
    """
    wait_complaint = "the wait was absolutely terrible. We waited 45 minutes just to be seated, and another 30 minutes for our appetizers"
    # Ordered (key, value) pairs; dict() keeps this insertion order.
    fields: tuple[tuple[str, Any], ...] = (
        ("span_id", "SPN-b2c3d4e5f6789012"),
        ("span_index", 1),
        ("span_text", wait_complaint),
        ("span_start", 23),
        ("span_end", 138),
        ("profile", "standard"),
        ("urt_primary", "J1.01"),
        ("urt_secondary", []),
        ("valence", "V-"),
        ("intensity", "I3"),
        ("comparative", "CR-N"),
        ("specificity", "S3"),
        ("actionability", "A2"),
        ("temporal", "TC"),
        ("evidence", "EC"),
        ("confidence", "high"),
        ("usn", "URT:S:J1.01:-3:32TC.EC.N"),
        ("is_primary", True),
    )
    return dict(fields)


@pytest.fixture
def sample_classified_review(sample_span: dict) -> dict[str, Any]:
    """Provide one classified review shaped like a Stage 2 output entry.

    The review holds two spans: an inline positive span about the food,
    followed by the ``sample_span`` fixture.
    """
    food_span: dict[str, Any] = dict(
        [
            ("span_id", "SPN-a1b2c3d4e5f67890"),
            ("span_index", 0),
            ("span_text", "The food was great"),
            ("span_start", 0),
            ("span_end", 18),
            ("profile", "standard"),
            ("urt_primary", "O1.01"),
            ("urt_secondary", []),
            ("valence", "V+"),
            ("intensity", "I2"),
            ("comparative", "CR-N"),
            ("confidence", "high"),
            ("usn", "URT:S:O1.01:+2:21TC.ES.N"),
            ("is_primary", False),
        ]
    )
    quotes_by_code = dict(
        [
            ("J1.01", "waited 45 minutes just to be seated"),
            ("P1.02", "rude and dismissive"),
        ]
    )
    placeholder_embedding = [0.1] * 384  # Placeholder

    review: dict[str, Any] = {}
    review["source"] = "google"
    review["review_id"] = "ChdDSUhNMG9nS0VJQ0FnSURBdWJQX3h3RRAB"
    review["review_version"] = 1
    review["urt_primary"] = "J1.01"
    review["urt_secondary"] = ["P1.02"]
    review["valence"] = "V±"
    review["intensity"] = "I3"
    review["comparative"] = "CR-N"
    review["staff_mentions"] = ["Mike"]
    review["quotes"] = quotes_by_code
    review["trust_score"] = 0.85
    review["embedding"] = placeholder_embedding
    review["spans"] = [food_span, sample_span]
    review["classification_confidence"] = {"overall": 0.85}
    review["processing_time_ms"] = 500
    return review


@pytest.fixture
def sample_stage2_output(sample_classified_review: dict) -> dict[str, Any]:
    """Provide a Stage 2 result payload.

    Wraps the ``sample_classified_review`` fixture as the single classified
    review, together with version labels and batch statistics.
    """
    batch_stats: dict[str, Any] = {}
    batch_stats["input_count"] = 1
    batch_stats["success_count"] = 1
    batch_stats["error_count"] = 0
    batch_stats["total_spans"] = 2
    batch_stats["avg_spans_per_review"] = 2.0
    batch_stats["llm_tokens_used"] = 1500
    batch_stats["llm_cost_usd"] = 0.001

    return dict(
        [
            ("batch_id", "batch001"),
            ("taxonomy_version", "v5.1"),
            ("model_version", "gpt-4o-mini"),
            ("prompt_version", "v1.0"),
            ("reviews_classified", [sample_classified_review]),
            ("stats", batch_stats),
        ]
    )


@pytest.fixture
def sample_routed_span() -> dict[str, Any]:
    """Provide one routed-span record shaped like a Stage 3 output entry.

    Links a span id to an issue id and routing key, marked as a new issue.
    """
    routed: dict[str, Any] = {}
    routed["span_id"] = "SPN-b2c3d4e5f6789012"
    routed["issue_id"] = "ISS-7a8b9c0d1e2f3a4b"
    routed["routing_key"] = "acme-corp|ChIJN1t_tDeuEmsRUsoyG83frY4|J1.01|"
    routed["is_new_issue"] = True
    return routed


@pytest.fixture
def sample_stage3_output(sample_routed_span: dict) -> dict[str, Any]:
    """Provide a Stage 3 result payload.

    Contains the ``sample_routed_span`` fixture as the single routed span,
    one created issue id, no updated issues, and routing counters.
    """
    routing_stats = dict(
        [
            ("spans_processed", 2),
            ("spans_routed", 1),
            ("spans_skipped", 1),
            ("issues_created", 1),
            ("issues_updated", 0),
        ]
    )
    result: dict[str, Any] = {}
    result["routed_spans"] = [sample_routed_span]
    result["issues_created"] = ["ISS-7a8b9c0d1e2f3a4b"]
    result["issues_updated"] = []
    result["stats"] = routing_stats
    return result


@pytest.fixture
def sample_fact() -> dict[str, Any]:
    """Provide one fact record shaped like a Stage 4 output entry.

    A daily ``urt_code`` fact for code ``J1.01`` with its count, strength,
    intensity, comparative and trust-weighted fields.
    """
    # Ordered (key, value) pairs; dict() keeps this insertion order.
    fields: tuple[tuple[str, Any], ...] = (
        ("business_id", "acme-corp"),
        ("place_id", "ChIJN1t_tDeuEmsRUsoyG83frY4"),
        ("period_date", "2026-01-20"),
        ("bucket_type", "day"),
        ("subject_type", "urt_code"),
        ("subject_id", "J1.01"),
        ("taxonomy_version", "v5.1"),
        ("review_count", 1),
        ("span_count", 1),
        ("negative_count", 1),
        ("positive_count", 0),
        ("neutral_count", 0),
        ("mixed_count", 0),
        ("strength_score", 4.0),
        ("negative_strength", 4.0),
        ("positive_strength", 0.0),
        ("avg_rating", 2.0),
        ("i1_count", 0),
        ("i2_count", 0),
        ("i3_count", 1),
        ("cr_better", 0),
        ("cr_worse", 0),
        ("cr_same", 0),
        ("trust_weighted_strength", 3.4),
        ("trust_weighted_negative", 3.4),
    )
    return dict(fields)


@pytest.fixture
def sample_stage4_output(sample_fact: dict) -> dict[str, Any]:
    """Provide a Stage 4 result payload.

    Contains the ``sample_fact`` fixture as the single written fact, plus
    run statistics for the business and date.
    """
    run_stats: dict[str, Any] = {}
    run_stats["business_id"] = "acme-corp"
    run_stats["date"] = "2026-01-20"
    run_stats["locations_processed"] = 1
    run_stats["codes_aggregated"] = 1
    run_stats["facts_upserted"] = 1

    return dict(
        [
            ("facts_written", [sample_fact]),
            ("stats", run_stats),
        ]
    )


@pytest.fixture
def fixtures_dir() -> Path:
    """Return the ``fixtures`` directory located next to this module."""
    tests_dir = Path(__file__).parent
    return tests_dir.joinpath("fixtures")


# Helper to load JSON fixtures
def load_fixture(name: str) -> dict[str, Any]:
    """Load and parse the JSON fixture called *name*.

    The file is looked up as ``fixtures/<name>.json`` next to this module.

    Args:
        name: Fixture file name without the ``.json`` suffix.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If the fixture file does not exist.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    fixture_path = Path(__file__).parent / "fixtures" / f"{name}.json"
    try:
        # Decode explicitly as UTF-8: the platform default encoding would
        # garble non-ASCII fixture content (e.g. "V±") on non-UTF-8 locales.
        raw_json = fixture_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # Read first and translate the error, rather than checking exists()
        # beforehand; the exception type and message stay as callers expect.
        raise FileNotFoundError(f"Fixture not found: {name}") from None
    return json.loads(raw_json)