feat: Add reviewiq-pipeline package for LLM-powered review classification

Implement a standalone Python package for processing customer reviews through a 4-stage pipeline using URT (Universal Review Taxonomy) v5.1:

- Stage 1: Normalization (text cleaning, language detection, deduplication)
- Stage 2: LLM Classification (OpenAI/Anthropic span extraction with URT codes)
- Stage 3: Issue Routing (deterministic issue ID generation, span linking)
- Stage 4: Fact Aggregation (time series metrics for dashboards)

Package includes:

- TypedDict contracts matching Pipeline-Contracts-v1.md
- Async database layer with asyncpg and 5 SQL migrations
- LLM client abstraction supporting both OpenAI and Anthropic
- Sentence-transformers integration for embeddings
- Validation rules V1.x through V4.x
- CLI commands: migrate, run, validate, check
- 55 unit and integration tests (all passing)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 18:07:11 +00:00
parent b780a23b66
commit 7d720f5378
34 changed files with 7222 additions and 0 deletions
--- a/packages/reviewiq-pipeline/tests/__init__.py
+++ b/packages/reviewiq-pipeline/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests for reviewiq-pipeline."""
--- a/packages/reviewiq-pipeline/tests/conftest.py
+++ b/packages/reviewiq-pipeline/tests/conftest.py
@@ -0,0 +1,269 @@
+"""
+Pytest configuration and fixtures for reviewiq-pipeline tests.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+# Sample data fixtures matching the contract examples
+
+
+@pytest.fixture
+def sample_raw_review() -> dict[str, Any]:
+    """Return a single raw review in the shape Stage 0 emits."""
+    # One review that mixes praise (food, steak, dessert) with complaints (wait, server).
+    review_text = "The food was great but the wait was absolutely terrible. We waited 45 minutes just to be seated, and another 30 minutes for our appetizers. The server Mike was rude and dismissive when we complained. However, the steak was cooked perfectly and the dessert was amazing."
+    raw_review: dict[str, Any] = {
+        "review_id": "ChdDSUhNMG9nS0VJQ0FnSURBdWJQX3h3RRAB",
+        "author_name": "John Smith",
+        "author_id": "103456789012345678901",
+        "rating": 2,
+        "text": review_text,
+        "review_time": "2026-01-20T14:30:00Z",
+        "response_text": None,
+        "photos": [],
+        "raw_payload": {},
+    }
+    return raw_review
+
+
+@pytest.fixture
+def sample_scraper_output(sample_raw_review: dict) -> dict[str, Any]:
+    """Return a completed Stage 0 scrape result wrapping the sample raw review."""
+    # Business-level metadata reported alongside the scraped reviews.
+    business_info = {
+        "name": "Acme Restaurant",
+        "address": "123 Main St, Anytown, USA",
+        "category": "Restaurant",
+        "total_reviews": 1247,
+        "average_rating": 4.2,
+    }
+    scrape_result: dict[str, Any] = {
+        "job_id": "test-job-001",
+        "status": "completed",
+        "business_id": "acme-corp",
+        "place_id": "ChIJN1t_tDeuEmsRUsoyG83frY4",
+        "business_info": business_info,
+        "reviews": [sample_raw_review],
+        "scrape_time_ms": 12500,
+        "reviews_scraped": 1,
+        "scraper_version": "v1.0.0",
+    }
+    return scrape_result
+
+
+@pytest.fixture
+def sample_normalized_review() -> dict[str, Any]:
+    """Return one normalized review in the shape Stage 1 emits.
+
+    ``text_length`` and ``word_count`` are computed from ``text`` so the
+    fixture stays self-consistent; the previously hard-coded 267 / 52 did not
+    match the 268-character, 46-word sample text.
+    """
+    text = "The food was great but the wait was absolutely terrible. We waited 45 minutes just to be seated, and another 30 minutes for our appetizers. The server Mike was rude and dismissive when we complained. However, the steak was cooked perfectly and the dessert was amazing."
+    return {
+        "source": "google",
+        "review_id": "ChdDSUhNMG9nS0VJQ0FnSURBdWJQX3h3RRAB",
+        "review_version": 1,
+        "business_id": "acme-corp",
+        "place_id": "ChIJN1t_tDeuEmsRUsoyG83frY4",
+        "text": text,
+        "text_normalized": "the food was great but the wait was absolutely terrible we waited 45 minutes just to be seated and another 30 minutes for our appetizers the server mike was rude and dismissive when we complained however the steak was cooked perfectly and the dessert was amazing",
+        "text_language": "en",
+        # Derived rather than typed in, so the size fields cannot drift from the text.
+        "text_length": len(text),
+        "word_count": len(text.split()),
+        "rating": 2,
+        "review_time": "2026-01-20T14:30:00Z",
+        "author_name": "John Smith",
+        "content_hash": "a1b2c3d4e5f6789012345678901234567890123456789012345678901234abcd",
+        "raw_id": 12345,
+    }
+
+
+@pytest.fixture
+def sample_stage1_output(sample_normalized_review: dict) -> dict[str, Any]:
+    """Return a Stage 1 result holding the single sample normalized review."""
+    # One review in, one review out, nothing skipped.
+    stats = {
+        "input_count": 1,
+        "output_count": 1,
+        "skipped_empty": 0,
+        "skipped_duplicate": 0,
+    }
+    stage1_result: dict[str, Any] = {
+        "job_id": "test-job-001",
+        "business_id": "acme-corp",
+        "place_id": "ChIJN1t_tDeuEmsRUsoyG83frY4",
+        "reviews_normalized": [sample_normalized_review],
+        "stats": stats,
+    }
+    return stage1_result
+
+
+@pytest.fixture
+def sample_span() -> dict[str, Any]:
+    """Return one extracted span: the negative, primary wait-time complaint."""
+    quoted_text = "the wait was absolutely terrible. We waited 45 minutes just to be seated, and another 30 minutes for our appetizers"
+    span: dict[str, Any] = {
+        # Identity and position of the span
+        "span_id": "SPN-b2c3d4e5f6789012",
+        "span_index": 1,
+        "span_text": quoted_text,
+        "span_start": 23,
+        "span_end": 138,
+        # Classification codes
+        "profile": "standard",
+        "urt_primary": "J1.01",
+        "urt_secondary": [],
+        "valence": "V-",
+        "intensity": "I3",
+        "comparative": "CR-N",
+        "specificity": "S3",
+        "actionability": "A2",
+        "temporal": "TC",
+        "evidence": "EC",
+        "confidence": "high",
+        "usn": "URT:S:J1.01:-3:32TC.EC.N",
+        "is_primary": True,
+    }
+    return span
+
+
+@pytest.fixture
+def sample_classified_review(sample_span: dict) -> dict[str, Any]:
+    """Return one Stage 2 classified review carrying two spans.
+
+    The first span is a positive, non-primary remark defined here; the second
+    is the ``sample_span`` fixture.
+    """
+    food_span = {
+        "span_id": "SPN-a1b2c3d4e5f67890",
+        "span_index": 0,
+        "span_text": "The food was great",
+        "span_start": 0,
+        "span_end": 18,
+        "profile": "standard",
+        "urt_primary": "O1.01",
+        "urt_secondary": [],
+        "valence": "V+",
+        "intensity": "I2",
+        "comparative": "CR-N",
+        "confidence": "high",
+        "usn": "URT:S:O1.01:+2:21TC.ES.N",
+        "is_primary": False,
+    }
+    # Supporting quotes, keyed by the URT code they illustrate.
+    quotes = {
+        "J1.01": "waited 45 minutes just to be seated",
+        "P1.02": "rude and dismissive",
+    }
+    classified: dict[str, Any] = {
+        "source": "google",
+        "review_id": "ChdDSUhNMG9nS0VJQ0FnSURBdWJQX3h3RRAB",
+        "review_version": 1,
+        "urt_primary": "J1.01",
+        "urt_secondary": ["P1.02"],
+        "valence": "V±",
+        "intensity": "I3",
+        "comparative": "CR-N",
+        "staff_mentions": ["Mike"],
+        "quotes": quotes,
+        "trust_score": 0.85,
+        "embedding": [0.1] * 384,  # Placeholder
+        "spans": [food_span, sample_span],
+        "classification_confidence": {"overall": 0.85},
+        "processing_time_ms": 500,
+    }
+    return classified
+
+
+@pytest.fixture
+def sample_stage2_output(sample_classified_review: dict) -> dict[str, Any]:
+    """Return a Stage 2 batch result holding the single sample classified review."""
+    # Batch-level counters for the one classified review and its two spans.
+    stats = {
+        "input_count": 1,
+        "success_count": 1,
+        "error_count": 0,
+        "total_spans": 2,
+        "avg_spans_per_review": 2.0,
+        "llm_tokens_used": 1500,
+        "llm_cost_usd": 0.001,
+    }
+    stage2_result: dict[str, Any] = {
+        "batch_id": "batch001",
+        "taxonomy_version": "v5.1",
+        "model_version": "gpt-4o-mini",
+        "prompt_version": "v1.0",
+        "reviews_classified": [sample_classified_review],
+        "stats": stats,
+    }
+    return stage2_result
+
+
+@pytest.fixture
+def sample_routed_span() -> dict[str, Any]:
+    """Return one Stage 3 routed span that opened a new issue."""
+    routed: dict[str, Any] = {
+        "span_id": "SPN-b2c3d4e5f6789012",
+        "issue_id": "ISS-7a8b9c0d1e2f3a4b",
+        # The routing key ends with "|": its last segment is empty.
+        "routing_key": "acme-corp|ChIJN1t_tDeuEmsRUsoyG83frY4|J1.01|",
+        "is_new_issue": True,
+    }
+    return routed
+
+
+@pytest.fixture
+def sample_stage3_output(sample_routed_span: dict) -> dict[str, Any]:
+    """Return a Stage 3 result: one span routed, one issue created."""
+    # Two spans seen; one routed into a new issue, the other skipped.
+    stats = {
+        "spans_processed": 2,
+        "spans_routed": 1,
+        "spans_skipped": 1,
+        "issues_created": 1,
+        "issues_updated": 0,
+    }
+    stage3_result: dict[str, Any] = {
+        "routed_spans": [sample_routed_span],
+        "issues_created": ["ISS-7a8b9c0d1e2f3a4b"],
+        "issues_updated": [],
+        "stats": stats,
+    }
+    return stage3_result
+
+
+@pytest.fixture
+def sample_fact() -> dict[str, Any]:
+    """Return one Stage 4 daily fact row for URT code J1.01."""
+    fact: dict[str, Any] = {
+        # What the fact describes
+        "business_id": "acme-corp",
+        "place_id": "ChIJN1t_tDeuEmsRUsoyG83frY4",
+        "period_date": "2026-01-20",
+        "bucket_type": "day",
+        "subject_type": "urt_code",
+        "subject_id": "J1.01",
+        "taxonomy_version": "v5.1",
+        # Volume and valence counts
+        "review_count": 1,
+        "span_count": 1,
+        "negative_count": 1,
+        "positive_count": 0,
+        "neutral_count": 0,
+        "mixed_count": 0,
+        # Strength metrics
+        "strength_score": 4.0,
+        "negative_strength": 4.0,
+        "positive_strength": 0.0,
+        "avg_rating": 2.0,
+        # Intensity and comparative breakdowns
+        "i1_count": 0,
+        "i2_count": 0,
+        "i3_count": 1,
+        "cr_better": 0,
+        "cr_worse": 0,
+        "cr_same": 0,
+        # Trust-weighted metrics
+        "trust_weighted_strength": 3.4,
+        "trust_weighted_negative": 3.4,
+    }
+    return fact
+
+
+@pytest.fixture
+def sample_stage4_output(sample_fact: dict) -> dict[str, Any]:
+    """Return a Stage 4 result that wrote the single sample fact."""
+    # Run-level summary for the one business/date that was aggregated.
+    stats = {
+        "business_id": "acme-corp",
+        "date": "2026-01-20",
+        "locations_processed": 1,
+        "codes_aggregated": 1,
+        "facts_upserted": 1,
+    }
+    stage4_result: dict[str, Any] = {
+        "facts_written": [sample_fact],
+        "stats": stats,
+    }
+    return stage4_result
+
+
+@pytest.fixture
+def fixtures_dir() -> Path:
+    """Return the ``fixtures`` directory that sits beside this conftest file."""
+    tests_root = Path(__file__).parent
+    return tests_root.joinpath("fixtures")
+
+
+# Helper to load JSON fixtures
+def load_fixture(name: str) -> dict[str, Any]:
+    """Load and parse ``fixtures/<name>.json`` located beside this file.
+
+    Args:
+        name: Fixture file name without the ``.json`` suffix.
+
+    Returns:
+        The parsed JSON document.
+
+    Raises:
+        FileNotFoundError: If no fixture file with that name exists.
+    """
+    fixture_path = Path(__file__).parent / "fixtures" / f"{name}.json"
+    if not fixture_path.exists():
+        raise FileNotFoundError(f"Fixture not found: {name}")
+    # Read as UTF-8 explicitly instead of relying on the platform's locale
+    # encoding, so non-ASCII fixture content decodes the same everywhere.
+    return json.loads(fixture_path.read_text(encoding="utf-8"))
--- a/packages/reviewiq-pipeline/tests/integration/__init__.py
+++ b/packages/reviewiq-pipeline/tests/integration/__init__.py
@@ -0,0 +1 @@
+"""Integration tests for reviewiq-pipeline."""
--- a/packages/reviewiq-pipeline/tests/integration/test_e2e.py
+++ b/packages/reviewiq-pipeline/tests/integration/test_e2e.py
@@ -0,0 +1,179 @@
+"""End-to-end integration tests for the pipeline."""
+
+from __future__ import annotations
+
+import pytest
+
+
+class TestPipelineE2E:
+    """End-to-end contract checks between pipeline stages."""
+
+    def test_stage1_to_stage2_contract(self, sample_scraper_output):
+        """Check that Stage 1 output carries the fields this test requires for Stage 2."""
+        from reviewiq_pipeline.config import Config
+        from reviewiq_pipeline.stages.stage1_normalize import Stage1Normalizer
+
+        config = Config()
+        normalizer = Stage1Normalizer(config)
+
+        # Note: this is a synchronous test, so we use the batch method.
+        normalized = normalizer.normalize_batch(
+            sample_scraper_output["reviews"],
+            sample_scraper_output["business_id"],
+            sample_scraper_output["place_id"],
+        )
+
+        # Verify Stage 1 output can be used as Stage 2 input
+        assert len(normalized) > 0
+        for review in normalized:
+            # Check required fields for Stage 2
+            assert review["source"] is not None
+            assert review["review_id"] is not None
+            assert review["text"] is not None
+            assert review["text_normalized"] is not None
+            assert review["rating"] is not None
+            assert review["review_time"] is not None
+
+    def test_stage2_to_stage3_contract(self, sample_stage2_output):
+        """Check that negative and mixed Stage 2 spans can be routed by Stage 3."""
+        from reviewiq_pipeline.config import Config
+        from reviewiq_pipeline.stages.stage3_route import Stage3Router
+        from reviewiq_pipeline.contracts import SpanToRoute
+
+        config = Config()
+        router = Stage3Router(config)
+
+        # Extract negative and mixed spans from Stage 2 output
+        spans_to_route = []
+        for review in sample_stage2_output["reviews_classified"]:
+            for span in review.get("spans", []):
+                if span["valence"] in ("V-", "V±"):
+                    spans_to_route.append(
+                        SpanToRoute(
+                            span_id=span["span_id"],
+                            business_id="test-business",
+                            place_id="test-place",
+                            urt_primary=span["urt_primary"],
+                            valence=span["valence"],
+                            intensity=span["intensity"],
+                            entity_normalized=span.get("entity_normalized"),
+                            review_time="2026-01-20T14:30:00Z",
+                            confidence=span.get("confidence", "medium"),
+                            trust_score=0.85,
+                        )
+                    )
+
+        # Without this guard the routing loop below would pass vacuously if the
+        # fixture ever stopped containing a negative or mixed span.
+        assert spans_to_route, "expected at least one negative or mixed span to route"
+
+        # Verify we can route these spans
+        for span in spans_to_route:
+            routed = router.route_span_sync(span)
+            assert routed["span_id"] == span["span_id"]
+            assert routed["issue_id"].startswith("ISS-")
+
+    def test_validation_chain(
+        self,
+        sample_stage1_output,
+        sample_stage2_output,
+        sample_stage3_output,
+        sample_stage4_output,
+    ):
+        """Check that the sample output of every stage passes its validator."""
+        from reviewiq_pipeline.validation.validators import (
+            validate_stage1_output,
+            validate_stage2_output,
+            validate_stage4_output,
+            Stage3Validator,
+        )
+
+        # Validate Stage 1
+        result1 = validate_stage1_output(sample_stage1_output)
+        assert result1["passed"], f"Stage 1 failed: {result1['errors']}"
+
+        # Validate Stage 2
+        result2 = validate_stage2_output(sample_stage2_output)
+        assert result2["passed"], f"Stage 2 failed: {result2['errors']}"
+
+        # Validate Stage 3 (sync version)
+        validator3 = Stage3Validator()
+        result3 = validator3.validate_sync(sample_stage3_output)
+        assert result3["passed"], f"Stage 3 failed: {result3['errors']}"
+
+        # Validate Stage 4
+        result4 = validate_stage4_output(sample_stage4_output)
+        assert result4["passed"], f"Stage 4 failed: {result4['errors']}"
+
+    def test_text_normalization_preserves_meaning(self, sample_raw_review):
+        """Check that key terms of the review survive normalization."""
+        from reviewiq_pipeline.services.text_processor import TextProcessor
+
+        processor = TextProcessor()
+        result = processor.normalize(sample_raw_review["text"])
+
+        # Key terms should still be present (lowercased)
+        assert "food" in result.normalized
+        assert "wait" in result.normalized
+        assert "terrible" in result.normalized
+        assert "mike" in result.normalized
+        assert "steak" in result.normalized
+
+    def test_issue_id_determinism(self):
+        """Check that routing the same span repeatedly yields one issue ID."""
+        from reviewiq_pipeline.config import Config
+        from reviewiq_pipeline.stages.stage3_route import Stage3Router
+
+        config = Config()
+        router = Stage3Router(config)
+
+        span = {
+            "span_id": "test-span",
+            "business_id": "acme-corp",
+            "place_id": "place123",
+            "urt_primary": "J1.01",
+            "valence": "V-",
+            "intensity": "I3",
+            "entity_normalized": "mike",
+            "review_time": "2026-01-20T14:30:00Z",
+            "confidence": "high",
+            "trust_score": 0.85,
+        }
+
+        # Route the same span multiple times
+        ids = [router.route_span_sync(span)["issue_id"] for _ in range(10)]
+
+        # All IDs should be identical
+        assert len(set(ids)) == 1
+
+
+@pytest.mark.asyncio
+class TestAsyncPipeline:
+    """Async pipeline tests (require database)."""
+
+    @pytest.mark.skip(reason="Requires database connection")
+    async def test_full_pipeline_flow(self, sample_scraper_output):
+        """Run the sample scraper output through the whole pipeline."""
+        from reviewiq_pipeline import Pipeline, Config
+
+        pipeline = Pipeline(
+            Config(
+                database_url="postgresql://localhost:5432/reviewiq_test",
+                llm_provider="openai",
+            )
+        )
+
+        try:
+            await pipeline.initialize()
+            outcome = await pipeline.process(sample_scraper_output)
+
+            assert outcome.stage1 is not None
+            # Accept either a successful run or one that reported validation results.
+            assert outcome.success or len(outcome.validation) > 0
+
+        finally:
+            # Always close the pipeline, even when an assertion above fails.
+            await pipeline.close()
--- a/packages/reviewiq-pipeline/tests/test_stage1.py
+++ b/packages/reviewiq-pipeline/tests/test_stage1.py
@@ -0,0 +1,218 @@
+"""Tests for Stage 1: Normalization."""
+
+from __future__ import annotations
+
+import pytest
+
+from reviewiq_pipeline.services.text_processor import (
+    TextProcessor,
+    is_valid_iso639,
+    is_valid_sha256,
+)
+from reviewiq_pipeline.stages.stage1_normalize import Stage1Normalizer
+from reviewiq_pipeline.validation.validators import validate_stage1_output
+
+
+class TestTextProcessor:
+    """Unit tests for TextProcessor: normalization, language detection, hashing."""
+
+    def test_normalize_basic(self):
+        """Padded, mixed-case input normalizes to "hello world!" with matching counts."""
+        outcome = TextProcessor().normalize("  Hello   World!  ")
+
+        assert outcome.normalized == "hello world!"
+        assert outcome.word_count == 2
+        assert outcome.char_count == 12
+
+    def test_normalize_unicode(self):
+        """Accented input keeps either its accented or its unaccented form."""
+        # NFC normalization test
+        outcome = TextProcessor().normalize("café")
+
+        acceptable_forms = ("cafe", "café")
+        assert any(form in outcome.normalized for form in acceptable_forms)
+
+    def test_normalize_control_chars(self):
+        """Control characters do not survive normalization."""
+        outcome = TextProcessor().normalize("Hello\x00World\x1fTest")
+
+        for control_char in ("\x00", "\x1f"):
+            assert control_char not in outcome.normalized
+
+    def test_detect_language_english(self):
+        """A plain English sentence is detected as "en"."""
+        detected = TextProcessor().detect_language("This is a test sentence in English.")
+
+        assert detected == "en"
+
+    def test_generate_content_hash(self):
+        """Content hashes are stable per input, distinct across inputs, 64 chars long."""
+        processor = TextProcessor()
+        first = processor.generate_content_hash("test content")
+        repeat = processor.generate_content_hash("test content")
+        other = processor.generate_content_hash("different content")
+
+        assert first == repeat  # Same input = same hash
+        assert first != other  # Different input = different hash
+        assert len(first) == 64  # SHA256 hex length
+
+    def test_is_empty_or_trivial(self):
+        """None, blank and two-character texts are trivial; longer texts are not."""
+        processor = TextProcessor()
+
+        expectations = [
+            (None, True),
+            ("", True),
+            ("  ", True),
+            ("ab", True),
+            ("abc", False),
+            ("Hello world", False),
+        ]
+        for candidate, expected in expectations:
+            assert processor.is_empty_or_trivial(candidate) is expected
+
+
+class TestHelperFunctions:
+    """Tests for the module-level validation helpers."""
+
+    def test_is_valid_iso639(self):
+        """Known two-letter codes are accepted; unknown, empty and long ones are not."""
+        for accepted in ("en", "es", "fr", "de"):
+            assert is_valid_iso639(accepted) is True
+        for rejected in ("xx", "", "english"):
+            assert is_valid_iso639(rejected) is False
+
+    def test_is_valid_sha256(self):
+        """Only a 64-character hexadecimal string counts as a SHA256 digest."""
+        assert is_valid_sha256("a" * 64) is True
+
+        rejected_values = [
+            "a" * 63,  # too short
+            "a" * 65,  # too long
+            "g" * 64,  # 'g' is not hex
+            "",
+            None,
+        ]
+        for rejected in rejected_values:
+            assert is_valid_sha256(rejected) is False  # type: ignore
+
+
+class TestStage1Normalizer:
+    """Exercise Stage1Normalizer on single reviews and on batches."""
+
+    @staticmethod
+    def _make_normalizer():
+        """Create a normalizer backed by a default Config."""
+        from reviewiq_pipeline.config import Config
+
+        return Stage1Normalizer(Config())
+
+    def test_normalize_review_basic(self, sample_raw_review):
+        """A regular review comes back with source, language and hash filled in."""
+        normalized = self._make_normalizer()._normalize_review(
+            sample_raw_review,
+            "test-business",
+            "test-place",
+        )
+
+        assert normalized is not None
+        assert normalized["source"] == "google"
+        assert normalized["review_id"] == sample_raw_review["review_id"]
+        assert normalized["text"] == sample_raw_review["text"]
+        assert normalized["text_normalized"] is not None
+        assert normalized["text_language"] == "en"
+        assert len(normalized["content_hash"]) == 64
+
+    def test_normalize_empty_review(self):
+        """A review whose text is empty is skipped (normalizes to None)."""
+        blank_review = {
+            "review_id": "test-empty",
+            "author_name": "Test",
+            "rating": 5,
+            "text": "",
+            "review_time": "2026-01-20T14:30:00Z",
+        }
+
+        normalized = self._make_normalizer()._normalize_review(
+            blank_review, "test-business", "test-place"
+        )
+        assert normalized is None
+
+    def test_normalize_batch(self, sample_raw_review):
+        """Two distinct reviews both come through batch normalization."""
+        second_review = {
+            "review_id": "second-review",
+            "author_name": "Jane",
+            "rating": 5,
+            "text": "Great service!",
+            "review_time": "2026-01-21T10:00:00Z",
+        }
+        batch = [sample_raw_review, second_review]
+
+        normalized = self._make_normalizer().normalize_batch(
+            batch, "test-business", "test-place"
+        )
+        assert len(normalized) == 2
+
+    def test_normalize_deduplication(self, sample_raw_review):
+        """Two reviews with identical text collapse to a single result."""
+        # Same text under a different review ID
+        twin = {**sample_raw_review, "review_id": "duplicate-review"}
+        batch = [sample_raw_review, twin]
+
+        normalized = self._make_normalizer().normalize_batch(
+            batch, "test-business", "test-place"
+        )
+        assert len(normalized) == 1  # Duplicate should be filtered
+
+
+class TestStage1Validation:
+    """Tests for the Stage 1 output validator."""
+
+    @staticmethod
+    def _assert_rejected_by(output, rule):
+        """Validate ``output`` and require a failure that reports ``rule``."""
+        report = validate_stage1_output(output)
+
+        assert report["passed"] is False
+        assert any(err["rule"] == rule for err in report["errors"])
+
+    def test_validate_valid_output(self, sample_stage1_output):
+        """The untouched sample output passes with zero errors."""
+        report = validate_stage1_output(sample_stage1_output)
+
+        assert report["stage"] == "stage1"
+        assert report["passed"] is True
+        assert report["error_count"] == 0
+
+    def test_validate_empty_text(self, sample_stage1_output):
+        """Empty review text is reported under rule V1.1."""
+        first_review = sample_stage1_output["reviews_normalized"][0]
+        first_review["text"] = ""
+
+        self._assert_rejected_by(sample_stage1_output, "V1.1")
+
+    def test_validate_invalid_hash(self, sample_stage1_output):
+        """A malformed content hash is reported under rule V1.3."""
+        first_review = sample_stage1_output["reviews_normalized"][0]
+        first_review["content_hash"] = "invalid"
+
+        self._assert_rejected_by(sample_stage1_output, "V1.3")
+
+    def test_validate_invalid_language(self, sample_stage1_output):
+        """A malformed language code is reported under rule V1.5."""
+        first_review = sample_stage1_output["reviews_normalized"][0]
+        first_review["text_language"] = "invalid"
+
+        self._assert_rejected_by(sample_stage1_output, "V1.5")
--- a/packages/reviewiq-pipeline/tests/test_stage2.py
+++ b/packages/reviewiq-pipeline/tests/test_stage2.py
@@ -0,0 +1,193 @@
+"""Tests for Stage 2: LLM Classification."""
+
+from __future__ import annotations
+
+import pytest
+
+from reviewiq_pipeline.services.llm_client import create_fallback_response
+from reviewiq_pipeline.validation.validators import validate_stage2_output
+
+
+class TestLLMClient:
+    """Tests for the LLM client's fallback response."""
+
+    def test_fallback_response_structure(self):
+        """The fallback wraps the whole review text in one low-confidence primary span."""
+        review_text = "This is a test review."
+        fallback = create_fallback_response(review_text)
+
+        assert "spans" in fallback
+        assert "review_summary" in fallback
+        assert len(fallback["spans"]) == 1
+
+        only_span = fallback["spans"][0]
+        expected_fields = {
+            "span_index": 0,
+            "span_text": review_text,
+            "span_start": 0,
+            "span_end": len(review_text),
+        }
+        for field, expected in expected_fields.items():
+            assert only_span[field] == expected
+        assert only_span["is_primary"] is True
+        assert only_span["confidence"] == "low"
+
+    def test_fallback_response_valid_urt(self):
+        """The fallback span is coded O1.01 with valence V0 and intensity I1."""
+        only_span = create_fallback_response("Test review")["spans"][0]
+
+        assert only_span["urt_primary"] == "O1.01"
+        assert only_span["valence"] == "V0"
+        assert only_span["intensity"] == "I1"
+
+
+class TestStage2Validation:
+    """Tests for the Stage 2 output validator."""
+
+    @staticmethod
+    def _first_review(output):
+        """Return the first classified review of a Stage 2 output."""
+        return output["reviews_classified"][0]
+
+    @staticmethod
+    def _assert_rejected_by(output, rule):
+        """Validate ``output`` and require a failure that reports ``rule``."""
+        report = validate_stage2_output(output)
+
+        assert report["passed"] is False
+        assert any(err["rule"] == rule for err in report["errors"])
+
+    def test_validate_valid_output(self, sample_stage2_output):
+        """The untouched sample output passes with zero errors."""
+        report = validate_stage2_output(sample_stage2_output)
+
+        assert report["stage"] == "stage2"
+        assert report["passed"] is True
+        assert report["error_count"] == 0
+
+    def test_validate_invalid_urt_code(self, sample_stage2_output):
+        """A malformed primary URT code is reported under rule V2.1."""
+        self._first_review(sample_stage2_output)["urt_primary"] = "INVALID"
+
+        self._assert_rejected_by(sample_stage2_output, "V2.1")
+
+    def test_validate_too_many_secondary(self, sample_stage2_output):
+        """Three secondary codes are reported under rule V2.2."""
+        self._first_review(sample_stage2_output)["urt_secondary"] = [
+            "O1.01", "O1.02", "O1.03"
+        ]
+
+        self._assert_rejected_by(sample_stage2_output, "V2.2")
+
+    def test_validate_invalid_valence(self, sample_stage2_output):
+        """A malformed valence is reported under rule V2.3."""
+        self._first_review(sample_stage2_output)["valence"] = "INVALID"
+
+        self._assert_rejected_by(sample_stage2_output, "V2.3")
+
+    def test_validate_invalid_trust_score(self, sample_stage2_output):
+        """A trust score of 0.1 is reported under rule V2.9."""
+        self._first_review(sample_stage2_output)["trust_score"] = 0.1
+
+        self._assert_rejected_by(sample_stage2_output, "V2.9")
+
+    def test_validate_invalid_embedding_dim(self, sample_stage2_output):
+        """A 100-dimensional embedding is reported under rule V2.10."""
+        self._first_review(sample_stage2_output)["embedding"] = [0.1] * 100
+
+        self._assert_rejected_by(sample_stage2_output, "V2.10")
+
+    def test_validate_multiple_primaries(self, sample_stage2_output):
+        """Marking every span as primary is reported under rule V2.8."""
+        for span in self._first_review(sample_stage2_output)["spans"]:
+            span["is_primary"] = True
+
+        self._assert_rejected_by(sample_stage2_output, "V2.8")
+
+    def test_validate_no_primary(self, sample_stage2_output):
+        """Marking no span as primary is reported under rule V2.8."""
+        for span in self._first_review(sample_stage2_output)["spans"]:
+            span["is_primary"] = False
+
+        self._assert_rejected_by(sample_stage2_output, "V2.8")
+
+    def test_validate_invalid_span_bounds(self, sample_stage2_output):
+        """A span whose start lies after its end is reported under rule V2.5."""
+        leading_span = self._first_review(sample_stage2_output)["spans"][0]
+        leading_span["span_start"] = 100
+        leading_span["span_end"] = 50
+
+        self._assert_rejected_by(sample_stage2_output, "V2.5")
+
+
+class TestSpanExtraction:
+    """Tests for how the classifier picks the primary span."""
+
+    @staticmethod
+    def _make_classifier():
+        """Create a classifier backed by a default Config."""
+        from reviewiq_pipeline.stages.stage2_classify import Stage2Classifier
+        from reviewiq_pipeline.config import Config
+
+        return Stage2Classifier(Config())
+
+    @staticmethod
+    def _span(span_id, span_index, valence, intensity):
+        """Build a minimal span that is not yet marked primary."""
+        return {
+            "span_id": span_id,
+            "span_index": span_index,
+            "valence": valence,
+            "intensity": intensity,
+            "is_primary": False,
+        }
+
+    def test_primary_span_selection_by_intensity(self):
+        """Of two negative spans, the I3 one is chosen over the I1 one."""
+        candidates = [
+            self._span("span1", 0, "V-", "I1"),
+            self._span("span2", 1, "V-", "I3"),
+        ]
+
+        chosen = self._make_classifier()._ensure_primary_span(candidates)
+
+        # Span with I3 should be primary
+        assert chosen[1]["is_primary"] is True
+        assert chosen[0]["is_primary"] is False
+
+    def test_primary_span_selection_by_valence(self):
+        """At equal intensity, the negative span is chosen over the positive one."""
+        candidates = [
+            self._span("span1", 0, "V+", "I2"),
+            self._span("span2", 1, "V-", "I2"),
+        ]
+
+        chosen = self._make_classifier()._ensure_primary_span(candidates)
+
+        # Span with V- should be primary
+        assert chosen[1]["is_primary"] is True
+        assert chosen[0]["is_primary"] is False
--- a/packages/reviewiq-pipeline/tests/test_stage3.py
+++ b/packages/reviewiq-pipeline/tests/test_stage3.py
@@ -0,0 +1,162 @@
+"""Tests for Stage 3: Issue Routing."""
+
+from __future__ import annotations
+
+import pytest
+
+from reviewiq_pipeline.stages.stage3_route import Stage3Router
+from reviewiq_pipeline.validation.validators import Stage3Validator
+
+
+class TestStage3Router:
+    """Tests for Stage 3 router."""
+
+    def test_generate_routing_key(self):
+        """Test routing key generation."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        router = Stage3Router(config)
+
+        span = {
+            "span_id": "test-span",
+            "business_id": "acme-corp",
+            "place_id": "place123",
+            "urt_primary": "J1.01",
+            "valence": "V-",
+            "intensity": "I3",
+            "entity_normalized": "mike",
+            "review_time": "2026-01-20T14:30:00Z",
+            "confidence": "high",
+            "trust_score": 0.85,
+        }
+
+        key = router._generate_routing_key(span)
+        assert key == "acme-corp|place123|J1.01|mike"
+
+    def test_generate_routing_key_no_entity(self):
+        """Test routing key generation without entity."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        router = Stage3Router(config)
+
+        span = {
+            "span_id": "test-span",
+            "business_id": "acme-corp",
+            "place_id": "place123",
+            "urt_primary": "J1.01",
+            "valence": "V-",
+            "intensity": "I3",
+            "entity_normalized": None,
+            "review_time": "2026-01-20T14:30:00Z",
+            "confidence": "high",
+            "trust_score": 0.85,
+        }
+
+        key = router._generate_routing_key(span)
+        assert key == "acme-corp|place123|J1.01|"
+
+    def test_generate_issue_id(self):
+        """Test deterministic issue ID generation."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        router = Stage3Router(config)
+
+        key1 = "acme-corp|place123|J1.01|"
+        key2 = "acme-corp|place123|J1.01|"
+        key3 = "acme-corp|place123|J1.02|"
+
+        id1 = router._generate_issue_id(key1)
+        id2 = router._generate_issue_id(key2)
+        id3 = router._generate_issue_id(key3)
+
+        # Same key = same ID
+        assert id1 == id2
+        # Different key = different ID
+        assert id1 != id3
+        # Format check
+        assert id1.startswith("ISS-")
+        assert len(id1) == 20  # ISS- + 16 hex chars
+
+    def test_route_span_sync(self):
+        """Test synchronous span routing."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        router = Stage3Router(config)
+
+        span = {
+            "span_id": "test-span",
+            "business_id": "acme-corp",
+            "place_id": "place123",
+            "urt_primary": "J1.01",
+            "valence": "V-",
+            "intensity": "I3",
+            "entity_normalized": None,
+            "review_time": "2026-01-20T14:30:00Z",
+            "confidence": "high",
+            "trust_score": 0.85,
+        }
+
+        result = router.route_span_sync(span)
+
+        assert result["span_id"] == "test-span"
+        assert result["issue_id"].startswith("ISS-")
+        assert "J1.01" in result["routing_key"]
+
+    def test_route_span_rejects_positive(self):
+        """Test that positive spans cannot be routed."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        router = Stage3Router(config)
+
+        span = {
+            "span_id": "test-span",
+            "business_id": "acme-corp",
+            "place_id": "place123",
+            "urt_primary": "O1.01",
+            "valence": "V+",  # Positive
+            "intensity": "I2",
+            "entity_normalized": None,
+            "review_time": "2026-01-20T14:30:00Z",
+            "confidence": "high",
+            "trust_score": 0.85,
+        }
+
+        with pytest.raises(ValueError, match="Cannot route positive span"):
+            router.route_span_sync(span)
+
+
+class TestStage3Validation:
+    """Tests for Stage 3 validation."""
+
+    def test_validate_valid_output(self, sample_stage3_output):
+        """Test validation of valid Stage 3 output."""
+        validator = Stage3Validator()
+        result = validator.validate_sync(sample_stage3_output)
+
+        assert result["stage"] == "stage3"
+        assert result["passed"] is True
+
+    def test_validate_invalid_issue_id(self, sample_stage3_output):
+        """Test validation catches invalid issue ID format."""
+        sample_stage3_output["routed_spans"][0]["issue_id"] = "INVALID"
+
+        validator = Stage3Validator()
+        result = validator.validate_sync(sample_stage3_output)
+
+        assert result["passed"] is False
+        assert any(e["rule"] == "V3.1" for e in result["errors"])
+
+    def test_validate_empty_routing_key(self, sample_stage3_output):
+        """Test validation catches empty routing key."""
+        sample_stage3_output["routed_spans"][0]["routing_key"] = ""
+
+        validator = Stage3Validator()
+        result = validator.validate_sync(sample_stage3_output)
+
+        assert result["passed"] is False
+        assert any(e["rule"] == "V3.2" for e in result["errors"])
--- a/packages/reviewiq-pipeline/tests/test_stage4.py
+++ b/packages/reviewiq-pipeline/tests/test_stage4.py
@@ -0,0 +1,201 @@
+"""Tests for Stage 4: Fact Aggregation."""
+
+from __future__ import annotations
+
+from datetime import date
+
+import pytest
+
+from reviewiq_pipeline.stages.stage4_aggregate import Stage4Aggregator
+from reviewiq_pipeline.validation.validators import validate_stage4_output
+
+
+class TestStage4Aggregator:
+    """Tests for Stage 4 aggregator."""
+
+    def test_get_bucket_range_day(self):
+        """Test day bucket range calculation."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        aggregator = Stage4Aggregator(config)
+
+        target = date(2026, 1, 20)
+        start, end = aggregator._get_bucket_range(target, "day")
+
+        assert start == target
+        assert end == target
+
+    def test_get_bucket_range_week(self):
+        """Test week bucket range calculation."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        aggregator = Stage4Aggregator(config)
+
+        # 2026-01-20 is a Tuesday
+        target = date(2026, 1, 20)
+        start, end = aggregator._get_bucket_range(target, "week")
+
+        # Week should start on Monday (Jan 19) and end on Sunday (Jan 25)
+        assert start == date(2026, 1, 19)
+        assert end == date(2026, 1, 25)
+
+    def test_get_bucket_range_month(self):
+        """Test month bucket range calculation."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        aggregator = Stage4Aggregator(config)
+
+        target = date(2026, 1, 20)
+        start, end = aggregator._get_bucket_range(target, "month")
+
+        assert start == date(2026, 1, 1)
+        assert end == date(2026, 1, 31)
+
+    def test_compute_strength_score(self):
+        """Test strength score calculation."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        aggregator = Stage4Aggregator(config)
+
+        spans = [
+            {"valence": "V-", "intensity": "I3"},  # 4 * 1.0 = 4
+            {"valence": "V-", "intensity": "I2"},  # 2 * 1.0 = 2
+            {"valence": "V+", "intensity": "I2"},  # 2 * 1.0 = 2
+            {"valence": "V0", "intensity": "I1"},  # 1 * 0.0 = 0
+        ]
+
+        score = aggregator._compute_strength_score(spans)
+        assert score == 8.0
+
+    def test_compute_trust_weighted_strength(self):
+        """Test trust-weighted strength calculation."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        aggregator = Stage4Aggregator(config)
+
+        spans = [
+            {"valence": "V-", "intensity": "I3", "trust_score": 1.0},  # 4 * 1.0 * 1.0 = 4
+            {"valence": "V-", "intensity": "I2", "trust_score": 0.5},  # 2 * 1.0 * 0.5 = 1
+        ]
+
+        score = aggregator._compute_trust_weighted_strength(spans)
+        assert score == 5.0
+
+    def test_compute_fact_metrics(self):
+        """Test fact metrics computation."""
+        from reviewiq_pipeline.config import Config
+
+        config = Config()
+        aggregator = Stage4Aggregator(config)
+
+        spans = [
+            {
+                "valence": "V-",
+                "intensity": "I3",
+                "comparative": "CR-N",
+                "trust_score": 0.8,
+                "rating": 2,
+            },
+            {
+                "valence": "V+",
+                "intensity": "I2",
+                "comparative": "CR-B",
+                "trust_score": 0.9,
+                "rating": 5,
+            },
+        ]
+
+        fact = aggregator._compute_fact_metrics(
+            spans,
+            "test-business",
+            "test-place",
+            "2026-01-20",
+            "day",
+            "urt_code",
+            "J1.01",
+            "v5.1",
+        )
+
+        assert fact["span_count"] == 2
+        assert fact["negative_count"] == 1
+        assert fact["positive_count"] == 1
+        assert fact["i3_count"] == 1
+        assert fact["i2_count"] == 1
+        assert fact["cr_better"] == 1
+        assert fact["avg_rating"] == 3.5
+
+
+class TestStage4Validation:
+    """Tests for Stage 4 validation."""
+
+    def test_validate_valid_output(self, sample_stage4_output):
+        """Test validation of valid Stage 4 output."""
+        result = validate_stage4_output(sample_stage4_output)
+
+        assert result["stage"] == "stage4"
+        assert result["passed"] is True
+
+    def test_validate_span_less_than_review(self, sample_stage4_output):
+        """Test validation catches span_count < review_count."""
+        sample_stage4_output["facts_written"][0]["span_count"] = 0
+        sample_stage4_output["facts_written"][0]["review_count"] = 1
+
+        result = validate_stage4_output(sample_stage4_output)
+
+        assert result["passed"] is False
+        assert any(e["rule"] == "V4.3" for e in result["errors"])
+
+    def test_validate_valence_sum(self, sample_stage4_output):
+        """Test validation catches valence sum mismatch."""
+        # Set span_count to 5 but valence counts only sum to 1
+        sample_stage4_output["facts_written"][0]["span_count"] = 5
+
+        result = validate_stage4_output(sample_stage4_output)
+
+        assert result["passed"] is False
+        assert any(e["rule"] == "V4.4" for e in result["errors"])
+
+    def test_validate_intensity_sum(self, sample_stage4_output):
+        """Test validation catches intensity sum mismatch."""
+        # Set span_count to 5 but intensity counts only sum to 1
+        sample_stage4_output["facts_written"][0]["span_count"] = 5
+        # Fix valence sum
+        sample_stage4_output["facts_written"][0]["negative_count"] = 5
+
+        result = validate_stage4_output(sample_stage4_output)
+
+        assert result["passed"] is False
+        assert any(e["rule"] == "V4.5" for e in result["errors"])
+
+    def test_validate_negative_strength(self, sample_stage4_output):
+        """Test validation catches negative strength score."""
+        sample_stage4_output["facts_written"][0]["strength_score"] = -1.0
+
+        result = validate_stage4_output(sample_stage4_output)
+
+        assert result["passed"] is False
+        assert any(e["rule"] == "V4.6" for e in result["errors"])
+
+    def test_validate_invalid_rating(self, sample_stage4_output):
+        """Test validation catches invalid average rating."""
+        sample_stage4_output["facts_written"][0]["avg_rating"] = 6.0
+
+        result = validate_stage4_output(sample_stage4_output)
+
+        assert result["passed"] is False
+        assert any(e["rule"] == "V4.7" for e in result["errors"])
+
+    def test_validate_null_rating_allowed(self, sample_stage4_output):
+        """Test that NULL rating is allowed."""
+        sample_stage4_output["facts_written"][0]["avg_rating"] = None
+
+        result = validate_stage4_output(sample_stage4_output)
+
+        # Should still pass (NULL is valid)
+        # Check no V4.7 errors
+        assert not any(e["rule"] == "V4.7" for e in result["errors"])
@@ -0,0 +1 @@
+"""Integration tests for reviewiq-pipeline."""