feat: Add reviewiq-pipeline package for LLM-powered review classification

Implement a standalone Python package for processing customer reviews through
a 4-stage pipeline using URT (Universal Review Taxonomy) v5.1:

- Stage 1: Normalization (text cleaning, language detection, deduplication)
- Stage 2: LLM Classification (OpenAI/Anthropic span extraction with URT codes)
- Stage 3: Issue Routing (deterministic issue ID generation, span linking)
- Stage 4: Fact Aggregation (time series metrics for dashboards)

Package includes:
- TypedDict contracts matching Pipeline-Contracts-v1.md
- Async database layer with asyncpg and 5 SQL migrations
- LLM client abstraction supporting both OpenAI and Anthropic
- Sentence-transformers integration for embeddings
- Validation rules V1.x through V4.x
- CLI commands: migrate, run, validate, check
- 55 unit and integration tests (all passing)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 18:07:11 +00:00
parent b780a23b66
commit 7d720f5378
34 changed files with 7222 additions and 0 deletions

View File

@@ -0,0 +1 @@
"""Tests for reviewiq-pipeline."""

View File

@@ -0,0 +1,269 @@
"""
Pytest configuration and fixtures for reviewiq-pipeline tests.
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
import pytest
# Sample data fixtures matching the contract examples
@pytest.fixture
def sample_raw_review() -> dict[str, Any]:
    """Return one raw review record shaped like a Stage 0 scraper result."""
    # The text contains both praise and complaints; other fixtures in this
    # module reuse it verbatim and carve spans out of it by character offset.
    review_text = (
        "The food was great but the wait was absolutely terrible. "
        "We waited 45 minutes just to be seated, and another 30 minutes for our appetizers. "
        "The server Mike was rude and dismissive when we complained. "
        "However, the steak was cooked perfectly and the dessert was amazing."
    )
    return dict(
        review_id="ChdDSUhNMG9nS0VJQ0FnSURBdWJQX3h3RRAB",
        author_name="John Smith",
        author_id="103456789012345678901",
        rating=2,
        text=review_text,
        review_time="2026-01-20T14:30:00Z",
        response_text=None,
        photos=[],
        raw_payload={},
    )
@pytest.fixture
def sample_scraper_output(sample_raw_review: dict) -> dict[str, Any]:
    """Return a completed Stage 0 job payload that wraps ``sample_raw_review``."""
    business_info = dict(
        name="Acme Restaurant",
        address="123 Main St, Anytown, USA",
        category="Restaurant",
        total_reviews=1247,
        average_rating=4.2,
    )
    scraped_reviews = [sample_raw_review]
    return dict(
        job_id="test-job-001",
        status="completed",
        business_id="acme-corp",
        place_id="ChIJN1t_tDeuEmsRUsoyG83frY4",
        business_info=business_info,
        reviews=scraped_reviews,
        scrape_time_ms=12500,
        reviews_scraped=1,
        scraper_version="v1.0.0",
    )
@pytest.fixture
def sample_normalized_review() -> dict[str, Any]:
    """Sample normalized review from Stage 1 output.

    ``text_length`` and ``word_count`` are derived from the sample text so the
    record stays self-consistent. The previously hard-coded values (267 and
    52) did not match the text, which is 268 characters and 46
    whitespace-separated words long.
    """
    text = (
        "The food was great but the wait was absolutely terrible. "
        "We waited 45 minutes just to be seated, and another 30 minutes for our appetizers. "
        "The server Mike was rude and dismissive when we complained. "
        "However, the steak was cooked perfectly and the dessert was amazing."
    )
    return {
        "source": "google",
        "review_id": "ChdDSUhNMG9nS0VJQ0FnSURBdWJQX3h3RRAB",
        "review_version": 1,
        "business_id": "acme-corp",
        "place_id": "ChIJN1t_tDeuEmsRUsoyG83frY4",
        "text": text,
        "text_normalized": "the food was great but the wait was absolutely terrible we waited 45 minutes just to be seated and another 30 minutes for our appetizers the server mike was rude and dismissive when we complained however the steak was cooked perfectly and the dessert was amazing",
        "text_language": "en",
        # NOTE(review): assumes text_length counts characters of `text` and
        # word_count counts whitespace-separated tokens -- confirm against
        # Pipeline-Contracts-v1.md.
        "text_length": len(text),
        "word_count": len(text.split()),
        "rating": 2,
        "review_time": "2026-01-20T14:30:00Z",
        "author_name": "John Smith",
        "content_hash": "a1b2c3d4e5f6789012345678901234567890123456789012345678901234abcd",
        "raw_id": 12345,
    }
@pytest.fixture
def sample_stage1_output(sample_normalized_review: dict) -> dict[str, Any]:
    """Return a Stage 1 result holding the single normalized sample review."""
    stats = dict(
        input_count=1,
        output_count=1,
        skipped_empty=0,
        skipped_duplicate=0,
    )
    return dict(
        job_id="test-job-001",
        business_id="acme-corp",
        place_id="ChIJN1t_tDeuEmsRUsoyG83frY4",
        reviews_normalized=[sample_normalized_review],
        stats=stats,
    )
@pytest.fixture
def sample_span() -> dict[str, Any]:
    """Return one extracted span: the negative, primary "wait time" span."""
    # Where the span sits inside the sample review text.
    location = dict(
        span_id="SPN-b2c3d4e5f6789012",
        span_index=1,
        span_text="the wait was absolutely terrible. We waited 45 minutes just to be seated, and another 30 minutes for our appetizers",
        span_start=23,
        span_end=138,
    )
    # URT classification attributes attached to the span.
    classification = dict(
        profile="standard",
        urt_primary="J1.01",
        urt_secondary=[],
        valence="V-",
        intensity="I3",
        comparative="CR-N",
        specificity="S3",
        actionability="A2",
        temporal="TC",
        evidence="EC",
        confidence="high",
        usn="URT:S:J1.01:-3:32TC.EC.N",
        is_primary=True,
    )
    return {**location, **classification}
@pytest.fixture
def sample_classified_review(sample_span: dict) -> dict[str, Any]:
    """Sample classified review from Stage 2 output.

    The review carries two spans: an inline positive span ("The food was
    great") and the negative primary span supplied by ``sample_span``.
    """
    return {
        "source": "google",
        "review_id": "ChdDSUhNMG9nS0VJQ0FnSURBdWJQX3h3RRAB",
        "review_version": 1,
        "urt_primary": "J1.01",
        "urt_secondary": ["P1.02"],
        # Mixed valence (V plus-minus): the review contains both a positive
        # and a negative span. Written as an escape so the non-ASCII sign
        # cannot be lost again; the value was previously an empty string.
        # NOTE(review): confirm "V\u00b1" is the taxonomy's mixed-valence code.
        "valence": "V\u00b1",
        "intensity": "I3",
        "comparative": "CR-N",
        "staff_mentions": ["Mike"],
        "quotes": {
            "J1.01": "waited 45 minutes just to be seated",
            "P1.02": "rude and dismissive",
        },
        "trust_score": 0.85,
        "embedding": [0.1] * 384,  # Placeholder
        "spans": [
            {
                "span_id": "SPN-a1b2c3d4e5f67890",
                "span_index": 0,
                "span_text": "The food was great",
                "span_start": 0,
                "span_end": 18,
                "profile": "standard",
                "urt_primary": "O1.01",
                "urt_secondary": [],
                "valence": "V+",
                "intensity": "I2",
                "comparative": "CR-N",
                "confidence": "high",
                "usn": "URT:S:O1.01:+2:21TC.ES.N",
                "is_primary": False,
            },
            sample_span,
        ],
        "classification_confidence": {"overall": 0.85},
        "processing_time_ms": 500,
    }
@pytest.fixture
def sample_stage2_output(sample_classified_review: dict) -> dict[str, Any]:
    """Return a Stage 2 batch result containing the one classified sample review."""
    batch_stats = dict(
        input_count=1,
        success_count=1,
        error_count=0,
        total_spans=2,
        avg_spans_per_review=2.0,
        llm_tokens_used=1500,
        llm_cost_usd=0.001,
    )
    return dict(
        batch_id="batch001",
        taxonomy_version="v5.1",
        model_version="gpt-4o-mini",
        prompt_version="v1.0",
        reviews_classified=[sample_classified_review],
        stats=batch_stats,
    )
@pytest.fixture
def sample_routed_span() -> dict[str, Any]:
    """Return a Stage 3 routing record linking the sample span to a new issue."""
    routed: dict[str, Any] = dict(
        span_id="SPN-b2c3d4e5f6789012",
        issue_id="ISS-7a8b9c0d1e2f3a4b",
        # The key ends with "|": its last segment is empty in this sample.
        routing_key="acme-corp|ChIJN1t_tDeuEmsRUsoyG83frY4|J1.01|",
        is_new_issue=True,
    )
    return routed
@pytest.fixture
def sample_stage3_output(sample_routed_span: dict) -> dict[str, Any]:
    """Return a Stage 3 result: one routed span and one newly created issue."""
    routing_stats = dict(
        spans_processed=2,
        spans_routed=1,
        spans_skipped=1,
        issues_created=1,
        issues_updated=0,
    )
    return dict(
        routed_spans=[sample_routed_span],
        issues_created=["ISS-7a8b9c0d1e2f3a4b"],
        issues_updated=[],
        stats=routing_stats,
    )
@pytest.fixture
def sample_fact() -> dict[str, Any]:
    """Return one Stage 4 fact row: a daily aggregate for URT code J1.01."""
    # Dimensions identifying the aggregation bucket.
    dimensions = {
        "business_id": "acme-corp",
        "place_id": "ChIJN1t_tDeuEmsRUsoyG83frY4",
        "period_date": "2026-01-20",
        "bucket_type": "day",
        "subject_type": "urt_code",
        "subject_id": "J1.01",
        "taxonomy_version": "v5.1",
    }
    # Volume and per-valence counts.
    counts = {
        "review_count": 1,
        "span_count": 1,
        "negative_count": 1,
        "positive_count": 0,
        "neutral_count": 0,
        "mixed_count": 0,
    }
    # Strength scores and rating.
    scores = {
        "strength_score": 4.0,
        "negative_strength": 4.0,
        "positive_strength": 0.0,
        "avg_rating": 2.0,
    }
    # Intensity and comparative breakdowns.
    breakdowns = {
        "i1_count": 0,
        "i2_count": 0,
        "i3_count": 1,
        "cr_better": 0,
        "cr_worse": 0,
        "cr_same": 0,
    }
    # Trust-weighted variants of the strength scores.
    trust_weighted = {
        "trust_weighted_strength": 3.4,
        "trust_weighted_negative": 3.4,
    }
    return {**dimensions, **counts, **scores, **breakdowns, **trust_weighted}
@pytest.fixture
def sample_stage4_output(sample_fact: dict) -> dict[str, Any]:
    """Return a Stage 4 result containing the single sample fact row."""
    run_stats = dict(
        business_id="acme-corp",
        date="2026-01-20",
        locations_processed=1,
        codes_aggregated=1,
        facts_upserted=1,
    )
    return dict(facts_written=[sample_fact], stats=run_stats)
@pytest.fixture
def fixtures_dir() -> Path:
    """Return the ``fixtures`` directory that sits next to this module."""
    here = Path(__file__).parent
    return here / "fixtures"
# Helper to load JSON fixtures
def load_fixture(name: str) -> dict[str, Any]:
    """Load a JSON fixture by name.

    Args:
        name: Fixture file name without the ``.json`` extension. The file is
            looked up in the ``fixtures`` directory next to this module.

    Returns:
        The decoded JSON document.

    Raises:
        FileNotFoundError: If no fixture file with that name exists.
    """
    fixture_path = Path(__file__).parent / "fixtures" / f"{name}.json"
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        raw = fixture_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # Keep the short, name-based message callers already see.
        raise FileNotFoundError(f"Fixture not found: {name}") from None
    return json.loads(raw)

View File

@@ -0,0 +1 @@
"""Integration tests for reviewiq-pipeline."""

View File

@@ -0,0 +1,179 @@
"""End-to-end integration tests for the pipeline."""
from __future__ import annotations
import pytest
class TestPipelineE2E:
    """End-to-end integration tests covering the hand-offs between stages."""

    def test_stage1_to_stage2_contract(self, sample_scraper_output):
        """Test that Stage 1 output is valid Stage 2 input."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.contracts import Stage1Input
        from reviewiq_pipeline.stages.stage1_normalize import Stage1Normalizer

        config = Config()
        normalizer = Stage1Normalizer(config)

        # Build the Stage 1 input contract from the scraper output and feed
        # the normalizer from it, so the contract object is actually exercised.
        input_data = Stage1Input(
            job_id=sample_scraper_output["job_id"],
            business_id=sample_scraper_output["business_id"],
            place_id=sample_scraper_output["place_id"],
            reviews=sample_scraper_output["reviews"],
        )

        # Note: this is a synchronous test, so we use the batch method.
        normalized = normalizer.normalize_batch(
            input_data["reviews"],
            input_data["business_id"],
            input_data["place_id"],
        )

        # Verify Stage 1 output can be used as Stage 2 input
        assert len(normalized) > 0
        required_fields = (
            "source",
            "review_id",
            "text",
            "text_normalized",
            "rating",
            "review_time",
        )
        for review in normalized:
            # Check required fields for Stage 2
            for field in required_fields:
                assert review[field] is not None

    def test_stage2_to_stage3_contract(self, sample_stage2_output):
        """Test that Stage 2 output spans can be routed by Stage 3."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.contracts import SpanToRoute
        from reviewiq_pipeline.stages.stage3_route import Stage3Router

        config = Config()
        router = Stage3Router(config)

        # Extract negative and mixed spans from Stage 2 output. The mixed
        # code is written as an escape so the non-ASCII sign cannot be lost.
        # NOTE(review): confirm "V\u00b1" is the taxonomy's mixed-valence code.
        routable_valences = ("V-", "V\u00b1")
        spans_to_route = [
            SpanToRoute(
                span_id=span["span_id"],
                business_id="test-business",
                place_id="test-place",
                urt_primary=span["urt_primary"],
                valence=span["valence"],
                intensity=span["intensity"],
                entity_normalized=span.get("entity_normalized"),
                review_time="2026-01-20T14:30:00Z",
                confidence=span.get("confidence", "medium"),
                trust_score=0.85,
            )
            for review in sample_stage2_output["reviews_classified"]
            for span in review.get("spans", [])
            if span["valence"] in routable_valences
        ]

        # Guard against a vacuous pass: the sample must yield a routable span.
        assert spans_to_route, "sample Stage 2 output produced no routable spans"

        # Verify we can route these spans
        for span in spans_to_route:
            routed = router.route_span_sync(span)
            assert routed["span_id"] == span["span_id"]
            assert routed["issue_id"].startswith("ISS-")

    def test_validation_chain(
        self,
        sample_stage1_output,
        sample_stage2_output,
        sample_stage3_output,
        sample_stage4_output,
    ):
        """Test that all sample outputs pass validation."""
        from reviewiq_pipeline.validation.validators import (
            Stage3Validator,
            validate_stage1_output,
            validate_stage2_output,
            validate_stage4_output,
        )

        # Validate Stage 1
        result1 = validate_stage1_output(sample_stage1_output)
        assert result1["passed"], f"Stage 1 failed: {result1['errors']}"

        # Validate Stage 2
        result2 = validate_stage2_output(sample_stage2_output)
        assert result2["passed"], f"Stage 2 failed: {result2['errors']}"

        # Validate Stage 3 (sync version)
        validator3 = Stage3Validator()
        result3 = validator3.validate_sync(sample_stage3_output)
        assert result3["passed"], f"Stage 3 failed: {result3['errors']}"

        # Validate Stage 4
        result4 = validate_stage4_output(sample_stage4_output)
        assert result4["passed"], f"Stage 4 failed: {result4['errors']}"

    def test_text_normalization_preserves_meaning(self, sample_raw_review):
        """Test that normalization preserves review meaning."""
        from reviewiq_pipeline.services.text_processor import TextProcessor

        processor = TextProcessor()
        result = processor.normalize(sample_raw_review["text"])

        # Key terms should still be present (lowercased)
        for term in ("food", "wait", "terrible", "mike", "steak"):
            assert term in result.normalized

    def test_issue_id_determinism(self):
        """Test that same inputs always produce same issue ID."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.stages.stage3_route import Stage3Router

        config = Config()
        router = Stage3Router(config)
        span = {
            "span_id": "test-span",
            "business_id": "acme-corp",
            "place_id": "place123",
            "urt_primary": "J1.01",
            "valence": "V-",
            "intensity": "I3",
            "entity_normalized": "mike",
            "review_time": "2026-01-20T14:30:00Z",
            "confidence": "high",
            "trust_score": 0.85,
        }

        # Route the same span multiple times
        ids = [router.route_span_sync(span)["issue_id"] for _ in range(10)]

        # All IDs should be identical
        assert len(set(ids)) == 1
@pytest.mark.asyncio
class TestAsyncPipeline:
    """Asynchronous pipeline tests; these need a live database."""

    @pytest.mark.skip(reason="Requires database connection")
    async def test_full_pipeline_flow(self, sample_scraper_output):
        """Push scraper output through the whole pipeline and inspect the result."""
        from reviewiq_pipeline import Config, Pipeline

        settings = Config(
            database_url="postgresql://localhost:5432/reviewiq_test",
            llm_provider="openai",
        )
        pipeline = Pipeline(settings)
        try:
            await pipeline.initialize()
            outcome = await pipeline.process(sample_scraper_output)
            assert outcome.stage1 is not None
            # Either the run succeeded or validation results were recorded.
            assert outcome.success or len(outcome.validation) > 0
        finally:
            # Always close the pipeline, even when an assertion fails.
            await pipeline.close()

View File

@@ -0,0 +1,218 @@
"""Tests for Stage 1: Normalization."""
from __future__ import annotations
import pytest
from reviewiq_pipeline.services.text_processor import (
TextProcessor,
is_valid_iso639,
is_valid_sha256,
)
from reviewiq_pipeline.stages.stage1_normalize import Stage1Normalizer
from reviewiq_pipeline.validation.validators import validate_stage1_output
class TestTextProcessor:
    """Unit tests covering the TextProcessor service."""

    def test_normalize_basic(self):
        """Normalizing trims and lowercases the text and reports its counts."""
        outcome = TextProcessor().normalize(" Hello World! ")
        assert outcome.normalized == "hello world!"
        assert outcome.word_count == 2
        assert outcome.char_count == 12

    def test_normalize_unicode(self):
        """Accented input survives normalization, with or without its accent."""
        # NFC normalization test
        normalized = TextProcessor().normalize("café").normalized
        assert "cafe" in normalized or "café" in normalized

    def test_normalize_control_chars(self):
        """Control characters do not appear in the normalized text."""
        normalized = TextProcessor().normalize("Hello\x00World\x1fTest").normalized
        for control_char in ("\x00", "\x1f"):
            assert control_char not in normalized

    def test_detect_language_english(self):
        """An English sentence is detected as ``en``."""
        detected = TextProcessor().detect_language("This is a test sentence in English.")
        assert detected == "en"

    def test_generate_content_hash(self):
        """Content hashes are deterministic, input-sensitive, and 64 chars long."""
        processor = TextProcessor()
        first = processor.generate_content_hash("test content")
        repeat = processor.generate_content_hash("test content")
        other = processor.generate_content_hash("different content")
        assert first == repeat  # Same input = same hash
        assert first != other  # Different input = different hash
        assert len(first) == 64  # SHA256 hex length

    def test_is_empty_or_trivial(self):
        """Missing, blank, and two-character texts count as trivial."""
        processor = TextProcessor()
        expectations = [
            (None, True),
            ("", True),
            (" ", True),
            ("ab", True),
            ("abc", False),
            ("Hello world", False),
        ]
        for candidate, expected in expectations:
            assert processor.is_empty_or_trivial(candidate) is expected
class TestHelperFunctions:
    """Unit tests for the module-level validation helpers."""

    def test_is_valid_iso639(self):
        """Known two-letter codes are accepted; anything else is rejected."""
        for accepted in ("en", "es", "fr", "de"):
            assert is_valid_iso639(accepted) is True
        for rejected in ("xx", "", "english"):
            assert is_valid_iso639(rejected) is False

    def test_is_valid_sha256(self):
        """Only 64-character hexadecimal strings are accepted."""
        assert is_valid_sha256("a" * 64) is True
        rejected_values = [
            "a" * 63,  # too short
            "a" * 65,  # too long
            "g" * 64,  # 'g' is not hex
            "",
        ]
        for candidate in rejected_values:
            assert is_valid_sha256(candidate) is False
        assert is_valid_sha256(None) is False  # type: ignore
class TestStage1Normalizer:
    """Unit tests for the Stage 1 normalizer."""

    @staticmethod
    def _make_normalizer():
        """Build a normalizer from a default ``Config``."""
        from reviewiq_pipeline.config import Config

        return Stage1Normalizer(Config())

    def test_normalize_review_basic(self, sample_raw_review):
        """A regular review is normalized and keeps its identifying fields."""
        normalized = self._make_normalizer()._normalize_review(
            sample_raw_review,
            "test-business",
            "test-place",
        )
        assert normalized is not None
        assert normalized["source"] == "google"
        for passthrough in ("review_id", "text"):
            assert normalized[passthrough] == sample_raw_review[passthrough]
        assert normalized["text_normalized"] is not None
        assert normalized["text_language"] == "en"
        assert len(normalized["content_hash"]) == 64

    def test_normalize_empty_review(self):
        """A review with empty text yields ``None``."""
        blank_review = dict(
            review_id="test-empty",
            author_name="Test",
            rating=5,
            text="",
            review_time="2026-01-20T14:30:00Z",
        )
        outcome = self._make_normalizer()._normalize_review(
            blank_review, "test-business", "test-place"
        )
        assert outcome is None

    def test_normalize_batch(self, sample_raw_review):
        """Two distinct reviews both come out of a batch run."""
        second_review = dict(
            review_id="second-review",
            author_name="Jane",
            rating=5,
            text="Great service!",
            review_time="2026-01-21T10:00:00Z",
        )
        batch = [sample_raw_review, second_review]
        normalized = self._make_normalizer().normalize_batch(
            batch, "test-business", "test-place"
        )
        assert len(normalized) == 2

    def test_normalize_deduplication(self, sample_raw_review):
        """Two reviews sharing the same text collapse into one result."""
        # Same text, different review id.
        twin = dict(sample_raw_review)
        twin["review_id"] = "duplicate-review"
        normalized = self._make_normalizer().normalize_batch(
            [sample_raw_review, twin], "test-business", "test-place"
        )
        assert len(normalized) == 1  # Duplicate should be filtered
class TestStage1Validation:
    """Unit tests for the Stage 1 output validator."""

    @staticmethod
    def _has_rule(report, rule_id):
        """Tell whether any reported error carries the given rule id."""
        return any(error["rule"] == rule_id for error in report["errors"])

    def test_validate_valid_output(self, sample_stage1_output):
        """The untouched sample output validates cleanly."""
        report = validate_stage1_output(sample_stage1_output)
        assert report["stage"] == "stage1"
        assert report["passed"] is True
        assert report["error_count"] == 0

    def test_validate_empty_text(self, sample_stage1_output):
        """Empty review text is reported under rule V1.1."""
        review = sample_stage1_output["reviews_normalized"][0]
        review["text"] = ""
        report = validate_stage1_output(sample_stage1_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V1.1")

    def test_validate_invalid_hash(self, sample_stage1_output):
        """A malformed content hash is reported under rule V1.3."""
        review = sample_stage1_output["reviews_normalized"][0]
        review["content_hash"] = "invalid"
        report = validate_stage1_output(sample_stage1_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V1.3")

    def test_validate_invalid_language(self, sample_stage1_output):
        """A malformed language code is reported under rule V1.5."""
        review = sample_stage1_output["reviews_normalized"][0]
        review["text_language"] = "invalid"
        report = validate_stage1_output(sample_stage1_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V1.5")

View File

@@ -0,0 +1,193 @@
"""Tests for Stage 2: LLM Classification."""
from __future__ import annotations
import pytest
from reviewiq_pipeline.services.llm_client import create_fallback_response
from reviewiq_pipeline.validation.validators import validate_stage2_output
class TestLLMClient:
    """Unit tests for the LLM client's fallback response."""

    def test_fallback_response_structure(self):
        """The fallback wraps the whole review in one low-confidence primary span."""
        text = "This is a test review."
        fallback = create_fallback_response(text)
        for top_level_key in ("spans", "review_summary"):
            assert top_level_key in fallback
        assert len(fallback["spans"]) == 1

        only_span = fallback["spans"][0]
        assert only_span["span_index"] == 0
        assert only_span["span_text"] == text
        assert only_span["span_start"] == 0
        assert only_span["span_end"] == len(text)
        assert only_span["is_primary"] is True
        assert only_span["confidence"] == "low"

    def test_fallback_response_valid_urt(self):
        """The fallback span is coded O1.01 with V0 valence and I1 intensity."""
        only_span = create_fallback_response("Test review")["spans"][0]
        assert only_span["urt_primary"] == "O1.01"
        assert only_span["valence"] == "V0"
        assert only_span["intensity"] == "I1"
class TestStage2Validation:
    """Unit tests for the Stage 2 output validator."""

    @staticmethod
    def _has_rule(report, rule_id):
        """Tell whether any reported error carries the given rule id."""
        return any(error["rule"] == rule_id for error in report["errors"])

    def test_validate_valid_output(self, sample_stage2_output):
        """The untouched sample output validates cleanly."""
        report = validate_stage2_output(sample_stage2_output)
        assert report["stage"] == "stage2"
        assert report["passed"] is True
        assert report["error_count"] == 0

    def test_validate_invalid_urt_code(self, sample_stage2_output):
        """A malformed primary URT code is reported under rule V2.1."""
        review = sample_stage2_output["reviews_classified"][0]
        review["urt_primary"] = "INVALID"
        report = validate_stage2_output(sample_stage2_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V2.1")

    def test_validate_too_many_secondary(self, sample_stage2_output):
        """Three secondary codes are reported under rule V2.2."""
        review = sample_stage2_output["reviews_classified"][0]
        review["urt_secondary"] = ["O1.01", "O1.02", "O1.03"]
        report = validate_stage2_output(sample_stage2_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V2.2")

    def test_validate_invalid_valence(self, sample_stage2_output):
        """An unknown valence value is reported under rule V2.3."""
        review = sample_stage2_output["reviews_classified"][0]
        review["valence"] = "INVALID"
        report = validate_stage2_output(sample_stage2_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V2.3")

    def test_validate_invalid_trust_score(self, sample_stage2_output):
        """A trust score of 0.1 is reported under rule V2.9."""
        # NOTE(review): 0.1 is treated as out of bounds here, so the valid
        # range presumably has a lower bound above 0.1 -- confirm against V2.9.
        review = sample_stage2_output["reviews_classified"][0]
        review["trust_score"] = 0.1
        report = validate_stage2_output(sample_stage2_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V2.9")

    def test_validate_invalid_embedding_dim(self, sample_stage2_output):
        """A 100-element embedding is reported under rule V2.10."""
        review = sample_stage2_output["reviews_classified"][0]
        review["embedding"] = [0.1] * 100
        report = validate_stage2_output(sample_stage2_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V2.10")

    def test_validate_multiple_primaries(self, sample_stage2_output):
        """Marking every span as primary is reported under rule V2.8."""
        review = sample_stage2_output["reviews_classified"][0]
        for span in review["spans"]:
            span["is_primary"] = True
        report = validate_stage2_output(sample_stage2_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V2.8")

    def test_validate_no_primary(self, sample_stage2_output):
        """Leaving no span marked primary is reported under rule V2.8."""
        review = sample_stage2_output["reviews_classified"][0]
        for span in review["spans"]:
            span["is_primary"] = False
        report = validate_stage2_output(sample_stage2_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V2.8")

    def test_validate_invalid_span_bounds(self, sample_stage2_output):
        """A span whose start lies after its end is reported under rule V2.5."""
        first_span = sample_stage2_output["reviews_classified"][0]["spans"][0]
        first_span["span_start"] = 100
        first_span["span_end"] = 50
        report = validate_stage2_output(sample_stage2_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V2.5")
class TestSpanExtraction:
    """Unit tests for primary-span selection in the Stage 2 classifier."""

    @staticmethod
    def _make_classifier():
        """Build a classifier from a default ``Config``."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.stages.stage2_classify import Stage2Classifier

        return Stage2Classifier(Config())

    @staticmethod
    def _unmarked_span(span_id, index, valence, intensity):
        """Build a minimal span dict that is not yet flagged as primary."""
        return {
            "span_id": span_id,
            "span_index": index,
            "valence": valence,
            "intensity": intensity,
            "is_primary": False,
        }

    def test_primary_span_selection_by_intensity(self):
        """With equal valence, the higher-intensity span becomes primary."""
        candidates = [
            self._unmarked_span("span1", 0, "V-", "I1"),
            self._unmarked_span("span2", 1, "V-", "I3"),
        ]
        marked = self._make_classifier()._ensure_primary_span(candidates)
        # Span with I3 should be primary
        assert marked[1]["is_primary"] is True
        assert marked[0]["is_primary"] is False

    def test_primary_span_selection_by_valence(self):
        """With equal intensity, the negative span becomes primary."""
        candidates = [
            self._unmarked_span("span1", 0, "V+", "I2"),
            self._unmarked_span("span2", 1, "V-", "I2"),
        ]
        marked = self._make_classifier()._ensure_primary_span(candidates)
        # Span with V- should be primary
        assert marked[1]["is_primary"] is True
        assert marked[0]["is_primary"] is False

View File

@@ -0,0 +1,162 @@
"""Tests for Stage 3: Issue Routing."""
from __future__ import annotations
import pytest
from reviewiq_pipeline.stages.stage3_route import Stage3Router
from reviewiq_pipeline.validation.validators import Stage3Validator
class TestStage3Router:
    """Unit tests for the Stage 3 router."""

    @staticmethod
    def _make_router():
        """Build a router from a default ``Config``."""
        from reviewiq_pipeline.config import Config

        return Stage3Router(Config())

    @staticmethod
    def _span(**overrides):
        """Build the baseline negative span, applying any field overrides."""
        span = {
            "span_id": "test-span",
            "business_id": "acme-corp",
            "place_id": "place123",
            "urt_primary": "J1.01",
            "valence": "V-",
            "intensity": "I3",
            "entity_normalized": None,
            "review_time": "2026-01-20T14:30:00Z",
            "confidence": "high",
            "trust_score": 0.85,
        }
        span.update(overrides)
        return span

    def test_generate_routing_key(self):
        """The key joins business, place, URT code, and entity with pipes."""
        routing_key = self._make_router()._generate_routing_key(
            self._span(entity_normalized="mike")
        )
        assert routing_key == "acme-corp|place123|J1.01|mike"

    def test_generate_routing_key_no_entity(self):
        """Without an entity the key ends in an empty final segment."""
        routing_key = self._make_router()._generate_routing_key(self._span())
        assert routing_key == "acme-corp|place123|J1.01|"

    def test_generate_issue_id(self):
        """Issue ids are deterministic per key and follow the ISS- format."""
        router = self._make_router()
        same_key_a = "acme-corp|place123|J1.01|"
        same_key_b = "acme-corp|place123|J1.01|"
        other_key = "acme-corp|place123|J1.02|"
        first_id = router._generate_issue_id(same_key_a)
        second_id = router._generate_issue_id(same_key_b)
        third_id = router._generate_issue_id(other_key)
        # Same key = same ID
        assert first_id == second_id
        # Different key = different ID
        assert first_id != third_id
        # Format check
        assert first_id.startswith("ISS-")
        assert len(first_id) == 20  # ISS- + 16 hex chars

    def test_route_span_sync(self):
        """Routing a negative span returns its id, an issue id, and the key."""
        routed = self._make_router().route_span_sync(self._span())
        assert routed["span_id"] == "test-span"
        assert routed["issue_id"].startswith("ISS-")
        assert "J1.01" in routed["routing_key"]

    def test_route_span_rejects_positive(self):
        """Routing a positive span raises ``ValueError``."""
        router = self._make_router()
        positive_span = self._span(
            urt_primary="O1.01",
            valence="V+",  # Positive
            intensity="I2",
        )
        with pytest.raises(ValueError, match="Cannot route positive span"):
            router.route_span_sync(positive_span)
class TestStage3Validation:
    """Unit tests for the Stage 3 output validator."""

    @staticmethod
    def _validate(output):
        """Run the synchronous Stage 3 validation on ``output``."""
        return Stage3Validator().validate_sync(output)

    @staticmethod
    def _has_rule(report, rule_id):
        """Tell whether any reported error carries the given rule id."""
        return any(error["rule"] == rule_id for error in report["errors"])

    def test_validate_valid_output(self, sample_stage3_output):
        """The untouched sample output validates cleanly."""
        report = self._validate(sample_stage3_output)
        assert report["stage"] == "stage3"
        assert report["passed"] is True

    def test_validate_invalid_issue_id(self, sample_stage3_output):
        """A malformed issue id is reported under rule V3.1."""
        routed = sample_stage3_output["routed_spans"][0]
        routed["issue_id"] = "INVALID"
        report = self._validate(sample_stage3_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V3.1")

    def test_validate_empty_routing_key(self, sample_stage3_output):
        """An empty routing key is reported under rule V3.2."""
        routed = sample_stage3_output["routed_spans"][0]
        routed["routing_key"] = ""
        report = self._validate(sample_stage3_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V3.2")

View File

@@ -0,0 +1,201 @@
"""Tests for Stage 4: Fact Aggregation."""
from __future__ import annotations
from datetime import date
import pytest
from reviewiq_pipeline.stages.stage4_aggregate import Stage4Aggregator
from reviewiq_pipeline.validation.validators import validate_stage4_output
class TestStage4Aggregator:
    """Unit tests for the Stage 4 aggregator."""

    @staticmethod
    def _make_aggregator():
        """Build an aggregator from a default ``Config``."""
        from reviewiq_pipeline.config import Config

        return Stage4Aggregator(Config())

    def test_get_bucket_range_day(self):
        """A day bucket starts and ends on the target date."""
        target_day = date(2026, 1, 20)
        first, last = self._make_aggregator()._get_bucket_range(target_day, "day")
        assert first == target_day
        assert last == target_day

    def test_get_bucket_range_week(self):
        """A week bucket runs from Monday through Sunday."""
        # 2026-01-20 is a Tuesday
        target_day = date(2026, 1, 20)
        first, last = self._make_aggregator()._get_bucket_range(target_day, "week")
        # Week should start on Monday (Jan 19) and end on Sunday (Jan 25)
        assert first == date(2026, 1, 19)
        assert last == date(2026, 1, 25)

    def test_get_bucket_range_month(self):
        """A month bucket spans the first through the last day of the month."""
        target_day = date(2026, 1, 20)
        first, last = self._make_aggregator()._get_bucket_range(target_day, "month")
        assert first == date(2026, 1, 1)
        assert last == date(2026, 1, 31)

    def test_compute_strength_score(self):
        """Strength sums the weighted intensities; the neutral span adds nothing."""
        weighted_spans = [
            {"valence": "V-", "intensity": "I3"},  # 4 * 1.0 = 4
            {"valence": "V-", "intensity": "I2"},  # 2 * 1.0 = 2
            {"valence": "V+", "intensity": "I2"},  # 2 * 1.0 = 2
            {"valence": "V0", "intensity": "I1"},  # 1 * 0.0 = 0
        ]
        total = self._make_aggregator()._compute_strength_score(weighted_spans)
        assert total == 8.0

    def test_compute_trust_weighted_strength(self):
        """Each span's strength is scaled by its trust score before summing."""
        weighted_spans = [
            {"valence": "V-", "intensity": "I3", "trust_score": 1.0},  # 4 * 1.0 * 1.0 = 4
            {"valence": "V-", "intensity": "I2", "trust_score": 0.5},  # 2 * 1.0 * 0.5 = 1
        ]
        total = self._make_aggregator()._compute_trust_weighted_strength(weighted_spans)
        assert total == 5.0

    def test_compute_fact_metrics(self):
        """Counts, intensity buckets, comparatives, and rating are aggregated."""
        negative_span = dict(
            valence="V-",
            intensity="I3",
            comparative="CR-N",
            trust_score=0.8,
            rating=2,
        )
        positive_span = dict(
            valence="V+",
            intensity="I2",
            comparative="CR-B",
            trust_score=0.9,
            rating=5,
        )
        fact = self._make_aggregator()._compute_fact_metrics(
            [negative_span, positive_span],
            "test-business",
            "test-place",
            "2026-01-20",
            "day",
            "urt_code",
            "J1.01",
            "v5.1",
        )
        expected_values = [
            ("span_count", 2),
            ("negative_count", 1),
            ("positive_count", 1),
            ("i3_count", 1),
            ("i2_count", 1),
            ("cr_better", 1),
            ("avg_rating", 3.5),
        ]
        for metric, expected in expected_values:
            assert fact[metric] == expected
class TestStage4Validation:
    """Unit tests for the Stage 4 output validator."""

    @staticmethod
    def _has_rule(report, rule_id):
        """Tell whether any reported error carries the given rule id."""
        return any(error["rule"] == rule_id for error in report["errors"])

    def test_validate_valid_output(self, sample_stage4_output):
        """The untouched sample output validates cleanly."""
        report = validate_stage4_output(sample_stage4_output)
        assert report["stage"] == "stage4"
        assert report["passed"] is True

    def test_validate_span_less_than_review(self, sample_stage4_output):
        """A span count below the review count is reported under rule V4.3."""
        fact = sample_stage4_output["facts_written"][0]
        fact["span_count"] = 0
        fact["review_count"] = 1
        report = validate_stage4_output(sample_stage4_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V4.3")

    def test_validate_valence_sum(self, sample_stage4_output):
        """Valence counts that do not add up to span_count trip rule V4.4."""
        fact = sample_stage4_output["facts_written"][0]
        # Set span_count to 5 but valence counts only sum to 1
        fact["span_count"] = 5
        report = validate_stage4_output(sample_stage4_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V4.4")

    def test_validate_intensity_sum(self, sample_stage4_output):
        """Intensity counts that do not add up to span_count trip rule V4.5."""
        fact = sample_stage4_output["facts_written"][0]
        # Set span_count to 5 but intensity counts only sum to 1
        fact["span_count"] = 5
        # Fix valence sum
        fact["negative_count"] = 5
        report = validate_stage4_output(sample_stage4_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V4.5")

    def test_validate_negative_strength(self, sample_stage4_output):
        """A negative strength score is reported under rule V4.6."""
        fact = sample_stage4_output["facts_written"][0]
        fact["strength_score"] = -1.0
        report = validate_stage4_output(sample_stage4_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V4.6")

    def test_validate_invalid_rating(self, sample_stage4_output):
        """An average rating of 6.0 is reported under rule V4.7."""
        fact = sample_stage4_output["facts_written"][0]
        fact["avg_rating"] = 6.0
        report = validate_stage4_output(sample_stage4_output)
        assert report["passed"] is False
        assert self._has_rule(report, "V4.7")

    def test_validate_null_rating_allowed(self, sample_stage4_output):
        """A missing average rating does not trip rule V4.7."""
        fact = sample_stage4_output["facts_written"][0]
        fact["avg_rating"] = None
        report = validate_stage4_output(sample_stage4_output)
        # Only the absence of V4.7 errors is checked; the overall pass flag
        # is deliberately not asserted here.
        assert not self._has_rule(report, "V4.7")