feat: Add reviewiq-pipeline package for LLM-powered review classification
Implement a standalone Python package for processing customer reviews through a 4-stage pipeline using URT (Universal Review Taxonomy) v5.1:

- Stage 1: Normalization (text cleaning, language detection, deduplication)
- Stage 2: LLM Classification (OpenAI/Anthropic span extraction with URT codes)
- Stage 3: Issue Routing (deterministic issue ID generation, span linking)
- Stage 4: Fact Aggregation (time series metrics for dashboards)

Package includes:

- TypedDict contracts matching Pipeline-Contracts-v1.md
- Async database layer with asyncpg and 5 SQL migrations
- LLM client abstraction supporting both OpenAI and Anthropic
- Sentence-transformers integration for embeddings
- Validation rules V1.x through V4.x
- CLI commands: migrate, run, validate, check
- 55 unit and integration tests (all passing)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
179
packages/reviewiq-pipeline/tests/integration/test_e2e.py
Normal file
179
packages/reviewiq-pipeline/tests/integration/test_e2e.py
Normal file
@@ -0,0 +1,179 @@
|
||||
"""End-to-end integration tests for the pipeline."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestPipelineE2E:
    """End-to-end integration tests.

    Each test exercises the hand-off between pipeline stages through the
    synchronous entry points (``normalize_batch``, ``route_span_sync``,
    ``validate_sync``). Project modules are imported inside the tests, and
    all input data comes from pytest fixtures defined outside this file.
    """

    def test_stage1_to_stage2_contract(self, sample_scraper_output):
        """Test that Stage 1 output is valid Stage 2 input."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.stages.stage1_normalize import Stage1Normalizer
        from reviewiq_pipeline.contracts import Stage1Input

        config = Config()
        normalizer = Stage1Normalizer(config)

        # Build the Stage 1 input contract from the scraper fixture and feed
        # the normalizer from it, so the contract object is actually used
        # rather than constructed and discarded.
        input_data = Stage1Input(
            job_id=sample_scraper_output["job_id"],
            business_id=sample_scraper_output["business_id"],
            place_id=sample_scraper_output["place_id"],
            reviews=sample_scraper_output["reviews"],
        )

        # Note: this is a synchronous test, so we use the batch method.
        normalized = normalizer.normalize_batch(
            input_data["reviews"],
            input_data["business_id"],
            input_data["place_id"],
        )

        # Verify Stage 1 output can be used as Stage 2 input.
        assert len(normalized) > 0, "Stage 1 produced no normalized reviews"

        # Fields this test requires to be present and non-None for Stage 2.
        required_fields = (
            "source",
            "review_id",
            "text",
            "text_normalized",
            "rating",
            "review_time",
        )
        for review in normalized:
            for field in required_fields:
                assert review[field] is not None, (
                    f"Normalized review has no value for {field!r}"
                )

    def test_stage2_to_stage3_contract(self, sample_stage2_output):
        """Test that Stage 2 output spans can be routed by Stage 3."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.stages.stage3_route import Stage3Router
        from reviewiq_pipeline.contracts import SpanToRoute

        config = Config()
        router = Stage3Router(config)

        # Only spans whose valence is V- or V± are handed to the router.
        routable_valences = ("V-", "V±")

        spans_to_route = []
        for review in sample_stage2_output["reviews_classified"]:
            for span in review.get("spans", []):
                if span["valence"] not in routable_valences:
                    continue
                spans_to_route.append(
                    SpanToRoute(
                        span_id=span["span_id"],
                        business_id="test-business",
                        place_id="test-place",
                        urt_primary=span["urt_primary"],
                        valence=span["valence"],
                        intensity=span["intensity"],
                        entity_normalized=span.get("entity_normalized"),
                        review_time="2026-01-20T14:30:00Z",
                        confidence=span.get("confidence", "medium"),
                        trust_score=0.85,
                    )
                )

        # Without this guard the routing loop below would pass vacuously if
        # the fixture contained no V-/V± spans.
        assert spans_to_route, "Stage 2 fixture contains no routable spans"

        # Verify we can route these spans.
        for span in spans_to_route:
            routed = router.route_span_sync(span)
            assert routed["span_id"] == span["span_id"]
            assert routed["issue_id"].startswith("ISS-")

    def test_validation_chain(
        self,
        sample_stage1_output,
        sample_stage2_output,
        sample_stage3_output,
        sample_stage4_output,
    ):
        """Test that all sample outputs pass validation."""
        from reviewiq_pipeline.validation.validators import (
            validate_stage1_output,
            validate_stage2_output,
            validate_stage4_output,
            Stage3Validator,
        )

        # Validate Stage 1
        result1 = validate_stage1_output(sample_stage1_output)
        assert result1["passed"], f"Stage 1 failed: {result1['errors']}"

        # Validate Stage 2
        result2 = validate_stage2_output(sample_stage2_output)
        assert result2["passed"], f"Stage 2 failed: {result2['errors']}"

        # Validate Stage 3 (sync version)
        validator3 = Stage3Validator()
        result3 = validator3.validate_sync(sample_stage3_output)
        assert result3["passed"], f"Stage 3 failed: {result3['errors']}"

        # Validate Stage 4
        result4 = validate_stage4_output(sample_stage4_output)
        assert result4["passed"], f"Stage 4 failed: {result4['errors']}"

    def test_text_normalization_preserves_meaning(self, sample_raw_review):
        """Test that normalization preserves review meaning."""
        from reviewiq_pipeline.services.text_processor import TextProcessor

        processor = TextProcessor()
        result = processor.normalize(sample_raw_review["text"])

        # Key terms should still be present (lowercased).
        for term in ("food", "wait", "terrible", "mike", "steak"):
            assert term in result.normalized, (
                f"Key term {term!r} missing from normalized text"
            )

    def test_issue_id_determinism(self):
        """Test that same inputs always produce same issue ID."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.stages.stage3_route import Stage3Router

        config = Config()
        router = Stage3Router(config)

        span = {
            "span_id": "test-span",
            "business_id": "acme-corp",
            "place_id": "place123",
            "urt_primary": "J1.01",
            "valence": "V-",
            "intensity": "I3",
            "entity_normalized": "mike",
            "review_time": "2026-01-20T14:30:00Z",
            "confidence": "high",
            "trust_score": 0.85,
        }

        # Route the same span multiple times.
        ids = [router.route_span_sync(span)["issue_id"] for _ in range(10)]

        # All IDs should be identical.
        assert len(set(ids)) == 1, f"Issue IDs differ across runs: {set(ids)}"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestAsyncPipeline:
    """Async pipeline tests (require database).

    The single test here is skipped unconditionally; it documents how the
    full pipeline is expected to be driven once a database is reachable.
    """

    @pytest.mark.skip(reason="Requires database connection")
    async def test_full_pipeline_flow(self, sample_scraper_output):
        """Test full pipeline from scraper output to facts."""
        from reviewiq_pipeline import Pipeline, Config

        settings = Config(
            database_url="postgresql://localhost:5432/reviewiq_test",
            llm_provider="openai",
        )
        flow = Pipeline(settings)

        try:
            await flow.initialize()
            outcome = await flow.process(sample_scraper_output)

            # Stage 1 must always have produced something.
            assert outcome.stage1 is not None
            # Either the run succeeded or it reported validation entries.
            assert outcome.success or len(outcome.validation) > 0
        finally:
            # Close the pipeline whether or not the assertions passed.
            await flow.close()
|
||||
Reference in New Issue
Block a user