"""End-to-end integration tests for the pipeline."""

from __future__ import annotations

import pytest


class TestPipelineE2E:
    """Synchronous checks of the contracts between adjacent pipeline stages."""

    def test_stage1_to_stage2_contract(self, sample_scraper_output):
        """Normalized Stage 1 reviews expose every field Stage 2 reads."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.stages.stage1_normalize import Stage1Normalizer
        from reviewiq_pipeline.contracts import Stage1Input

        normalizer = Stage1Normalizer(Config())

        # Build the Stage 1 input from the scraper fixture. The object is not
        # consumed below: this is a synchronous test, so the batch method is
        # called with the raw fields directly.
        _stage1_input = Stage1Input(
            job_id=sample_scraper_output["job_id"],
            business_id=sample_scraper_output["business_id"],
            place_id=sample_scraper_output["place_id"],
            reviews=sample_scraper_output["reviews"],
        )

        normalized = normalizer.normalize_batch(
            sample_scraper_output["reviews"],
            sample_scraper_output["business_id"],
            sample_scraper_output["place_id"],
        )

        # At least one review must come out, and each must carry the fields
        # Stage 2 requires.
        assert len(normalized) > 0
        required_fields = (
            "source",
            "review_id",
            "text",
            "text_normalized",
            "rating",
            "review_time",
        )
        for review in normalized:
            for field_name in required_fields:
                assert review[field_name] is not None

    def test_stage2_to_stage3_contract(self, sample_stage2_output):
        """Stage 2 spans with valence V- or V± can be routed by Stage 3."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.stages.stage3_route import Stage3Router
        from reviewiq_pipeline.contracts import SpanToRoute

        router = Stage3Router(Config())

        # Collect a routable record for every span whose valence qualifies.
        pending = []
        for classified in sample_stage2_output["reviews_classified"]:
            for raw_span in classified.get("spans", []):
                if raw_span["valence"] not in ("V-", "V±"):
                    continue
                pending.append(
                    SpanToRoute(
                        span_id=raw_span["span_id"],
                        business_id="test-business",
                        place_id="test-place",
                        urt_primary=raw_span["urt_primary"],
                        valence=raw_span["valence"],
                        intensity=raw_span["intensity"],
                        entity_normalized=raw_span.get("entity_normalized"),
                        review_time="2026-01-20T14:30:00Z",
                        confidence=raw_span.get("confidence", "medium"),
                        trust_score=0.85,
                    )
                )

        # Each routed result must echo the span ID and carry an "ISS-" issue ID.
        for candidate in pending:
            routed = router.route_span_sync(candidate)
            assert routed["span_id"] == candidate["span_id"]
            assert routed["issue_id"].startswith("ISS-")

    def test_validation_chain(
        self,
        sample_stage1_output,
        sample_stage2_output,
        sample_stage3_output,
        sample_stage4_output,
    ):
        """Every sample stage output passes its validator, checked in order."""
        from reviewiq_pipeline.validation.validators import (
            validate_stage1_output,
            validate_stage2_output,
            validate_stage4_output,
            Stage3Validator,
        )

        # Stage 1
        stage1_report = validate_stage1_output(sample_stage1_output)
        assert stage1_report["passed"], f"Stage 1 failed: {stage1_report['errors']}"

        # Stage 2
        stage2_report = validate_stage2_output(sample_stage2_output)
        assert stage2_report["passed"], f"Stage 2 failed: {stage2_report['errors']}"

        # Stage 3 goes through the validator class and its synchronous entry point.
        stage3_validator = Stage3Validator()
        stage3_report = stage3_validator.validate_sync(sample_stage3_output)
        assert stage3_report["passed"], f"Stage 3 failed: {stage3_report['errors']}"

        # Stage 4
        stage4_report = validate_stage4_output(sample_stage4_output)
        assert stage4_report["passed"], f"Stage 4 failed: {stage4_report['errors']}"

    def test_text_normalization_preserves_meaning(self, sample_raw_review):
        """Normalizing the raw review text keeps its key terms."""
        from reviewiq_pipeline.services.text_processor import TextProcessor

        processor = TextProcessor()
        outcome = processor.normalize(sample_raw_review["text"])

        # The key terms are expected in lowercase in the normalized text.
        for term in ("food", "wait", "terrible", "mike", "steak"):
            assert term in outcome.normalized

    def test_issue_id_determinism(self):
        """Routing an identical span repeatedly always yields one issue ID."""
        from reviewiq_pipeline.config import Config
        from reviewiq_pipeline.stages.stage3_route import Stage3Router

        router = Stage3Router(Config())

        fixed_span = {
            "span_id": "test-span",
            "business_id": "acme-corp",
            "place_id": "place123",
            "urt_primary": "J1.01",
            "valence": "V-",
            "intensity": "I3",
            "entity_normalized": "mike",
            "review_time": "2026-01-20T14:30:00Z",
            "confidence": "high",
            "trust_score": 0.85,
        }

        # Route the same span ten times and gather the distinct issue IDs.
        distinct_ids = set()
        for _ in range(10):
            distinct_ids.add(router.route_span_sync(fixed_span)["issue_id"])

        # Exactly one distinct ID means every run agreed.
        assert len(distinct_ids) == 1


@pytest.mark.asyncio
class TestAsyncPipeline:
    """Asynchronous pipeline tests; these need a database connection."""

    @pytest.mark.skip(reason="Requires database connection")
    async def test_full_pipeline_flow(self, sample_scraper_output):
        """Run the whole pipeline on scraper output and check the result."""
        from reviewiq_pipeline import Pipeline, Config

        pipeline = Pipeline(
            Config(
                database_url="postgresql://localhost:5432/reviewiq_test",
                llm_provider="openai",
            )
        )

        try:
            await pipeline.initialize()
            outcome = await pipeline.process(sample_scraper_output)

            assert outcome.stage1 is not None
            # Either the run succeeded or it reported validation entries.
            assert outcome.success or len(outcome.validation) > 0
        finally:
            # Close the pipeline whether or not the run above succeeded.
            await pipeline.close()