diff --git a/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage5_synthesize.py b/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage5_synthesize.py index e5874c2..c652d71 100644 --- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage5_synthesize.py +++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage5_synthesize.py @@ -486,10 +486,13 @@ class Stage5Synthesizer: ORDER BY negative DESC """, job_id) - # Business name + # Business name - get the most common one (in case of data leakage) business = await self.pool.fetchval(""" - SELECT DISTINCT business_id FROM pipeline.reviews_enriched - WHERE job_id = $1::uuid LIMIT 1 + SELECT business_id FROM pipeline.reviews_enriched + WHERE job_id = $1::uuid + GROUP BY business_id + ORDER BY COUNT(*) DESC + LIMIT 1 """, job_id) # MOMENTUM: Calculate from data (not LLM guess)