From c797470421d382044a25c308511fb823eccad8da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Guti=C3=A9rrez?= <35082514+alezmad@users.noreply.github.com> Date: Fri, 30 Jan 2026 15:49:51 +0000 Subject: [PATCH] fix(synthesis): Calculate analysis_period from actual data range Previously hardcoded "Last 12 months" which was misleading when data spanned multiple years. Now calculates the actual period from the earliest to latest review dates in the dataset. Co-Authored-By: Claude Opus 4.5 --- .../stages/stage5_synthesize.py | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage5_synthesize.py b/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage5_synthesize.py index c652d71..0e2912f 100644 --- a/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage5_synthesize.py +++ b/packages/reviewiq-pipeline/src/reviewiq_pipeline/stages/stage5_synthesize.py @@ -407,12 +407,14 @@ class Stage5Synthesizer: async def _gather_context(self, job_id: str) -> dict[str, Any]: """Gather all context needed for report generation.""" - # Overview stats + # Overview stats with date range overview = await self.pool.fetchrow(""" SELECT COUNT(DISTINCT r.review_id) as total_reviews, ROUND(AVG(r.rating)::numeric, 2) as avg_rating, - COUNT(s.span_id) as total_spans + COUNT(s.span_id) as total_spans, + MIN(r.review_time) as earliest_review, + MAX(r.review_time) as latest_review FROM pipeline.reviews_enriched r LEFT JOIN pipeline.review_spans s ON s.review_id = r.review_id WHERE r.job_id = $1::uuid @@ -577,11 +579,30 @@ class Stage5Synthesizer: LIMIT 10 """, job_id) + # Calculate analysis period from actual date range + earliest = overview["earliest_review"] + latest = overview["latest_review"] + if earliest and latest: + from datetime import datetime + days_span = (latest - earliest).days + if days_span <= 31: + analysis_period = "Last month" + elif days_span <= 90: + analysis_period = "Last 3 months" + elif days_span <= 365: + analysis_period = "Last 12 months" + else: + years = days_span // 365 + analysis_period = f"Last {years} years" if years > 1 else "Last year" + else: + analysis_period = "All available data" + return { "business_name": business or "This business", "total_reviews": overview["total_reviews"] or 0, "avg_rating": float(overview["avg_rating"] or 0), "total_spans": overview["total_spans"] or 0, + "analysis_period": analysis_period, "sentiment": [dict(r) for r in sentiment], "top_issues": [dict(r) for r in top_issues], "top_strengths": [dict(r) for r in top_strengths], @@ -869,7 +890,7 @@ class Stage5Synthesizer: generated_at=datetime.utcnow().isoformat(), review_count=context["total_reviews"], insight_count=context["total_spans"], - analysis_period="Last 12 months", + analysis_period=context.get("analysis_period", "All available data"), charts=chart_data, ) @@ -1022,7 +1043,7 @@ Generate a comprehensive analyst report based on this data.""" generated_at=datetime.utcnow().isoformat(), review_count=ctx["total_reviews"], insight_count=ctx["total_spans"], - analysis_period="Last 12 months", + analysis_period=ctx.get("analysis_period", "All available data"), charts=chart_data, )