fix(synthesis): Calculate analysis_period from actual data range

Previously, analysis_period was hardcoded to "Last 12 months", which was
misleading when the data spanned multiple years. It is now calculated from
the span between the earliest and latest review dates in the dataset.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-30 15:49:51 +00:00
parent 479f1ee94a
commit c797470421

View File

@@ -407,12 +407,14 @@ class Stage5Synthesizer:
async def _gather_context(self, job_id: str) -> dict[str, Any]:
"""Gather all context needed for report generation."""
# Overview stats
# Overview stats with date range
overview = await self.pool.fetchrow("""
SELECT
COUNT(DISTINCT r.review_id) as total_reviews,
ROUND(AVG(r.rating)::numeric, 2) as avg_rating,
COUNT(s.span_id) as total_spans
COUNT(s.span_id) as total_spans,
MIN(r.review_time) as earliest_review,
MAX(r.review_time) as latest_review
FROM pipeline.reviews_enriched r
LEFT JOIN pipeline.review_spans s ON s.review_id = r.review_id
WHERE r.job_id = $1::uuid
@@ -577,11 +579,30 @@ class Stage5Synthesizer:
LIMIT 10
""", job_id)
# Calculate analysis period from actual date range
earliest = overview["earliest_review"]
latest = overview["latest_review"]
if earliest and latest:
from datetime import datetime
days_span = (latest - earliest).days
if days_span <= 31:
analysis_period = "Last month"
elif days_span <= 90:
analysis_period = "Last 3 months"
elif days_span <= 365:
analysis_period = "Last 12 months"
else:
years = days_span // 365
analysis_period = f"Last {years} years" if years > 1 else "Last year"
else:
analysis_period = "All available data"
return {
"business_name": business or "This business",
"total_reviews": overview["total_reviews"] or 0,
"avg_rating": float(overview["avg_rating"] or 0),
"total_spans": overview["total_spans"] or 0,
"analysis_period": analysis_period,
"sentiment": [dict(r) for r in sentiment],
"top_issues": [dict(r) for r in top_issues],
"top_strengths": [dict(r) for r in top_strengths],
@@ -869,7 +890,7 @@ class Stage5Synthesizer:
generated_at=datetime.utcnow().isoformat(),
review_count=context["total_reviews"],
insight_count=context["total_spans"],
analysis_period="Last 12 months",
analysis_period=context.get("analysis_period", "All available data"),
charts=chart_data,
)
@@ -1022,7 +1043,7 @@ Generate a comprehensive analyst report based on this data."""
generated_at=datetime.utcnow().isoformat(),
review_count=ctx["total_reviews"],
insight_count=ctx["total_spans"],
analysis_period="Last 12 months",
analysis_period=ctx.get("analysis_period", "All available data"),
charts=chart_data,
)