fix(pipeline): Update stage result to use new synthesis fields
Change action_matrix reference from old 'actions' field. Add issues_identified and health_score to stage result data. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -73,21 +73,27 @@ STAGE_NAME_TO_NUM = {"normalize": 1, "classify": 2, "route": 3, "aggregate": 4,
|
|||||||
STAGE_NUM_TO_NAME = {1: "normalize", 2: "classify", 3: "route", 4: "aggregate", 5: "synthesize"}
|
STAGE_NUM_TO_NAME = {1: "normalize", 2: "classify", 3: "route", 4: "aggregate", 5: "synthesize"}
|
||||||
|
|
||||||
|
|
||||||
def _parse_relative_date(date_str: str | None, default_to_now: bool = True) -> datetime | None:
|
def _parse_relative_date(
|
||||||
|
date_str: str | None,
|
||||||
|
default_to_now: bool = True,
|
||||||
|
anchor_date: datetime | None = None,
|
||||||
|
) -> datetime | None:
|
||||||
"""Parse relative date strings like '10 months ago' into datetime objects.
|
"""Parse relative date strings like '10 months ago' into datetime objects.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
date_str: A relative date string (e.g., "10 months ago", "2 weeks ago")
|
date_str: A relative date string (e.g., "10 months ago", "2 weeks ago")
|
||||||
or an ISO date string, or None.
|
or an ISO date string, or None.
|
||||||
default_to_now: If True, returns current datetime when parsing fails.
|
default_to_now: If True, returns current datetime when parsing fails.
|
||||||
|
anchor_date: Reference date for relative calculations. If None, uses now.
|
||||||
|
For scraped reviews, this should be the scrape time.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A datetime object, or None if parsing fails and default_to_now is False.
|
A datetime object, or None if parsing fails and default_to_now is False.
|
||||||
"""
|
"""
|
||||||
now = datetime.now()
|
reference = anchor_date or datetime.now()
|
||||||
|
|
||||||
if not date_str:
|
if not date_str:
|
||||||
return now if default_to_now else None
|
return reference if default_to_now else None
|
||||||
|
|
||||||
# Try to parse as ISO date first
|
# Try to parse as ISO date first
|
||||||
try:
|
try:
|
||||||
@@ -108,25 +114,25 @@ def _parse_relative_date(date_str: str | None, default_to_now: bool = True) -> d
|
|||||||
unit = match.group(2)
|
unit = match.group(2)
|
||||||
|
|
||||||
if unit == 'second':
|
if unit == 'second':
|
||||||
return now - timedelta(seconds=amount)
|
return reference - timedelta(seconds=amount)
|
||||||
elif unit == 'minute':
|
elif unit == 'minute':
|
||||||
return now - timedelta(minutes=amount)
|
return reference - timedelta(minutes=amount)
|
||||||
elif unit == 'hour':
|
elif unit == 'hour':
|
||||||
return now - timedelta(hours=amount)
|
return reference - timedelta(hours=amount)
|
||||||
elif unit == 'day':
|
elif unit == 'day':
|
||||||
return now - timedelta(days=amount)
|
return reference - timedelta(days=amount)
|
||||||
elif unit == 'week':
|
elif unit == 'week':
|
||||||
return now - timedelta(weeks=amount)
|
return reference - timedelta(weeks=amount)
|
||||||
elif unit == 'month':
|
elif unit == 'month':
|
||||||
# Approximate months as 30 days
|
# Approximate months as 30 days
|
||||||
return now - timedelta(days=amount * 30)
|
return reference - timedelta(days=amount * 30)
|
||||||
elif unit == 'year':
|
elif unit == 'year':
|
||||||
# Approximate years as 365 days
|
# Approximate years as 365 days
|
||||||
return now - timedelta(days=amount * 365)
|
return reference - timedelta(days=amount * 365)
|
||||||
|
|
||||||
# If we can't parse it, return now or None
|
# If we can't parse it, return reference or None
|
||||||
logger.warning(f"Could not parse relative date: {date_str}")
|
logger.warning(f"Could not parse relative date: {date_str}")
|
||||||
return now if default_to_now else None
|
return reference if default_to_now else None
|
||||||
|
|
||||||
|
|
||||||
class PipelineResult:
|
class PipelineResult:
|
||||||
@@ -465,8 +471,9 @@ class ReviewIQPipeline(BasePipeline):
|
|||||||
stage="synthesize",
|
stage="synthesize",
|
||||||
success=True,
|
success=True,
|
||||||
data={
|
data={
|
||||||
"actions_generated": len(stage5_result.action_plan) if stage5_result else 0,
|
"actions_generated": len(stage5_result.action_matrix) if stage5_result else 0,
|
||||||
"has_narrative": bool(stage5_result and stage5_result.executive_narrative),
|
"issues_identified": len(stage5_result.critical_issues) if stage5_result else 0,
|
||||||
|
"health_score": stage5_result.executive_summary.health_score if stage5_result else 0,
|
||||||
},
|
},
|
||||||
error=None,
|
error=None,
|
||||||
duration_ms=duration_ms,
|
duration_ms=duration_ms,
|
||||||
@@ -893,7 +900,8 @@ class ReviewIQPipeline(BasePipeline):
|
|||||||
metadata->>'category' as category,
|
metadata->>'category' as category,
|
||||||
metadata->>'total_reviews' as total_reviews,
|
metadata->>'total_reviews' as total_reviews,
|
||||||
metadata->>'average_rating' as average_rating,
|
metadata->>'average_rating' as average_rating,
|
||||||
scraper_version
|
scraper_version,
|
||||||
|
created_at
|
||||||
FROM public.jobs
|
FROM public.jobs
|
||||||
WHERE job_id = $1::uuid
|
WHERE job_id = $1::uuid
|
||||||
""",
|
""",
|
||||||
@@ -906,6 +914,14 @@ class ReviewIQPipeline(BasePipeline):
|
|||||||
if isinstance(reviews_data, str):
|
if isinstance(reviews_data, str):
|
||||||
logger.info("Parsing reviews_data JSON string")
|
logger.info("Parsing reviews_data JSON string")
|
||||||
reviews_data = json.loads(reviews_data)
|
reviews_data = json.loads(reviews_data)
|
||||||
|
|
||||||
|
# Use job created_at as anchor for relative date parsing
|
||||||
|
# This ensures "2 months ago" is relative to scrape time, not pipeline run time
|
||||||
|
scrape_time = row["created_at"]
|
||||||
|
if scrape_time and hasattr(scrape_time, 'replace'):
|
||||||
|
# Make timezone-naive for consistency
|
||||||
|
scrape_time = scrape_time.replace(tzinfo=None)
|
||||||
|
|
||||||
# Convert reviews_data to RawReview format
|
# Convert reviews_data to RawReview format
|
||||||
# Handle both API format (review_id, author, rating) and scraper format (reviewId, name, stars)
|
# Handle both API format (review_id, author, rating) and scraper format (reviewId, name, stars)
|
||||||
reviews = []
|
reviews = []
|
||||||
@@ -914,9 +930,15 @@ class ReviewIQPipeline(BasePipeline):
|
|||||||
# Skip if review is somehow a string
|
# Skip if review is somehow a string
|
||||||
logger.warning(f"Skipping review {i}: got string instead of dict")
|
logger.warning(f"Skipping review {i}: got string instead of dict")
|
||||||
continue
|
continue
|
||||||
# Parse the review time (may be relative like "10 months ago")
|
# Parse the review time - prefer absolute dates over relative strings
|
||||||
raw_time = review.get("timestamp") or review.get("publishedAtDate") or ""
|
# Priority: centerDate (scraper estimate) > publishedAtDate > timestamp (relative)
|
||||||
parsed_time = _parse_relative_date(raw_time)
|
raw_time = (
|
||||||
|
review.get("centerDate")
|
||||||
|
or review.get("publishedAtDate")
|
||||||
|
or review.get("timestamp")
|
||||||
|
or ""
|
||||||
|
)
|
||||||
|
parsed_time = _parse_relative_date(raw_time, anchor_date=scrape_time)
|
||||||
|
|
||||||
reviews.append({
|
reviews.append({
|
||||||
"review_id": review.get("review_id") or review.get("reviewId") or f"review_{i}",
|
"review_id": review.get("review_id") or review.get("reviewId") or f"review_{i}",
|
||||||
@@ -1145,8 +1167,6 @@ class ReviewIQPipeline(BasePipeline):
|
|||||||
|
|
||||||
async def _run_synthesize(self, job_id: str, execution_id: str):
|
async def _run_synthesize(self, job_id: str, execution_id: str):
|
||||||
"""Run AI synthesis stage to generate narratives and action plans."""
|
"""Run AI synthesis stage to generate narratives and action plans."""
|
||||||
from reviewiq_pipeline.stages.stage5_synthesize import Synthesis
|
|
||||||
|
|
||||||
# Create LLM client for synthesis
|
# Create LLM client for synthesis
|
||||||
llm_client = LLMClient.create(self._config)
|
llm_client = LLMClient.create(self._config)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user