This commit implements a plugin-like pipeline architecture with:
Pipeline Core Package (packages/pipeline-core/):
- BasePipeline abstract class that all pipelines implement
- PipelineRegistry for database-backed discovery/management
- PipelineRunner for execution with status tracking
- DashboardConfig contracts for dynamic widget definitions
Database Migration (006_pipeline_registry.sql):
- pipeline.registry table for registered pipelines
- pipeline.executions table for execution history
- Views for execution stats and monitoring
ReviewIQ Pipeline Refactor:
- Implements BasePipeline interface
- Adds get_dashboard_config() with widget definitions
- Adds get_widget_data() methods for all dashboard widgets
- Maintains backward compatibility with Pipeline alias
Generic Pipeline API (api/routes/pipelines.py):
- GET /api/pipelines - List all registered pipelines
- GET /api/pipelines/{id} - Pipeline details
- POST /api/pipelines/{id}/execute - Execute pipeline
- GET /api/pipelines/{id}/dashboard - Dashboard config
- GET /api/pipelines/{id}/widgets/{w} - Widget data
- GET /api/pipelines/{id}/executions - Execution history
Frontend Dynamic Dashboard System:
- DynamicDashboard component renders from config
- WidgetRegistry maps types to components
- Widget components: StatCard, LineChart, BarChart,
PieChart, DataTable, Heatmap
- Pipeline API client library
Frontend Pipeline Pages:
- /pipelines - List all registered pipelines
- /pipelines/[id] - Dynamic dashboard for pipeline
- /pipelines/[id]/executions - Execution history
- Pipelines nav item in Sidebar
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
561 lines
18 KiB
Python
561 lines
18 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Generic Pipeline API endpoints.
|
|
|
|
Provides a unified API for all registered pipelines:
|
|
- List available pipelines
|
|
- Get pipeline details and metadata
|
|
- Execute pipelines
|
|
- Get dashboard configuration
|
|
- Get widget data
|
|
- List execution history
|
|
"""
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
import asyncpg
|
|
from fastapi import APIRouter, HTTPException, Query
|
|
from pydantic import BaseModel, Field
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Router that carries every endpoint defined in this file.
router = APIRouter(prefix="/api/pipelines", tags=["pipelines"])

# asyncpg connection pool; remains None until set_database() installs one.
_pool: asyncpg.Pool | None = None

# Pipeline objects that have already been loaded, keyed by pipeline_id.
_pipeline_instances: dict[str, Any] = {}
|
|
|
|
|
|
def set_database(pool: asyncpg.Pool) -> None:
    """Install the connection pool used by the pipeline endpoints.

    Rebinds the module-level ``_pool`` to ``pool``.
    """
    global _pool
    _pool = pool
|
|
|
|
|
|
# ==================== Pydantic Models ====================
|
|
|
|
|
|
class PipelineInfo(BaseModel):
    """Summary view of a registered pipeline (all fields required)."""

    # Identity and presentation.
    id: str = Field(..., description="Pipeline identifier")
    name: str = Field(..., description="Display name")
    description: str = Field(..., description="Human-readable description")
    version: str = Field(..., description="Semantic version")

    # Registry state and capabilities.
    is_enabled: bool = Field(..., description="Whether pipeline is enabled")
    stages: list[str] = Field(..., description="Available stages")
    input_type: str = Field(..., description="Expected input type")
|
|
|
|
|
|
class PipelineDetail(PipelineInfo):
    """Full view of a pipeline: the summary fields plus registry metadata."""

    module_path: str = Field(..., description="Python module path")
    config: dict[str, Any] | None = Field(
        default=None, description="Pipeline configuration"
    )

    # Timestamps are carried as strings; both are optional.
    created_at: str | None = Field(default=None, description="Registration timestamp")
    updated_at: str | None = Field(default=None, description="Last update timestamp")
|
|
|
|
|
|
class ExecuteRequest(BaseModel):
    """Body of a pipeline execution request; every field is optional."""

    # What to process.
    job_id: str | None = Field(default=None, description="Job ID to process")
    business_id: str | None = Field(default=None, description="Business identifier")
    input_data: dict[str, Any] | None = Field(
        default=None, description="Direct input data"
    )

    # How to process it.
    stages: list[str] | None = Field(
        default=None, description="Stages to run (default: all)"
    )
    options: dict[str, Any] | None = Field(
        default=None, description="Pipeline-specific options"
    )
|
|
|
|
|
|
class ExecuteResponse(BaseModel):
    """Outcome of one pipeline execution as reported to the API caller."""

    execution_id: str = Field(..., description="Execution identifier")
    pipeline_id: str = Field(..., description="Pipeline that was executed")
    success: bool = Field(..., description="Whether execution succeeded")
    stages_run: list[str] = Field(..., description="Stages that were run")

    # Only populated when the execution reports an error.
    error: str | None = Field(default=None, description="Error message if failed")
|
|
|
|
|
|
class ExecutionSummary(BaseModel):
    """One row of a pipeline's execution history."""

    # Identifiers.
    id: str = Field(..., description="Execution identifier")
    pipeline_id: str = Field(..., description="Pipeline identifier")
    job_id: str | None = Field(default=None, description="Associated job ID")
    business_id: str | None = Field(default=None, description="Business identifier")

    # Progress and outcome.
    status: str = Field(..., description="Execution status")
    stages_requested: list[str] = Field(..., description="Stages requested")
    stages_completed: list[str] = Field(..., description="Stages completed")
    error_message: str | None = Field(default=None, description="Error if failed")

    # Timestamps are carried as strings; all are optional.
    started_at: str | None = Field(default=None, description="Start timestamp")
    completed_at: str | None = Field(default=None, description="Completion timestamp")
    created_at: str | None = Field(default=None, description="Creation timestamp")
|
|
|
|
|
|
class DashboardSectionModel(BaseModel):
    """A titled group of widgets inside a pipeline dashboard."""

    id: str
    title: str
    description: str | None = None

    # Widget definitions are passed through as plain dicts, unvalidated here.
    widgets: list[dict[str, Any]]

    collapsed: bool | None = None
|
|
|
|
|
|
class DashboardConfigModel(BaseModel):
    """Top-level dashboard definition for one pipeline."""

    pipeline_id: str
    title: str
    description: str | None = None

    # Sections, each holding its own widgets.
    sections: list[DashboardSectionModel]

    # Optional display hints; None when the pipeline does not supply them.
    default_time_range: str | None = None
    refresh_interval: int | None = None
|
|
|
|
|
|
# ==================== Helper Functions ====================
|
|
|
|
|
|
async def _get_pipeline_instance(pipeline_id: str) -> Any:
    """Return the pipeline object for ``pipeline_id``, loading it on first use.

    A previously loaded instance is returned straight from
    ``_pipeline_instances``. Otherwise the pipeline's row is read from
    ``pipeline.registry``, its ``module_path`` (``"module.path:ClassName"``)
    is imported, the class is instantiated without arguments,
    ``await instance.initialize()`` is called, and the instance is cached.

    Args:
        pipeline_id: Identifier of the pipeline in ``pipeline.registry``.

    Returns:
        The initialized pipeline instance.

    Raises:
        HTTPException: 503 when no database pool is set (cache miss only),
            404 when the id is not registered, 400 when the pipeline is
            disabled, 500 when importing, instantiating or initializing the
            pipeline class fails.
    """
    import importlib

    if pipeline_id in _pipeline_instances:
        return _pipeline_instances[pipeline_id]

    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    # Look up pipeline in registry
    async with _pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            SELECT pipeline_id, module_path, is_enabled
            FROM pipeline.registry
            WHERE pipeline_id = $1
            """,
            pipeline_id,
        )

    if not row:
        raise HTTPException(status_code=404, detail=f"Pipeline not found: {pipeline_id}")

    if not row["is_enabled"]:
        raise HTTPException(status_code=400, detail=f"Pipeline is disabled: {pipeline_id}")

    # Import and instantiate
    try:
        # rsplit raises ValueError when the ":" separator is missing; that is
        # reported as a load failure like any other import problem.
        module_name, class_name = row["module_path"].rsplit(":", 1)
        pipeline_cls = getattr(importlib.import_module(module_name), class_name)
        instance = pipeline_cls()

        # Initialize the pipeline before it becomes visible through the cache.
        await instance.initialize()
    except Exception as e:
        log.exception("Failed to load pipeline %s", pipeline_id)
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(
            status_code=500, detail=f"Failed to load pipeline: {e}"
        ) from e

    _pipeline_instances[pipeline_id] = instance
    return instance
|
|
|
|
|
|
# ==================== API Endpoints ====================
|
|
|
|
|
|
@router.get("/", response_model=list[PipelineInfo])
async def list_pipelines(
    enabled_only: bool = Query(True, description="Only return enabled pipelines"),
) -> list[PipelineInfo]:
    """
    List registered pipelines, ordered by name.

    Each entry carries the summary fields (name, version, stages, enabled
    flag, input type). With ``enabled_only`` set, disabled pipelines are
    filtered out in the query. Responds with 503 when no database pool is set.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    enabled_query = """
                SELECT pipeline_id, name, description, version,
                       is_enabled, stages, input_type
                FROM pipeline.registry
                WHERE is_enabled = TRUE
                ORDER BY name
                """
    all_query = """
                SELECT pipeline_id, name, description, version,
                       is_enabled, stages, input_type
                FROM pipeline.registry
                ORDER BY name
                """

    async with _pool.acquire() as conn:
        rows = await conn.fetch(enabled_query if enabled_only else all_query)

    def to_info(record: Any) -> PipelineInfo:
        # NULL description / stages / input_type fall back to neutral defaults.
        return PipelineInfo(
            id=record["pipeline_id"],
            name=record["name"],
            description=record["description"] or "",
            version=record["version"],
            is_enabled=record["is_enabled"],
            stages=record["stages"] or [],
            input_type=record["input_type"] or "dict",
        )

    return [to_info(record) for record in rows]
|
|
|
|
|
|
@router.get("/{pipeline_id}", response_model=PipelineDetail)
async def get_pipeline(pipeline_id: str) -> PipelineDetail:
    """
    Get detailed information about a pipeline.

    Includes metadata, configuration, and registration timestamps.

    Raises:
        HTTPException: 503 when no database pool is set, 404 when the
            pipeline id is not present in ``pipeline.registry``.
    """
    import json

    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    async with _pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            SELECT pipeline_id, name, description, version, module_path,
                   is_enabled, stages, input_type, config,
                   created_at, updated_at
            FROM pipeline.registry
            WHERE pipeline_id = $1
            """,
            pipeline_id,
        )

    if not row:
        raise HTTPException(status_code=404, detail=f"Pipeline not found: {pipeline_id}")

    config = row["config"]
    if isinstance(config, str):
        # asyncpg hands json/jsonb values back as text unless a type codec is
        # registered on the connection; decode so the model receives a dict.
        # NOTE(review): column type is not visible here - confirm it is JSON.
        config = json.loads(config)

    created_at = row["created_at"]
    updated_at = row["updated_at"]

    return PipelineDetail(
        id=row["pipeline_id"],
        name=row["name"],
        description=row["description"] or "",
        version=row["version"],
        is_enabled=row["is_enabled"],
        stages=row["stages"] or [],
        input_type=row["input_type"] or "dict",
        module_path=row["module_path"],
        config=config,
        created_at=created_at.isoformat() if created_at else None,
        updated_at=updated_at.isoformat() if updated_at else None,
    )
|
|
|
|
|
|
@router.post("/{pipeline_id}/execute", response_model=ExecuteResponse)
async def execute_pipeline(
    pipeline_id: str,
    request: ExecuteRequest,
) -> ExecuteResponse:
    """
    Execute a pipeline.

    The pipeline can be executed with:
    - A job_id to process an existing scraper job
    - Direct input_data for testing
    - Specific stages to run (default: all)

    When a database pool is set, a ``pipeline.executions`` row is inserted
    with status ``running`` before the run and updated to ``completed`` or
    ``failed`` afterwards.

    Raises:
        HTTPException: whatever ``_get_pipeline_instance`` raises; 422 when
            ``job_id`` is not a valid UUID and an execution row must be
            written; 500 when the pipeline run (or recording its result)
            raises.
    """
    import uuid

    pipeline = await _get_pipeline_instance(pipeline_id)

    # Prepare input data on a copy so the request model is never mutated.
    input_data = dict(request.input_data or {})
    if request.job_id:
        input_data["job_id"] = request.job_id
    if request.business_id:
        input_data["business_id"] = request.business_id

    # Create execution record
    execution_uuid = uuid.uuid4()
    execution_id = str(execution_uuid)
    stages = request.stages or pipeline.get_stage_names()

    if _pool:
        job_uuid = None
        if request.job_id:
            # The executions row is given a UUID for job_id; reject malformed
            # values as a client error instead of letting ValueError become
            # an unhandled 500.
            try:
                job_uuid = uuid.UUID(request.job_id)
            except ValueError as e:
                raise HTTPException(
                    status_code=422,
                    detail=f"Invalid job_id (expected a UUID): {request.job_id}",
                ) from e

        async with _pool.acquire() as conn:
            # The row is created as 'running', so the start time is recorded
            # together with the creation time.
            await conn.execute(
                """
                INSERT INTO pipeline.executions (
                    id, pipeline_id, job_id, business_id,
                    status, stages_requested, started_at, created_at
                )
                VALUES ($1, $2, $3, $4, 'running', $5, NOW(), NOW())
                """,
                execution_uuid,
                pipeline_id,
                job_uuid,
                request.business_id,
                stages,
            )

    try:
        # Execute pipeline
        result = await pipeline.process(input_data, stages=stages)

        # Update execution status
        if _pool:
            async with _pool.acquire() as conn:
                await conn.execute(
                    """
                    UPDATE pipeline.executions
                    SET status = $2, stages_completed = $3, error_message = $4,
                        completed_at = NOW()
                    WHERE id = $1
                    """,
                    execution_uuid,
                    "completed" if result.success else "failed",
                    result.stages_run,
                    result.error,
                )

        return ExecuteResponse(
            execution_id=execution_id,
            pipeline_id=pipeline_id,
            success=result.success,
            stages_run=result.stages_run,
            error=result.error,
        )

    except Exception as e:
        log.exception("Pipeline execution failed: %s", e)

        # Best-effort status update: a database failure here must not hide
        # the original error from the caller.
        if _pool:
            try:
                async with _pool.acquire() as conn:
                    await conn.execute(
                        """
                        UPDATE pipeline.executions
                        SET status = 'failed', error_message = $2, completed_at = NOW()
                        WHERE id = $1
                        """,
                        execution_uuid,
                        str(e),
                    )
            except Exception:
                log.exception(
                    "Could not mark execution %s as failed", execution_id
                )

        raise HTTPException(status_code=500, detail=f"Execution failed: {e}") from e
|
|
|
|
|
|
@router.get("/{pipeline_id}/executions", response_model=list[ExecutionSummary])
async def list_executions(
    pipeline_id: str,
    status: str | None = Query(None, description="Filter by status"),
    limit: int = Query(50, ge=1, le=200, description="Max results"),
    offset: int = Query(0, ge=0, description="Offset for pagination"),
) -> list[ExecutionSummary]:
    """
    Return a pipeline's execution history, newest first.

    Results can be narrowed to one status and paged with ``limit`` and
    ``offset``. Responds with 503 when no database pool is set.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    # Positional arguments and their matching $n placeholders grow together.
    params: list[Any] = [pipeline_id]
    conditions = ["pipeline_id = $1"]
    param_idx = 2

    if status:
        params.append(status)
        conditions.append(f"status = ${param_idx}")
        param_idx += 1

    where_clause = " AND ".join(conditions)

    query = f"""
            SELECT id, pipeline_id, job_id, business_id, status,
                   stages_requested, stages_completed, error_message,
                   started_at, completed_at, created_at
            FROM pipeline.executions
            WHERE {where_clause}
            ORDER BY created_at DESC
            LIMIT ${param_idx} OFFSET ${param_idx + 1}
            """

    async with _pool.acquire() as conn:
        rows = await conn.fetch(query, *params, limit, offset)

    def iso(value: Any) -> str | None:
        # NULL timestamps are reported as None instead of being formatted.
        return value.isoformat() if value else None

    summaries = []
    for row in rows:
        summaries.append(
            ExecutionSummary(
                id=str(row["id"]),
                pipeline_id=row["pipeline_id"],
                job_id=str(row["job_id"]) if row["job_id"] else None,
                business_id=row["business_id"],
                status=row["status"],
                stages_requested=row["stages_requested"] or [],
                stages_completed=row["stages_completed"] or [],
                error_message=row["error_message"],
                started_at=iso(row["started_at"]),
                completed_at=iso(row["completed_at"]),
                created_at=iso(row["created_at"]),
            )
        )
    return summaries
|
|
|
|
|
|
@router.get("/{pipeline_id}/dashboard", response_model=DashboardConfigModel)
async def get_dashboard_config(pipeline_id: str) -> DashboardConfigModel:
    """
    Return the dashboard definition of a pipeline.

    The mapping returned by the pipeline's ``get_dashboard_config()`` is
    converted into a ``DashboardConfigModel``. Any failure while reading or
    converting it is logged and reported as HTTP 500.
    """
    pipeline = await _get_pipeline_instance(pipeline_id)

    try:
        config = pipeline.get_dashboard_config()

        sections = []
        for section in config["sections"]:
            sections.append(
                DashboardSectionModel(
                    id=section["id"],
                    title=section["title"],
                    description=section.get("description"),
                    widgets=section["widgets"],
                    collapsed=section.get("collapsed"),
                )
            )

        return DashboardConfigModel(
            pipeline_id=config["pipeline_id"],
            title=config["title"],
            description=config.get("description"),
            sections=sections,
            default_time_range=config.get("default_time_range"),
            refresh_interval=config.get("refresh_interval"),
        )

    except Exception as e:
        log.exception(f"Failed to get dashboard config: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get dashboard config: {e}"
        )
|
|
|
|
|
|
@router.get("/{pipeline_id}/widgets/{widget_id}")
async def get_widget_data(
    pipeline_id: str,
    widget_id: str,
    business_id: str | None = Query(None, description="Filter by business"),
    time_range: str = Query("30d", description="Time range (e.g., 7d, 30d, 90d)"),
    page: int = Query(1, ge=1, description="Page number for paginated widgets"),
    page_size: int = Query(10, ge=1, le=100, description="Items per page"),
) -> dict[str, Any]:
    """
    Fetch the data behind one dashboard widget.

    The query parameters are bundled into a dict and handed to the
    pipeline's ``get_widget_data(widget_id, params)``; its result is returned
    unchanged. The response format depends on the widget type. Common formats:
    - stat_card: {value, trend, ...}
    - chart: {data: [{x, y, ...}, ...]}
    - table: {data: [...], total: n}

    Any exception raised by the pipeline is logged and reported as HTTP 500.
    """
    pipeline = await _get_pipeline_instance(pipeline_id)

    widget_params = dict(
        business_id=business_id,
        time_range=time_range,
        page=page,
        page_size=page_size,
    )

    try:
        return await pipeline.get_widget_data(widget_id, widget_params)
    except Exception as e:
        log.exception(f"Failed to get widget data: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get widget data: {e}"
        )
|
|
|
|
|
|
@router.post("/{pipeline_id}/enable")
async def enable_pipeline(pipeline_id: str) -> dict[str, str]:
    """Mark a pipeline as enabled in the registry.

    Responds with 503 when no database pool is set and 404 when no registry
    row matched. Any cached instance of the pipeline is dropped afterwards.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    async with _pool.acquire() as conn:
        command_tag = await conn.execute(
            """
            UPDATE pipeline.registry
            SET is_enabled = TRUE, updated_at = NOW()
            WHERE pipeline_id = $1
            """,
            pipeline_id,
        )

    # The last token of the returned status string is the affected-row count.
    rows_affected = command_tag.split()[-1]
    if rows_affected == "0":
        raise HTTPException(status_code=404, detail=f"Pipeline not found: {pipeline_id}")

    # Clear cached instance
    _pipeline_instances.pop(pipeline_id, None)

    return {"status": "enabled", "pipeline_id": pipeline_id}
|
|
|
|
|
|
@router.post("/{pipeline_id}/disable")
async def disable_pipeline(pipeline_id: str) -> dict[str, str]:
    """Mark a pipeline as disabled in the registry.

    Responds with 503 when no database pool is set and 404 when no registry
    row matched. Any cached instance of the pipeline is dropped afterwards.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    async with _pool.acquire() as conn:
        command_tag = await conn.execute(
            """
            UPDATE pipeline.registry
            SET is_enabled = FALSE, updated_at = NOW()
            WHERE pipeline_id = $1
            """,
            pipeline_id,
        )

    # The last token of the returned status string is the affected-row count.
    rows_affected = command_tag.split()[-1]
    if rows_affected == "0":
        raise HTTPException(status_code=404, detail=f"Pipeline not found: {pipeline_id}")

    # Clear cached instance
    _pipeline_instances.pop(pipeline_id, None)

    return {"status": "disabled", "pipeline_id": pipeline_id}
|
|
|
|
|
|
@router.get("/{pipeline_id}/health")
async def pipeline_health(pipeline_id: str) -> dict[str, Any]:
    """
    Check pipeline health.

    Returns the mapping produced by the pipeline's ``health_check()`` merged
    with ``pipeline_id``. If the check raises, the failure is logged and a
    ``{"healthy": False, "error": ...}`` payload is returned instead of an
    HTTP error. Errors from loading the pipeline itself are not caught here.
    """
    pipeline = await _get_pipeline_instance(pipeline_id)

    try:
        health = await pipeline.health_check()
        return {
            "pipeline_id": pipeline_id,
            **health,
        }
    except Exception as e:
        # Log like the other endpoints do, so failed checks leave a trace.
        log.exception("Health check failed for pipeline %s", pipeline_id)
        return {
            "pipeline_id": pipeline_id,
            "healthy": False,
            "error": str(e),
        }
|