feat: Add extensible multi-pipeline integration system

This commit implements a plugin-like pipeline architecture with:

Pipeline Core Package (packages/pipeline-core/):
- BasePipeline abstract class that all pipelines implement
- PipelineRegistry for database-backed discovery/management
- PipelineRunner for execution with status tracking
- DashboardConfig contracts for dynamic widget definitions

Database Migration (006_pipeline_registry.sql):
- pipeline.registry table for registered pipelines
- pipeline.executions table for execution history
- Views for execution stats and monitoring

ReviewIQ Pipeline Refactor:
- Implements BasePipeline interface
- Adds get_dashboard_config() with widget definitions
- Adds get_widget_data() methods for all dashboard widgets
- Maintains backward compatibility with Pipeline alias

Generic Pipeline API (api/routes/pipelines.py):
- GET /api/pipelines - List all registered pipelines
- GET /api/pipelines/{id} - Pipeline details
- POST /api/pipelines/{id}/execute - Execute pipeline
- GET /api/pipelines/{id}/dashboard - Dashboard config
- GET /api/pipelines/{id}/widgets/{w} - Widget data
- GET /api/pipelines/{id}/executions - Execution history
- POST /api/pipelines/{id}/enable - Enable pipeline
- POST /api/pipelines/{id}/disable - Disable pipeline
- GET /api/pipelines/{id}/health - Pipeline health check

Frontend Dynamic Dashboard System:
- DynamicDashboard component renders from config
- WidgetRegistry maps types to components
- Widget components: StatCard, LineChart, BarChart,
  PieChart, DataTable, Heatmap
- Pipeline API client library

Frontend Pipeline Pages:
- /pipelines - List all registered pipelines
- /pipelines/[id] - Dynamic dashboard for pipeline
- /pipelines/[id]/executions - Execution history
- Pipelines nav item in Sidebar

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 19:05:38 +00:00
parent d64f06ba9e
commit 824634aa76
30 changed files with 5697 additions and 95 deletions

View File

@@ -6,6 +6,7 @@ This module exports all route modules for easy import into the main server.
from api.routes.batches import router as batches_router, set_database as set_batches_db
from api.routes.dashboard import router as dashboard_router, set_database as set_dashboard_db
from api.routes.admin import router as admin_router, set_database as set_admin_db
from api.routes.pipelines import router as pipelines_router, set_database as set_pipelines_db
__all__ = [
'batches_router',
@@ -14,4 +15,6 @@ __all__ = [
'set_dashboard_db',
'admin_router',
'set_admin_db',
'pipelines_router',
'set_pipelines_db',
]

560
api/routes/pipelines.py Normal file
View File

@@ -0,0 +1,560 @@
#!/usr/bin/env python3
"""
Generic Pipeline API endpoints.

Provides a unified API for all registered pipelines:
- List available pipelines
- Get pipeline details and metadata
- Execute pipelines
- Get dashboard configuration
- Get widget data
- List execution history
- Enable or disable a pipeline
- Check pipeline health
"""
import logging
from typing import Any
import asyncpg
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel, Field
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Create router; every route declared in this module is mounted under
# the /api/pipelines prefix and tagged "pipelines".
router = APIRouter(prefix="/api/pipelines", tags=["pipelines"])

# Database pool (set by main server through set_database()). It is None
# until then, and the endpoints that query the database directly answer
# 503 while it is unset.
_pool: asyncpg.Pool | None = None

# Pipeline instances cache, keyed by pipeline_id. Entries are added by
# _get_pipeline_instance() after a successful initialize() and removed by
# the enable/disable endpoints.
_pipeline_instances: dict[str, Any] = {}
def set_database(pool: asyncpg.Pool) -> None:
    """Set the database pool for pipeline operations.

    Rebinds the module-level ``_pool`` that the endpoints in this module
    read. Calling it again replaces the previously stored pool.

    Args:
        pool: The asyncpg pool to use for registry and execution queries.
    """
    global _pool
    _pool = pool
# ==================== Pydantic Models ====================
class PipelineInfo(BaseModel):
    """Summary information about a pipeline.

    This is the element type of the list endpoint's response and the base
    of ``PipelineDetail``. Every field is required.
    """

    id: str = Field(..., description="Pipeline identifier")
    name: str = Field(..., description="Display name")
    description: str = Field(..., description="Human-readable description")
    version: str = Field(..., description="Semantic version")
    is_enabled: bool = Field(..., description="Whether pipeline is enabled")
    stages: list[str] = Field(..., description="Available stages")
    input_type: str = Field(..., description="Expected input type")
class PipelineDetail(PipelineInfo):
    """Detailed pipeline information.

    Extends ``PipelineInfo`` with the registry's module path, stored
    configuration and timestamps. The endpoint that builds this model
    fills the timestamps with ISO-8601 strings, or ``None`` when unset.
    """

    module_path: str = Field(..., description="Python module path")
    config: dict[str, Any] | None = Field(None, description="Pipeline configuration")
    created_at: str | None = Field(None, description="Registration timestamp")
    updated_at: str | None = Field(None, description="Last update timestamp")
class ExecuteRequest(BaseModel):
    """Request to execute a pipeline.

    Every field is optional. The execute endpoint copies ``job_id`` and
    ``business_id`` into the input data handed to the pipeline, and runs
    all of the pipeline's stages when ``stages`` is omitted or empty.
    """

    job_id: str | None = Field(None, description="Job ID to process")
    business_id: str | None = Field(None, description="Business identifier")
    input_data: dict[str, Any] | None = Field(None, description="Direct input data")
    stages: list[str] | None = Field(None, description="Stages to run (default: all)")
    options: dict[str, Any] | None = Field(None, description="Pipeline-specific options")
class ExecuteResponse(BaseModel):
    """Response from pipeline execution.

    ``execution_id`` is the identifier of the execution record created
    for this run; ``error`` is only populated when the run reported one.
    """

    execution_id: str = Field(..., description="Execution identifier")
    pipeline_id: str = Field(..., description="Pipeline that was executed")
    success: bool = Field(..., description="Whether execution succeeded")
    stages_run: list[str] = Field(..., description="Stages that were run")
    error: str | None = Field(None, description="Error message if failed")
class ExecutionSummary(BaseModel):
    """Summary of a pipeline execution.

    One entry of the execution-history listing. The listing endpoint
    renders identifiers as strings and timestamps as ISO-8601 strings,
    leaving a timestamp ``None`` when the stored value is missing.
    """

    id: str = Field(..., description="Execution identifier")
    pipeline_id: str = Field(..., description="Pipeline identifier")
    job_id: str | None = Field(None, description="Associated job ID")
    business_id: str | None = Field(None, description="Business identifier")
    status: str = Field(..., description="Execution status")
    stages_requested: list[str] = Field(..., description="Stages requested")
    stages_completed: list[str] = Field(..., description="Stages completed")
    error_message: str | None = Field(None, description="Error if failed")
    started_at: str | None = Field(None, description="Start timestamp")
    completed_at: str | None = Field(None, description="Completion timestamp")
    created_at: str | None = Field(None, description="Creation timestamp")
class DashboardSectionModel(BaseModel):
    """Dashboard section configuration.

    One section of a pipeline dashboard: an identifier, a title and the
    widget definitions it contains.
    """

    # Section identifier and heading, taken verbatim from the pipeline's config.
    id: str
    title: str
    description: str | None = None
    # Widget definitions are passed through as free-form dicts; their schema
    # is not validated here.
    widgets: list[dict[str, Any]]
    # NOTE(review): presumably whether the section starts collapsed in the
    # UI - confirm against the frontend.
    collapsed: bool | None = None
class DashboardConfigModel(BaseModel):
    """Dashboard configuration for a pipeline.

    Response model of the dashboard endpoint: a titled, ordered list of
    sections plus optional display hints.
    """

    pipeline_id: str
    title: str
    description: str | None = None
    sections: list[DashboardSectionModel]
    # NOTE(review): format looks like the "7d"/"30d" strings accepted by the
    # widget-data endpoint - confirm.
    default_time_range: str | None = None
    # NOTE(review): unit is not stated in this module (seconds? ms?) - confirm.
    refresh_interval: int | None = None
# ==================== Helper Functions ====================
async def _get_pipeline_instance(pipeline_id: str) -> Any:
    """Get or create a pipeline instance.

    A cached instance is returned as-is. Otherwise the pipeline is looked
    up in ``pipeline.registry``, its class is imported from the row's
    ``module_path`` (``"package.module:ClassName"``), instantiated with no
    arguments, initialized, and cached under ``pipeline_id``.

    Args:
        pipeline_id: Registry identifier of the pipeline.

    Returns:
        The initialized pipeline instance.

    Raises:
        HTTPException: 503 if the database pool is not set, 404 if the
            pipeline is not registered, 400 if it is disabled, and 500 if
            importing, instantiating or initializing it fails.
    """
    import importlib

    if pipeline_id in _pipeline_instances:
        return _pipeline_instances[pipeline_id]

    if _pool is None:
        raise HTTPException(status_code=503, detail="Database not initialized")

    # Look up pipeline in registry
    async with _pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            SELECT pipeline_id, module_path, is_enabled
            FROM pipeline.registry
            WHERE pipeline_id = $1
            """,
            pipeline_id,
        )

    if not row:
        raise HTTPException(status_code=404, detail=f"Pipeline not found: {pipeline_id}")
    if not row["is_enabled"]:
        raise HTTPException(status_code=400, detail=f"Pipeline is disabled: {pipeline_id}")

    # Import and instantiate
    # NOTE(review): this imports whatever module the registry row names, so
    # write access to pipeline.registry amounts to code execution in this
    # process. Keep registry writes restricted to trusted operators.
    try:
        module_path = row["module_path"]
        module_name, separator, class_name = module_path.rpartition(":")
        if not (separator and module_name and class_name):
            # Fail with a readable message instead of a tuple-unpacking error.
            raise ValueError(
                f"module_path must look like 'package.module:ClassName', got {module_path!r}"
            )

        module = importlib.import_module(module_name)
        cls = getattr(module, class_name)
        instance = cls()

        # Initialize the pipeline; it is cached only after this succeeds so a
        # half-initialized instance is never handed out.
        await instance.initialize()
        _pipeline_instances[pipeline_id] = instance
        return instance
    except Exception as e:
        log.exception("Failed to load pipeline %s", pipeline_id)
        raise HTTPException(
            status_code=500, detail=f"Failed to load pipeline: {e}"
        ) from e
# ==================== API Endpoints ====================
@router.get("/", response_model=list[PipelineInfo])
async def list_pipelines(
    enabled_only: bool = Query(True, description="Only return enabled pipelines"),
) -> list[PipelineInfo]:
    """
    List all registered pipelines.

    Each entry carries the pipeline's name, description, version, stages,
    input type and enabled flag, ordered by name. With ``enabled_only``
    (the default) disabled pipelines are left out.

    Raises:
        HTTPException: 503 if the database pool has not been set.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    enabled_query = """
        SELECT pipeline_id, name, description, version,
        is_enabled, stages, input_type
        FROM pipeline.registry
        WHERE is_enabled = TRUE
        ORDER BY name
        """
    everything_query = """
        SELECT pipeline_id, name, description, version,
        is_enabled, stages, input_type
        FROM pipeline.registry
        ORDER BY name
        """
    query = enabled_query if enabled_only else everything_query

    async with _pool.acquire() as conn:
        records = await conn.fetch(query)

    def to_info(record: Any) -> PipelineInfo:
        # NULL description/stages/input_type fall back to neutral defaults.
        return PipelineInfo(
            id=record["pipeline_id"],
            name=record["name"],
            description=record["description"] or "",
            version=record["version"],
            is_enabled=record["is_enabled"],
            stages=record["stages"] or [],
            input_type=record["input_type"] or "dict",
        )

    return [to_info(record) for record in records]
@router.get("/{pipeline_id}", response_model=PipelineDetail)
async def get_pipeline(pipeline_id: str) -> PipelineDetail:
    """
    Get detailed information about a pipeline.

    Includes metadata, configuration, and registration timestamps
    (rendered as ISO-8601 strings, or ``None`` when unset).

    Args:
        pipeline_id: Registry identifier of the pipeline.

    Raises:
        HTTPException: 503 if the database pool has not been set, 404 if
            no registry row matches ``pipeline_id``.
    """
    import json

    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    async with _pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            SELECT pipeline_id, name, description, version, module_path,
            is_enabled, stages, input_type, config,
            created_at, updated_at
            FROM pipeline.registry
            WHERE pipeline_id = $1
            """,
            pipeline_id,
        )

    if not row:
        raise HTTPException(status_code=404, detail=f"Pipeline not found: {pipeline_id}")

    config = row["config"]
    if isinstance(config, str):
        # asyncpg returns json/jsonb columns as text unless a type codec is
        # registered on the connection; decode so the dict-typed response
        # field validates instead of failing with a 500.
        config = json.loads(config)

    created_at = row["created_at"]
    updated_at = row["updated_at"]
    return PipelineDetail(
        id=row["pipeline_id"],
        name=row["name"],
        description=row["description"] or "",
        version=row["version"],
        is_enabled=row["is_enabled"],
        stages=row["stages"] or [],
        input_type=row["input_type"] or "dict",
        module_path=row["module_path"],
        config=config,
        created_at=created_at.isoformat() if created_at else None,
        updated_at=updated_at.isoformat() if updated_at else None,
    )
@router.post("/{pipeline_id}/execute", response_model=ExecuteResponse)
async def execute_pipeline(
    pipeline_id: str,
    request: ExecuteRequest,
) -> ExecuteResponse:
    """
    Execute a pipeline.

    The pipeline can be executed with:
    - A job_id to process an existing scraper job
    - Direct input_data for testing
    - Specific stages to run (default: all)

    An execution row is inserted with status ``running`` before the run
    and updated to ``completed`` or ``failed`` afterwards.

    Raises:
        HTTPException: 400 if ``job_id`` is not a valid UUID, 500 if the
            pipeline raises while processing, plus whatever
            ``_get_pipeline_instance`` raises while loading the pipeline.
    """
    import uuid

    pipeline = await _get_pipeline_instance(pipeline_id)

    # Reject a malformed job_id as a client error; left unchecked, the UUID
    # conversion for the execution row raises ValueError and surfaces as 500.
    job_uuid = None
    if request.job_id:
        try:
            job_uuid = uuid.UUID(request.job_id)
        except ValueError as e:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid job_id (expected a UUID): {request.job_id}",
            ) from e

    # Prepare input data on a copy so the request model's dict is not mutated.
    input_data = dict(request.input_data or {})
    if request.job_id:
        input_data["job_id"] = request.job_id
    if request.business_id:
        input_data["business_id"] = request.business_id

    # Create execution record
    execution_uuid = uuid.uuid4()
    execution_id = str(execution_uuid)
    stages = request.stages or pipeline.get_stage_names()

    if _pool:
        async with _pool.acquire() as conn:
            # The row is created already 'running', so started_at is stamped
            # here; the history listing reads that column.
            await conn.execute(
                """
                INSERT INTO pipeline.executions (
                    id, pipeline_id, job_id, business_id,
                    status, stages_requested, started_at, created_at
                )
                VALUES ($1, $2, $3, $4, 'running', $5, NOW(), NOW())
                """,
                execution_uuid,
                pipeline_id,
                job_uuid,
                request.business_id,
                stages,
            )

    try:
        # Execute pipeline
        result = await pipeline.process(input_data, stages=stages)

        # Update execution status
        if _pool:
            async with _pool.acquire() as conn:
                await conn.execute(
                    """
                    UPDATE pipeline.executions
                    SET status = $2, stages_completed = $3, error_message = $4,
                    completed_at = NOW()
                    WHERE id = $1
                    """,
                    execution_uuid,
                    "completed" if result.success else "failed",
                    result.stages_run,
                    result.error,
                )

        return ExecuteResponse(
            execution_id=execution_id,
            pipeline_id=pipeline_id,
            success=result.success,
            stages_run=result.stages_run,
            error=result.error,
        )
    except Exception as e:
        log.exception("Pipeline execution failed: %s", e)

        # Update execution status. This is best-effort bookkeeping: if it
        # fails too, log it and still report the original error.
        try:
            if _pool:
                async with _pool.acquire() as conn:
                    await conn.execute(
                        """
                        UPDATE pipeline.executions
                        SET status = 'failed', error_message = $2, completed_at = NOW()
                        WHERE id = $1
                        """,
                        execution_uuid,
                        str(e),
                    )
        except Exception:
            log.exception("Failed to record failure for execution %s", execution_id)

        raise HTTPException(status_code=500, detail=f"Execution failed: {e}") from e
@router.get("/{pipeline_id}/executions", response_model=list[ExecutionSummary])
async def list_executions(
    pipeline_id: str,
    status: str | None = Query(None, description="Filter by status"),
    limit: int = Query(50, ge=1, le=200, description="Max results"),
    offset: int = Query(0, ge=0, description="Offset for pagination"),
) -> list[ExecutionSummary]:
    """
    List execution history for a pipeline.

    Rows come back newest first (by ``created_at``). ``status`` narrows
    the listing to one status; ``limit`` and ``offset`` paginate it.

    Raises:
        HTTPException: 503 if the database pool has not been set.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    # Only placeholder numbers are interpolated into the SQL text; every
    # value travels as a bound parameter.
    filters = ["pipeline_id = $1"]
    args: list[Any] = [pipeline_id]
    if status:
        args.append(status)
        filters.append(f"status = ${len(args)}")
    param_idx = len(args) + 1
    where_clause = " AND ".join(filters)

    async with _pool.acquire() as conn:
        records = await conn.fetch(
            f"""
            SELECT id, pipeline_id, job_id, business_id, status,
            stages_requested, stages_completed, error_message,
            started_at, completed_at, created_at
            FROM pipeline.executions
            WHERE {where_clause}
            ORDER BY created_at DESC
            LIMIT ${param_idx} OFFSET ${param_idx + 1}
            """,
            *args,
            limit,
            offset,
        )

    def iso_or_none(moment: Any) -> str | None:
        return moment.isoformat() if moment else None

    summaries = []
    for record in records:
        job_id = record["job_id"]
        summaries.append(
            ExecutionSummary(
                id=str(record["id"]),
                pipeline_id=record["pipeline_id"],
                job_id=str(job_id) if job_id else None,
                business_id=record["business_id"],
                status=record["status"],
                stages_requested=record["stages_requested"] or [],
                stages_completed=record["stages_completed"] or [],
                error_message=record["error_message"],
                started_at=iso_or_none(record["started_at"]),
                completed_at=iso_or_none(record["completed_at"]),
                created_at=iso_or_none(record["created_at"]),
            )
        )
    return summaries
@router.get("/{pipeline_id}/dashboard", response_model=DashboardConfigModel)
async def get_dashboard_config(pipeline_id: str) -> DashboardConfigModel:
    """
    Get dashboard configuration for a pipeline.

    Asks the pipeline instance for its dashboard config and converts it
    into the response model the frontend renders the dashboard from.

    Raises:
        HTTPException: 500 if the pipeline's config cannot be read or
            converted, plus whatever loading the pipeline raises.
    """
    pipeline = await _get_pipeline_instance(pipeline_id)

    def build_section(raw_section: Any) -> DashboardSectionModel:
        return DashboardSectionModel(
            id=raw_section["id"],
            title=raw_section["title"],
            description=raw_section.get("description"),
            widgets=raw_section["widgets"],
            collapsed=raw_section.get("collapsed"),
        )

    try:
        raw = pipeline.get_dashboard_config()
        # Keys are read in declaration order, so the first missing key is the
        # one reported in the error detail.
        fields = {
            "pipeline_id": raw["pipeline_id"],
            "title": raw["title"],
            "description": raw.get("description"),
            "sections": [build_section(entry) for entry in raw["sections"]],
            "default_time_range": raw.get("default_time_range"),
            "refresh_interval": raw.get("refresh_interval"),
        }
        return DashboardConfigModel(**fields)
    except Exception as e:
        log.exception(f"Failed to get dashboard config: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get dashboard config: {e}"
        )
@router.get("/{pipeline_id}/widgets/{widget_id}")
async def get_widget_data(
    pipeline_id: str,
    widget_id: str,
    business_id: str | None = Query(None, description="Filter by business"),
    time_range: str = Query("30d", description="Time range (e.g., 7d, 30d, 90d)"),
    page: int = Query(1, ge=1, description="Page number for paginated widgets"),
    page_size: int = Query(10, ge=1, le=100, description="Items per page"),
) -> dict[str, Any]:
    """
    Get data for a specific dashboard widget.

    The query parameters are bundled into one dict and handed to the
    pipeline together with ``widget_id``; the pipeline's answer is
    returned unchanged. The response format depends on the widget type.
    Common formats:
    - stat_card: {value, trend, ...}
    - chart: {data: [{x, y, ...}, ...]}
    - table: {data: [...], total: n}

    Raises:
        HTTPException: 500 if the pipeline fails to produce the data, plus
            whatever loading the pipeline raises.
    """
    pipeline = await _get_pipeline_instance(pipeline_id)

    widget_params = dict(
        business_id=business_id,
        time_range=time_range,
        page=page,
        page_size=page_size,
    )
    try:
        return await pipeline.get_widget_data(widget_id, widget_params)
    except Exception as e:
        log.exception(f"Failed to get widget data: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get widget data: {e}"
        )
@router.post("/{pipeline_id}/enable")
async def enable_pipeline(pipeline_id: str) -> dict[str, str]:
    """Enable a disabled pipeline.

    Sets ``is_enabled`` on the registry row and drops any cached instance
    of the pipeline.

    Raises:
        HTTPException: 503 if the database pool has not been set, 404 if
            no registry row matched ``pipeline_id``.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    async with _pool.acquire() as conn:
        command_tag = await conn.execute(
            """
            UPDATE pipeline.registry
            SET is_enabled = TRUE, updated_at = NOW()
            WHERE pipeline_id = $1
            """,
            pipeline_id,
        )

    # The status string ends with the affected-row count, e.g. "UPDATE 1".
    rows_affected = command_tag.split()[-1]
    if rows_affected == "0":
        raise HTTPException(status_code=404, detail=f"Pipeline not found: {pipeline_id}")

    # Clear cached instance
    _pipeline_instances.pop(pipeline_id, None)
    return dict(status="enabled", pipeline_id=pipeline_id)
@router.post("/{pipeline_id}/disable")
async def disable_pipeline(pipeline_id: str) -> dict[str, str]:
    """Disable a pipeline.

    Clears ``is_enabled`` on the registry row and drops any cached
    instance of the pipeline.

    Raises:
        HTTPException: 503 if the database pool has not been set, 404 if
            no registry row matched ``pipeline_id``.
    """
    if not _pool:
        raise HTTPException(status_code=503, detail="Database not initialized")

    async with _pool.acquire() as conn:
        command_tag = await conn.execute(
            """
            UPDATE pipeline.registry
            SET is_enabled = FALSE, updated_at = NOW()
            WHERE pipeline_id = $1
            """,
            pipeline_id,
        )

    # The status string ends with the affected-row count, e.g. "UPDATE 1".
    rows_affected = command_tag.split()[-1]
    if rows_affected == "0":
        raise HTTPException(status_code=404, detail=f"Pipeline not found: {pipeline_id}")

    # Clear cached instance
    _pipeline_instances.pop(pipeline_id, None)
    return dict(status="disabled", pipeline_id=pipeline_id)
@router.get("/{pipeline_id}/health")
async def pipeline_health(pipeline_id: str) -> dict[str, Any]:
    """
    Check pipeline health.

    Returns the pipeline's own health report merged with its id. If the
    health check raises, the failure is logged and reported as
    ``{"healthy": False, "error": ...}`` rather than as an HTTP error.

    Raises:
        HTTPException: Whatever loading the pipeline raises (for example
            404 for an unknown pipeline); health-check failures are not
            raised.
    """
    pipeline = await _get_pipeline_instance(pipeline_id)

    try:
        health = await pipeline.health_check()
        return {
            "pipeline_id": pipeline_id,
            **health,
        }
    except Exception as e:
        # Keep the traceback in the server log, as the sibling endpoints do;
        # the response only carries the message.
        log.exception("Health check failed for pipeline %s", pipeline_id)
        return {
            "pipeline_id": pipeline_id,
            "healthy": False,
            "error": str(e),
        }