feat: Add extensible multi-pipeline integration system

This commit implements a plugin-like pipeline architecture with:

Pipeline Core Package (packages/pipeline-core/):
- BasePipeline abstract class that all pipelines implement
- PipelineRegistry for database-backed discovery/management
- PipelineRunner for execution with status tracking
- DashboardConfig contracts for dynamic widget definitions

Database Migration (006_pipeline_registry.sql):
- pipeline.registry table for registered pipelines
- pipeline.executions table for execution history
- Views for execution stats and monitoring

ReviewIQ Pipeline Refactor:
- Implements BasePipeline interface
- Adds get_dashboard_config() with widget definitions
- Adds get_widget_data() methods for all dashboard widgets
- Maintains backward compatibility with Pipeline alias

Generic Pipeline API (api/routes/pipelines.py):
- GET /api/pipelines - List all registered pipelines
- GET /api/pipelines/{id} - Pipeline details
- POST /api/pipelines/{id}/execute - Execute pipeline
- GET /api/pipelines/{id}/dashboard - Dashboard config
- GET /api/pipelines/{id}/widgets/{w} - Widget data
- GET /api/pipelines/{id}/executions - Execution history

Frontend Dynamic Dashboard System:
- DynamicDashboard component renders from config
- WidgetRegistry maps types to components
- Widget components: StatCard, LineChart, BarChart,
  PieChart, DataTable, Heatmap
- Pipeline API client library

Frontend Pipeline Pages:
- /pipelines - List all registered pipelines
- /pipelines/[id] - Dynamic dashboard for pipeline
- /pipelines/[id]/executions - Execution history
- Pipelines nav item in Sidebar

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 19:05:38 +00:00
parent d64f06ba9e
commit 824634aa76
30 changed files with 5697 additions and 95 deletions

View File

@@ -0,0 +1,253 @@
-- =============================================================================
-- Migration: 006_pipeline_registry.sql
-- Pipeline Registry and Execution History
-- =============================================================================
--
-- Creates tables for the extensible pipeline system:
-- pipeline.registry - Registered pipelines and their metadata
-- pipeline.executions - Pipeline execution history
--
-- This enables dynamic pipeline discovery, registration, and execution tracking.
--
-- Date: 2026-01-24
-- =============================================================================
-- Ensure the pipeline schema exists before any objects are created in it.
-- It should already exist from migration 005; IF NOT EXISTS makes this a
-- no-op in that case.
CREATE SCHEMA IF NOT EXISTS pipeline;
-- =============================================================================
-- SECTION 1: PIPELINE REGISTRY
-- =============================================================================
-- pipeline.registry: one row per registered pipeline, carrying the metadata
-- used to discover it and to import its implementation dynamically.
CREATE TABLE IF NOT EXISTS pipeline.registry (
    id           UUID         PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Identity: pipeline_id is the unique key for a registered pipeline.
    pipeline_id  VARCHAR(50)  NOT NULL UNIQUE,
    name         VARCHAR(255) NOT NULL,
    description  TEXT,
    version      VARCHAR(20)  NOT NULL,

    -- Import target for dynamic loading,
    -- e.g. "reviewiq_pipeline.pipeline:ReviewIQPipeline".
    module_path  VARCHAR(255) NOT NULL,

    -- Configuration: ordered stage names, expected input type, free-form JSON.
    stages       TEXT[]       NOT NULL DEFAULT '{}',
    input_type   VARCHAR(100) NOT NULL DEFAULT 'dict',
    config       JSONB,

    -- Availability flag; new rows are enabled unless stated otherwise.
    is_enabled   BOOLEAN      NOT NULL DEFAULT TRUE,

    -- Row timestamps, both defaulting to the insert time.
    created_at   TIMESTAMPTZ  NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMPTZ  NOT NULL DEFAULT NOW()
);

-- Catalog descriptions for the table and its less self-explanatory columns.
COMMENT ON TABLE pipeline.registry
    IS 'Registered pipelines available for execution';
COMMENT ON COLUMN pipeline.registry.pipeline_id
    IS 'Unique pipeline identifier (e.g., "reviewiq")';
COMMENT ON COLUMN pipeline.registry.module_path
    IS 'Python module path for dynamic import (package.module:ClassName)';
COMMENT ON COLUMN pipeline.registry.stages
    IS 'Ordered list of stage names';
COMMENT ON COLUMN pipeline.registry.input_type
    IS 'Expected input type (for documentation/validation)';
COMMENT ON COLUMN pipeline.registry.config
    IS 'Pipeline-specific configuration as JSON';
-- Partial index that only contains rows for enabled pipelines
-- (is_enabled = TRUE); disabled rows are not indexed.
CREATE INDEX IF NOT EXISTS idx_registry_enabled
    ON pipeline.registry (is_enabled)
    WHERE is_enabled = TRUE;
-- =============================================================================
-- SECTION 2: EXECUTION HISTORY
-- =============================================================================
-- Lifecycle states of a pipeline execution.
-- The CREATE TYPE is wrapped in a DO block that swallows duplicate_object so
-- the migration can be re-run when the type already exists.
DO $$
BEGIN
    CREATE TYPE pipeline.execution_status AS ENUM (
        'pending', 'running', 'completed', 'failed', 'cancelled'
    );
EXCEPTION
    WHEN duplicate_object THEN
        NULL;  -- type already present; nothing to do
END
$$;
-- pipeline.executions: one row per pipeline run, kept for monitoring and
-- debugging.
CREATE TABLE IF NOT EXISTS pipeline.executions (
    id                UUID                      PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Which pipeline ran. No FOREIGN KEY is declared on this column; it is
    -- matched against pipeline.registry.pipeline_id by value only.
    pipeline_id       VARCHAR(50)               NOT NULL,

    -- Optional associations
    job_id            UUID,          -- scraper job (soft FK to public.jobs)
    business_id       VARCHAR(255),  -- business being processed

    -- Current lifecycle state; new rows start as 'pending'.
    status            pipeline.execution_status NOT NULL DEFAULT 'pending',

    -- Stage progress
    stages_requested  TEXT[]                    NOT NULL DEFAULT '{}',
    stages_completed  TEXT[]                    NOT NULL DEFAULT '{}',
    current_stage     VARCHAR(100),

    -- Compact input/output summaries, for quick reference without loading
    -- the full data.
    input_summary     JSONB,
    result_summary    JSONB,

    -- Failure detail
    error_message     TEXT,

    -- Timing: started_at and completed_at are nullable; created_at defaults
    -- to the insert time.
    started_at        TIMESTAMPTZ,
    completed_at      TIMESTAMPTZ,
    created_at        TIMESTAMPTZ               NOT NULL DEFAULT NOW()
);

-- Catalog descriptions for the table and selected columns.
COMMENT ON TABLE pipeline.executions
    IS 'Pipeline execution history for monitoring and debugging';
COMMENT ON COLUMN pipeline.executions.pipeline_id
    IS 'Reference to pipeline.registry.pipeline_id';
COMMENT ON COLUMN pipeline.executions.job_id
    IS 'Optional link to scraper job (soft FK)';
COMMENT ON COLUMN pipeline.executions.stages_requested
    IS 'Stages requested to run';
COMMENT ON COLUMN pipeline.executions.stages_completed
    IS 'Stages that completed successfully';
COMMENT ON COLUMN pipeline.executions.input_summary
    IS 'Summary of input data (for display)';
COMMENT ON COLUMN pipeline.executions.result_summary
    IS 'Summary of results (for display)';
-- Lookup indexes on pipeline.executions.

-- By pipeline.
-- NOTE(review): pipeline_id is also the leading column of
-- idx_executions_pipeline_status below, so this index may be redundant;
-- confirm with EXPLAIN before dropping either one.
CREATE INDEX IF NOT EXISTS idx_executions_pipeline_id
    ON pipeline.executions (pipeline_id);

-- By scraper job; partial, so rows without a job are not indexed.
CREATE INDEX IF NOT EXISTS idx_executions_job_id
    ON pipeline.executions (job_id)
    WHERE job_id IS NOT NULL;

-- By business; partial, so rows without a business are not indexed.
CREATE INDEX IF NOT EXISTS idx_executions_business_id
    ON pipeline.executions (business_id)
    WHERE business_id IS NOT NULL;

-- By lifecycle state.
CREATE INDEX IF NOT EXISTS idx_executions_status
    ON pipeline.executions (status);

-- Newest-first ordering by creation time.
CREATE INDEX IF NOT EXISTS idx_executions_created_at
    ON pipeline.executions (created_at DESC);

-- Composite index for the common "executions of pipeline X in state Y,
-- newest first" query pattern.
CREATE INDEX IF NOT EXISTS idx_executions_pipeline_status
    ON pipeline.executions (pipeline_id, status, created_at DESC);
-- =============================================================================
-- SECTION 3: TRIGGER FOR UPDATED_AT
-- =============================================================================
-- Trigger function: writes NOW() into updated_at on the row being modified
-- and returns that row so the triggering statement stores it.
CREATE OR REPLACE FUNCTION pipeline.update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$;
-- Keep pipeline.registry.updated_at current: before each row UPDATE, run
-- pipeline.update_updated_at_column(). The trigger is dropped first so the
-- migration can be re-run without a "trigger already exists" error.
DROP TRIGGER IF EXISTS tr_registry_updated_at
    ON pipeline.registry;

CREATE TRIGGER tr_registry_updated_at
    BEFORE UPDATE
    ON pipeline.registry
    FOR EACH ROW
    EXECUTE FUNCTION pipeline.update_updated_at_column();
-- =============================================================================
-- SECTION 4: INITIAL DATA
-- =============================================================================
-- Register the ReviewIQ pipeline (can be updated by the application on startup).
--
-- Upsert keyed on pipeline_id:
--   * first run inserts the row;
--   * later runs overwrite name, description, version, module_path, stages
--     and input_type with the values below;
--   * is_enabled and config are not touched on conflict, so any value already
--     stored for them is kept;
--   * the WHERE guard skips the update entirely when none of the overwritten
--     columns differ, so re-running this migration does not rewrite the row
--     or bump updated_at for a no-op. IS DISTINCT FROM is used instead of <>
--     so that NULLs (description is nullable) compare correctly.
INSERT INTO pipeline.registry AS r (
    pipeline_id,
    name,
    description,
    version,
    module_path,
    stages,
    input_type,
    is_enabled
)
VALUES (
    'reviewiq',
    'ReviewIQ Classification Pipeline',
    'Classifies reviews using URT taxonomy, detects issues, and aggregates metrics',
    '1.0.0',
    'reviewiq_pipeline.pipeline:ReviewIQPipeline',
    ARRAY['normalize', 'classify', 'route', 'aggregate'],
    'ScraperV1Output',
    TRUE
)
ON CONFLICT (pipeline_id) DO UPDATE SET
    name        = EXCLUDED.name,
    description = EXCLUDED.description,
    version     = EXCLUDED.version,
    module_path = EXCLUDED.module_path,
    stages      = EXCLUDED.stages,
    input_type  = EXCLUDED.input_type,
    updated_at  = NOW()
WHERE ROW(r.name, r.description, r.version,
          r.module_path, r.stages, r.input_type)
      IS DISTINCT FROM
      ROW(EXCLUDED.name, EXCLUDED.description, EXCLUDED.version,
          EXCLUDED.module_path, EXCLUDED.stages, EXCLUDED.input_type);
-- =============================================================================
-- SECTION 5: VIEWS
-- =============================================================================
-- Executions enriched with the registered pipeline name and a computed
-- duration. The LEFT JOIN keeps executions whose pipeline_id has no matching
-- registry row; pipeline_name is NULL for those.
CREATE OR REPLACE VIEW pipeline.executions_with_pipeline AS
SELECT
    ex.id,
    ex.pipeline_id,
    reg.name AS pipeline_name,
    ex.job_id,
    ex.business_id,
    ex.status,
    ex.stages_requested,
    ex.stages_completed,
    ex.current_stage,
    ex.error_message,
    ex.started_at,
    ex.completed_at,
    ex.created_at,
    -- Whole seconds elapsed: running rows are measured from started_at to
    -- NOW(); rows with a completed_at are measured from started_at to
    -- completed_at; every other row yields NULL.
    CASE
        WHEN ex.status = 'running'
            THEN CAST(EXTRACT(EPOCH FROM (NOW() - ex.started_at)) AS INTEGER)
        WHEN ex.completed_at IS NOT NULL
            THEN CAST(EXTRACT(EPOCH FROM (ex.completed_at - ex.started_at)) AS INTEGER)
        ELSE NULL
    END AS duration_seconds
FROM pipeline.executions AS ex
LEFT JOIN pipeline.registry AS reg
    ON reg.pipeline_id = ex.pipeline_id;

COMMENT ON VIEW pipeline.executions_with_pipeline
    IS 'Executions joined with pipeline metadata and duration';
-- Per-pipeline execution statistics: one row per pipeline_id found in
-- pipeline.executions.
--
-- Every value of pipeline.execution_status has its own counter, so the
-- per-status counts add up to total_executions. avg_duration_seconds is
-- computed over completed executions only (completed_at - started_at).
--
-- pending_count is deliberately the LAST column: CREATE OR REPLACE VIEW only
-- allows new columns to be appended after the existing ones, so placing it
-- anywhere else would fail against a database where this view already exists.
CREATE OR REPLACE VIEW pipeline.execution_stats AS
SELECT
    pipeline_id,
    COUNT(*) AS total_executions,
    COUNT(*) FILTER (WHERE status = 'completed') AS completed_count,
    COUNT(*) FILTER (WHERE status = 'failed') AS failed_count,
    COUNT(*) FILTER (WHERE status = 'running') AS running_count,
    COUNT(*) FILTER (WHERE status = 'cancelled') AS cancelled_count,
    AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
        FILTER (WHERE status = 'completed') AS avg_duration_seconds,
    MAX(created_at) AS last_execution_at,
    COUNT(*) FILTER (WHERE status = 'pending') AS pending_count
FROM pipeline.executions
GROUP BY pipeline_id;

COMMENT ON VIEW pipeline.execution_stats IS 'Aggregated execution statistics per pipeline';
-- =============================================================================
-- DONE
-- =============================================================================