#!/usr/bin/env python3
"""
Register the Reputation Pipeline in the pipeline registry.

The script upserts the 'reputation' and 'reviewiq' rows into
``pipeline.registry`` (creating the table if it is missing) and then prints
every pipeline currently stored in that table.

Usage:
    python scripts/register_reputation_pipeline.py
"""

import asyncio
import os
import sys

import asyncpg

# Database URL
# NOTE(review): the fallback below embeds a username and password; presumably
# it targets a local development database only -- confirm before relying on it
# anywhere else. Set DATABASE_URL to override.
DB_URL = os.environ.get(
    "DATABASE_URL",
    "postgresql://scraper:scraper123@localhost:5437/scraper"
)

# DDL for the registry table. Only the table is created here; the ``pipeline``
# schema itself must already exist or this statement fails.
_CREATE_REGISTRY_SQL = """
    CREATE TABLE IF NOT EXISTS pipeline.registry (
        pipeline_id VARCHAR(50) PRIMARY KEY,
        name VARCHAR(255) NOT NULL,
        description TEXT,
        version VARCHAR(50) NOT NULL,
        module_path VARCHAR(500) NOT NULL,
        stages TEXT[] NOT NULL DEFAULT '{}',
        input_type VARCHAR(100),
        config JSONB,
        is_enabled BOOLEAN NOT NULL DEFAULT TRUE,
        created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
    )
"""

# Insert-or-update for one registry row. Positional parameters follow the
# field order of the entries in ``_PIPELINES``. ``is_enabled`` is always set
# to TRUE and ``updated_at`` to NOW(); ``config`` and ``created_at`` are left
# untouched on conflict.
_UPSERT_PIPELINE_SQL = """
    INSERT INTO pipeline.registry (
        pipeline_id, name, description, version, module_path,
        stages, input_type, is_enabled, updated_at
    ) VALUES ($1, $2, $3, $4, $5, $6, $7, TRUE, NOW())
    ON CONFLICT (pipeline_id) DO UPDATE SET
        name = EXCLUDED.name,
        description = EXCLUDED.description,
        version = EXCLUDED.version,
        module_path = EXCLUDED.module_path,
        stages = EXCLUDED.stages,
        input_type = EXCLUDED.input_type,
        is_enabled = EXCLUDED.is_enabled,
        updated_at = NOW()
"""

_LIST_PIPELINES_SQL = """
    SELECT pipeline_id, name, version, is_enabled, stages
    FROM pipeline.registry
    ORDER BY name
"""

# Pipelines to register, in registration order. Each entry is
# (pipeline_id, name, description, version, module_path, stages, input_type).
_PIPELINES = (
    (
        "reputation",
        "Reputation Analytics Pipeline",
        "Primitives-based classification and reputation scoring. "
        "Generates business-facing analytics reports with domain breakdown, "
        "key drivers, and actionable insights.",
        "2.0.0",
        "reviewiq_pipeline.reputation_pipeline:ReputationPipeline",
        ["classify", "report"],
        "BusinessInput",
    ),
    (
        "reviewiq",
        "ReviewIQ Classification Pipeline",
        "Classifies reviews using URT taxonomy, detects issues, "
        "and aggregates metrics for dashboards.",
        "1.0.0",
        "reviewiq_pipeline.pipeline:ReviewIQPipeline",
        ["normalize", "classify", "route", "aggregate", "synthesize"],
        "ScraperV1Output",
    ),
)


def _format_pipeline(row) -> str:
    """Render one registry row as the three-line summary shown in the listing."""
    status = "✓ enabled" if row["is_enabled"] else "✗ disabled"
    stages = ", ".join(row["stages"]) if row["stages"] else "none"
    return "\n".join((
        f" {row['pipeline_id']:20} v{row['version']:10} {status}",
        f" → {row['name']}",
        f" → Stages: {stages}",
    ))


async def register_pipeline() -> None:
    """Register the Reputation Pipeline in the database.

    Connects to ``DB_URL``, ensures ``pipeline.registry`` exists, upserts
    every entry of ``_PIPELINES`` (the 'reputation' pipeline first, then
    'reviewiq'), and prints all rows of the registry ordered by name.

    Running the script again updates the existing rows in place rather than
    failing, because each insert uses ``ON CONFLICT (pipeline_id) DO UPDATE``.

    Raises:
        Whatever ``asyncpg`` raises on connection or query failure; errors
        are not caught here. The connection is closed in every case.
    """
    print("Connecting to database...")
    conn = await asyncpg.connect(DB_URL)

    try:
        # Ensure the registry table exists
        await conn.execute(_CREATE_REGISTRY_SQL)

        # Values are bound as query parameters instead of being spliced into
        # the SQL text, so one statement serves every pipeline.
        for pipeline in _PIPELINES:
            await conn.execute(_UPSERT_PIPELINE_SQL, *pipeline)
            print(f"✓ Registered '{pipeline[0]}' pipeline")

        # List all registered pipelines
        rows = await conn.fetch(_LIST_PIPELINES_SQL)

        print("\n📋 Registered Pipelines:")
        print("-" * 80)
        for row in rows:
            print(_format_pipeline(row))
            print()
    finally:
        await conn.close()


if __name__ == "__main__":
    asyncio.run(register_pipeline())