Files
whyrating-engine-legacy/scripts/register_reputation_pipeline.py
2026-02-02 18:19:00 +00:00

143 lines
4.6 KiB
Python

#!/usr/bin/env python3
"""
Register the Reputation Pipeline in the pipeline registry.
Usage:
python scripts/register_reputation_pipeline.py
"""
import asyncio
import os
import sys
import asyncpg
# Connection string for the registry database. The DATABASE_URL environment
# variable wins when set; otherwise fall back to a localhost instance whose
# credentials are embedded in the default below.
DB_URL = os.getenv("DATABASE_URL", "postgresql://scraper:scraper123@localhost:5437/scraper")
# DDL for the registry table. Only the table is created here; the `pipeline`
# schema itself is not created by this script and must already exist.
_CREATE_REGISTRY_SQL = """
    CREATE TABLE IF NOT EXISTS pipeline.registry (
        pipeline_id VARCHAR(50) PRIMARY KEY,
        name VARCHAR(255) NOT NULL,
        description TEXT,
        version VARCHAR(50) NOT NULL,
        module_path VARCHAR(500) NOT NULL,
        stages TEXT[] NOT NULL DEFAULT '{}',
        input_type VARCHAR(100),
        config JSONB,
        is_enabled BOOLEAN NOT NULL DEFAULT TRUE,
        created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
    )
"""

# Single upsert shared by every registration. Values are bound as query
# parameters ($1..$7) rather than being inlined into the SQL text, so adding a
# pipeline only means adding a row to _PIPELINE_DEFINITIONS.
_UPSERT_PIPELINE_SQL = """
    INSERT INTO pipeline.registry (
        pipeline_id,
        name,
        description,
        version,
        module_path,
        stages,
        input_type,
        is_enabled,
        updated_at
    )
    VALUES ($1, $2, $3, $4, $5, $6::text[], $7, TRUE, NOW())
    ON CONFLICT (pipeline_id) DO UPDATE SET
        name = EXCLUDED.name,
        description = EXCLUDED.description,
        version = EXCLUDED.version,
        module_path = EXCLUDED.module_path,
        stages = EXCLUDED.stages,
        input_type = EXCLUDED.input_type,
        is_enabled = EXCLUDED.is_enabled,
        updated_at = NOW()
"""

# Pipelines to register, in registration order. Field order matches the
# positional parameters of _UPSERT_PIPELINE_SQL:
# (pipeline_id, name, description, version, module_path, stages, input_type)
_PIPELINE_DEFINITIONS = (
    (
        "reputation",
        "Reputation Analytics Pipeline",
        "Primitives-based classification and reputation scoring. Generates business-facing analytics reports with domain breakdown, key drivers, and actionable insights.",
        "2.0.0",
        "reviewiq_pipeline.reputation_pipeline:ReputationPipeline",
        ["classify", "report"],
        "BusinessInput",
    ),
    (
        "reviewiq",
        "ReviewIQ Classification Pipeline",
        "Classifies reviews using URT taxonomy, detects issues, and aggregates metrics for dashboards.",
        "1.0.0",
        "reviewiq_pipeline.pipeline:ReviewIQPipeline",
        ["normalize", "classify", "route", "aggregate", "synthesize"],
        "ScraperV1Output",
    ),
)


async def _upsert_pipeline(conn, definition):
    """Insert or update one registry row and print a confirmation line."""
    await conn.execute(_UPSERT_PIPELINE_SQL, *definition)
    print(f"✓ Registered '{definition[0]}' pipeline")


async def register_pipeline():
    """Register the Reputation and ReviewIQ pipelines in the database.

    Connects to the database at ``DB_URL``, creates ``pipeline.registry`` if
    it does not exist, upserts the ``reputation`` and ``reviewiq`` rows (an
    existing row with the same ``pipeline_id`` is overwritten and re-enabled),
    then prints every row currently in the registry.

    The connection is always closed, even when a statement fails. Errors from
    connecting or executing are not caught here and propagate to the caller.
    """
    print("Connecting to database...")
    conn = await asyncpg.connect(DB_URL)
    try:
        # Ensure the registry table exists before writing to it.
        await conn.execute(_CREATE_REGISTRY_SQL)

        for definition in _PIPELINE_DEFINITIONS:
            await _upsert_pipeline(conn, definition)

        # List all registered pipelines, not only the two written above.
        rows = await conn.fetch("""
            SELECT pipeline_id, name, version, is_enabled, stages
            FROM pipeline.registry
            ORDER BY name
        """)
        print("\n📋 Registered Pipelines:")
        print("-" * 80)
        for row in rows:
            status = "✓ enabled" if row["is_enabled"] else "✗ disabled"
            stages = ", ".join(row["stages"]) if row["stages"] else "none"
            print(f" {row['pipeline_id']:20} v{row['version']:10} {status}")
            print(f"{row['name']}")
            print(f" → Stages: {stages}")
            print()
    finally:
        await conn.close()
if __name__ == "__main__":
    # Script entry point: run the registration coroutine to completion on a
    # fresh event loop when the file is executed directly.
    asyncio.run(register_pipeline())