143 lines
4.6 KiB
Python
143 lines
4.6 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Register the Reputation Pipeline in the pipeline registry.
|
|
|
|
Usage:
|
|
python scripts/register_reputation_pipeline.py
|
|
"""
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
|
|
import asyncpg
|
|
|
|
# Connection string used by this script. The DATABASE_URL environment
# variable wins when set; otherwise fall back to the built-in default below.
_DEFAULT_DB_URL = "postgresql://scraper:scraper123@localhost:5437/scraper"
DB_URL = os.getenv("DATABASE_URL", _DEFAULT_DB_URL)
|
|
|
|
|
|
# Single parameterized upsert shared by every registry entry. Positional
# arguments map to: pipeline_id, name, description, version, module_path,
# stages, input_type. Entries are always written as enabled, and updated_at
# is refreshed on both the insert and the conflict-update path.
_UPSERT_PIPELINE_SQL = """
    INSERT INTO pipeline.registry (
        pipeline_id,
        name,
        description,
        version,
        module_path,
        stages,
        input_type,
        is_enabled,
        updated_at
    )
    VALUES ($1, $2, $3, $4, $5, $6::text[], $7, TRUE, NOW())
    ON CONFLICT (pipeline_id) DO UPDATE SET
        name = EXCLUDED.name,
        description = EXCLUDED.description,
        version = EXCLUDED.version,
        module_path = EXCLUDED.module_path,
        stages = EXCLUDED.stages,
        input_type = EXCLUDED.input_type,
        is_enabled = EXCLUDED.is_enabled,
        updated_at = NOW()
"""

# Registry rows written by register_pipeline(), in registration order.
# Each tuple matches the positional parameters of _UPSERT_PIPELINE_SQL.
_PIPELINES = (
    (
        "reputation",
        "Reputation Analytics Pipeline",
        "Primitives-based classification and reputation scoring. "
        "Generates business-facing analytics reports with domain breakdown, "
        "key drivers, and actionable insights.",
        "2.0.0",
        "reviewiq_pipeline.reputation_pipeline:ReputationPipeline",
        ["classify", "report"],
        "BusinessInput",
    ),
    (
        "reviewiq",
        "ReviewIQ Classification Pipeline",
        "Classifies reviews using URT taxonomy, detects issues, "
        "and aggregates metrics for dashboards.",
        "1.0.0",
        "reviewiq_pipeline.pipeline:ReviewIQPipeline",
        ["normalize", "classify", "route", "aggregate", "synthesize"],
        "ScraperV1Output",
    ),
)


async def register_pipeline():
    """Register the Reputation and ReviewIQ pipelines in the database.

    Connects to ``DB_URL``, creates ``pipeline.registry`` if it is missing,
    upserts one row per entry in ``_PIPELINES`` (keyed on ``pipeline_id``),
    and finally prints every row of the registry ordered by name.

    The connection is always closed, even when a statement fails; the
    underlying asyncpg/PostgreSQL exception then propagates to the caller.

    NOTE: only the table is created here. The ``pipeline`` schema itself is
    not created by this function, so it must already exist.
    """
    print("Connecting to database...")

    conn = await asyncpg.connect(DB_URL)

    try:
        # Ensure the registry table exists
        await conn.execute("""
            CREATE TABLE IF NOT EXISTS pipeline.registry (
                pipeline_id VARCHAR(50) PRIMARY KEY,
                name VARCHAR(255) NOT NULL,
                description TEXT,
                version VARCHAR(50) NOT NULL,
                module_path VARCHAR(500) NOT NULL,
                stages TEXT[] NOT NULL DEFAULT '{}',
                input_type VARCHAR(100),
                config JSONB,
                is_enabled BOOLEAN NOT NULL DEFAULT TRUE,
                created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
                updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
            )
        """)

        # Upsert each pipeline; re-running the script refreshes existing rows
        # instead of failing on the primary key.
        for spec in _PIPELINES:
            await conn.execute(_UPSERT_PIPELINE_SQL, *spec)
            print(f"✓ Registered '{spec[0]}' pipeline")

        # List all registered pipelines
        rows = await conn.fetch("""
            SELECT pipeline_id, name, version, is_enabled, stages
            FROM pipeline.registry
            ORDER BY name
        """)

        print("\n📋 Registered Pipelines:")
        print("-" * 80)
        for row in rows:
            status = "✓ enabled" if row["is_enabled"] else "✗ disabled"
            stages = ", ".join(row["stages"]) if row["stages"] else "none"
            print(f" {row['pipeline_id']:20} v{row['version']:10} {status}")
            print(f" → {row['name']}")
            print(f" → Stages: {stages}")
            print()

    finally:
        await conn.close()
|
|
|
|
|
|
# Script entry point: build the registration coroutine and drive it to
# completion on a fresh event loop. Nothing runs when the module is imported.
if __name__ == "__main__":
    _registration = register_pipeline()
    asyncio.run(_registration)
|