Initial commit - WhyRating Engine (Google Reviews Scraper)
This commit is contained in:
142
scripts/register_reputation_pipeline.py
Normal file
142
scripts/register_reputation_pipeline.py
Normal file
@@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Register the Reputation Pipeline in the pipeline registry.
|
||||
|
||||
Usage:
|
||||
python scripts/register_reputation_pipeline.py
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import asyncpg
|
||||
|
||||
# Connection string for the registry database. The DATABASE_URL environment
# variable takes precedence; the literal below is used only when it is unset.
DB_URL = os.getenv(
    "DATABASE_URL",
    "postgresql://scraper:scraper123@localhost:5437/scraper",
)
|
||||
|
||||
|
||||
# DDL executed on every run so the script also works against a fresh database.
# NOTE(review): this only creates the table; the `pipeline` schema itself is
# assumed to exist already — confirm against the migration setup.
_CREATE_REGISTRY_SQL = """
    CREATE TABLE IF NOT EXISTS pipeline.registry (
        pipeline_id VARCHAR(50) PRIMARY KEY,
        name VARCHAR(255) NOT NULL,
        description TEXT,
        version VARCHAR(50) NOT NULL,
        module_path VARCHAR(500) NOT NULL,
        stages TEXT[] NOT NULL DEFAULT '{}',
        input_type VARCHAR(100),
        config JSONB,
        is_enabled BOOLEAN NOT NULL DEFAULT TRUE,
        created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
        updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
    )
"""

# Single upsert shared by every pipeline: insert a new row, or overwrite the
# descriptive columns of the existing row with the same pipeline_id.
_UPSERT_PIPELINE_SQL = """
    INSERT INTO pipeline.registry (
        pipeline_id,
        name,
        description,
        version,
        module_path,
        stages,
        input_type,
        is_enabled,
        updated_at
    )
    VALUES ($1, $2, $3, $4, $5, $6::text[], $7, TRUE, NOW())
    ON CONFLICT (pipeline_id) DO UPDATE SET
        name = EXCLUDED.name,
        description = EXCLUDED.description,
        version = EXCLUDED.version,
        module_path = EXCLUDED.module_path,
        stages = EXCLUDED.stages,
        input_type = EXCLUDED.input_type,
        is_enabled = EXCLUDED.is_enabled,
        updated_at = NOW()
"""

# Pipelines this script registers, in registration order.
_PIPELINES = (
    {
        "pipeline_id": "reputation",
        "name": "Reputation Analytics Pipeline",
        "description": (
            "Primitives-based classification and reputation scoring. "
            "Generates business-facing analytics reports with domain "
            "breakdown, key drivers, and actionable insights."
        ),
        "version": "2.0.0",
        "module_path": "reviewiq_pipeline.reputation_pipeline:ReputationPipeline",
        "stages": ["classify", "report"],
        "input_type": "BusinessInput",
    },
    {
        "pipeline_id": "reviewiq",
        "name": "ReviewIQ Classification Pipeline",
        "description": (
            "Classifies reviews using URT taxonomy, detects issues, "
            "and aggregates metrics for dashboards."
        ),
        "version": "1.0.0",
        "module_path": "reviewiq_pipeline.pipeline:ReviewIQPipeline",
        "stages": ["normalize", "classify", "route", "aggregate", "synthesize"],
        "input_type": "ScraperV1Output",
    },
)


async def _upsert_pipeline(conn, pipeline):
    """Insert or update one registry row described by the *pipeline* dict."""
    await conn.execute(
        _UPSERT_PIPELINE_SQL,
        pipeline["pipeline_id"],
        pipeline["name"],
        pipeline["description"],
        pipeline["version"],
        pipeline["module_path"],
        pipeline["stages"],
        pipeline["input_type"],
    )


async def register_pipeline():
    """Register the Reputation Pipeline in the database.

    Connects to the database at ``DB_URL``, creates ``pipeline.registry`` if
    it does not exist, upserts the ``reputation`` and ``reviewiq`` rows
    (enabled, with ``updated_at`` refreshed), and prints every registered
    pipeline ordered by name. The connection is closed even when a statement
    fails; database errors propagate to the caller.
    """
    print("Connecting to database...")

    conn = await asyncpg.connect(DB_URL)

    try:
        # Ensure the registry table exists
        await conn.execute(_CREATE_REGISTRY_SQL)

        # Upsert each pipeline; re-running the script refreshes existing rows.
        for pipeline in _PIPELINES:
            await _upsert_pipeline(conn, pipeline)
            print(f"✓ Registered '{pipeline['pipeline_id']}' pipeline")

        # List all registered pipelines
        rows = await conn.fetch("""
            SELECT pipeline_id, name, version, is_enabled, stages
            FROM pipeline.registry
            ORDER BY name
        """)

        print("\n📋 Registered Pipelines:")
        print("-" * 80)
        for row in rows:
            status = "✓ enabled" if row["is_enabled"] else "✗ disabled"
            # An empty stages array is shown as "none" rather than a blank.
            stages = ", ".join(row["stages"]) if row["stages"] else "none"
            print(f" {row['pipeline_id']:20} v{row['version']:10} {status}")
            print(f" → {row['name']}")
            print(f" → Stages: {stages}")
            print()
    finally:
        await conn.close()
|
||||
|
||||
|
||||
# Script entry point: run the registration coroutine to completion.
if __name__ == "__main__":
    asyncio.run(register_pipeline())
|
||||
Reference in New Issue
Block a user