feat: Add decoupled pipeline schema with separate PostgreSQL namespace

- Create consolidated migration (005_create_pipeline_schema.sql) with
  'pipeline' schema for all classification tables
- Update pipeline repositories to use schema prefix (pipeline.*)
- Add run_migrations() method to DatabaseManager
- Add CLI tool for running versioned migrations

Tables created in pipeline schema:
- reviews_raw, reviews_enriched (Stage 1)
- review_spans (Stage 2)
- issues, issue_spans, issue_events (Stage 3)
- fact_timeseries (Stage 4)
- urt_domains, urt_categories (taxonomy lookup)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-01-24 18:17:20 +00:00
parent 7d720f5378
commit 03ed7029e2
4 changed files with 710 additions and 23 deletions

View File

@@ -182,6 +182,66 @@ class DatabaseManager:
log.info("Database schema initialized")
async def run_migrations(self, migrations_dir: str = "migrations/versions") -> int:
    """
    Run versioned migrations from SQL files.

    Every ``*.sql`` file in ``migrations_dir`` whose filename is not yet
    recorded in the ``_migrations`` tracking table is executed, in sorted
    filename order. Each file runs in its own transaction together with the
    insert that records it, so a failing migration leaves no partial
    changes and no tracking row behind.

    Args:
        migrations_dir: Path to directory containing .sql migration files.
            Files are run in sorted order.

    Returns:
        Number of migrations applied. 0 if the directory does not exist
        (a warning is logged) or if nothing is pending.

    Raises:
        Exception: Whatever reading or executing a migration raised, after
            it has been logged. Migrations that committed before the
            failure stay applied; later files are not attempted.
    """
    from pathlib import Path

    migrations_path = Path(migrations_dir)
    # is_dir() rather than exists(): a regular file at this path is not a
    # usable migrations directory either.
    if not migrations_path.is_dir():
        log.warning(f"Migrations directory not found: {migrations_dir}")
        return 0

    async with self.pool.acquire() as conn:
        # Create migrations tracking table
        await conn.execute("""
            CREATE TABLE IF NOT EXISTS _migrations (
                id SERIAL PRIMARY KEY,
                filename VARCHAR(255) UNIQUE NOT NULL,
                applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
            )
        """)

        # Get already applied migrations
        applied = await conn.fetch("SELECT filename FROM _migrations")
        applied_set = {r["filename"] for r in applied}

        # Find and run pending migrations
        migrations_run = 0
        for migration_file in sorted(migrations_path.glob("*.sql")):
            filename = migration_file.name
            if filename in applied_set:
                continue

            log.info(f"Running migration: {filename}")
            try:
                # Decode explicitly as UTF-8 so the result does not depend
                # on the host's locale encoding.
                sql = migration_file.read_text(encoding="utf-8")
                # NOTE(review): assumes conn.execute() without arguments
                # accepts a multi-statement script (as asyncpg does) -
                # confirm against the driver in use.
                async with conn.transaction():
                    await conn.execute(sql)
                    await conn.execute(
                        "INSERT INTO _migrations (filename) VALUES ($1)",
                        filename,
                    )
            except Exception as e:
                # The try wraps the whole transaction block, so failures
                # raised while committing are logged as well.
                log.error(f"Migration {filename} failed: {e}")
                raise

            # Reached only after the transaction block exited cleanly,
            # i.e. the migration and its tracking row are committed.
            migrations_run += 1
            log.info(f"Migration {filename} applied successfully")

    log.info(f"Ran {migrations_run} migrations")
    return migrations_run
# ==================== Job Operations ====================
async def create_job(