feat: Add decoupled pipeline schema with separate PostgreSQL namespace
- Create consolidated migration (005_create_pipeline_schema.sql) with 'pipeline' schema for all classification tables
- Update pipeline repositories to use schema prefix (pipeline.*)
- Add run_migrations() method to DatabaseManager
- Add CLI tool for running versioned migrations

Tables created in pipeline schema:
- reviews_raw, reviews_enriched (Stage 1)
- review_spans (Stage 2)
- issues, issue_spans, issue_events (Stage 3)
- fact_timeseries (Stage 4)
- urt_domains, urt_categories (taxonomy lookup)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -182,6 +182,66 @@ class DatabaseManager:
|
||||
|
||||
log.info("Database schema initialized")
|
||||
|
||||
async def run_migrations(self, migrations_dir: str = "migrations/versions") -> int:
    """
    Run versioned migrations from SQL files.

    Every ``*.sql`` file found directly in ``migrations_dir`` whose filename
    is not yet recorded in the ``_migrations`` tracking table is executed,
    in sorted filename order. Each file is executed inside its own
    transaction together with the insert of its tracking row, so a file
    and its bookkeeping entry are submitted as one unit.

    Args:
        migrations_dir: Path to directory containing .sql migration files.
            Files are run in sorted order.

    Returns:
        Number of migrations applied. 0 if the directory does not exist
        (a warning is logged in that case) or nothing is pending.

    Raises:
        Exception: Any error raised while reading or executing a migration
            is logged and re-raised unchanged. Migrations that completed
            earlier in the same call are not undone here.
    """
    from pathlib import Path

    migrations_path = Path(migrations_dir)
    if not migrations_path.exists():
        log.warning(f"Migrations directory not found: {migrations_dir}")
        return 0

    async with self.pool.acquire() as conn:
        # Create migrations tracking table
        await conn.execute("""
            CREATE TABLE IF NOT EXISTS _migrations (
                id SERIAL PRIMARY KEY,
                filename VARCHAR(255) UNIQUE NOT NULL,
                applied_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
            )
        """)

        # Get already applied migrations
        applied = await conn.fetch("SELECT filename FROM _migrations")
        applied_set = {r["filename"] for r in applied}

        # Find and run pending migrations; sorted() makes the filename
        # (e.g. its numeric prefix) decide the execution order.
        migration_files = sorted(migrations_path.glob("*.sql"))
        migrations_run = 0

        for migration_file in migration_files:
            filename = migration_file.name
            if filename in applied_set:
                continue

            log.info(f"Running migration: {filename}")

            # One transaction per file: the SQL and its tracking row go
            # together. The exception is re-raised from inside the
            # transaction block so the context manager sees the failure.
            async with conn.transaction():
                try:
                    # Decode explicitly as UTF-8 so the SQL text does not
                    # depend on the host's locale encoding.
                    sql = migration_file.read_text(encoding="utf-8")
                    await conn.execute(sql)
                    await conn.execute(
                        "INSERT INTO _migrations (filename) VALUES ($1)",
                        filename,
                    )
                    migrations_run += 1
                    log.info(f"Migration {filename} applied successfully")
                except Exception as e:
                    log.error(f"Migration {filename} failed: {e}")
                    raise

        log.info(f"Ran {migrations_run} migrations")
        return migrations_run
|
||||
|
||||
# ==================== Job Operations ====================
|
||||
|
||||
async def create_job(
|
||||
|
||||
Reference in New Issue
Block a user