Phase 1: Database migrations for platform features

Migrations created:
- 001_add_job_platform_fields.sql: Add 15 new columns to jobs table
  - Requester tracking (client_id, source, purpose, metadata)
  - Batch support (batch_id, batch_index)
  - Execution tracking (job_type, scraper_version, variant, priority)
  - Webhook callbacks (url, status, sent_at, attempts)
  - Result summary (JSONB for cross-type dashboard)
  - 7 indexes for query performance
  - 5 CHECK constraints for data validation
- 002_create_batches_table.sql: Batch job grouping
  - Tracks batch progress (total/completed/failed)
  - Batch-level callbacks
  - Requester association
- 003_create_scraper_registry.sql: Scraper version management
  - Version routing (stable/beta/canary variants)
  - A/B traffic splitting (traffic_pct)
  - Priority-based routing
  - Seeds google_reviews v1.0.0 as stable default
- 004_create_api_keys.sql: API authentication
  - Secure key storage (SHA-256 hashes, not plaintext)
  - Scopes-based permissions
  - Rate limiting support
  - Key lifecycle (expiry, active status)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 15:24:28 +00:00
parent 544e028c3f
commit 2412996c54
5 changed files with 554 additions and 1 deletions
--- a/migrations/versions/001_add_job_platform_fields.sql
+++ b/migrations/versions/001_add_job_platform_fields.sql
@@ -0,0 +1,243 @@
+-- =============================================================================
+-- Migration: 001_add_job_platform_fields.sql
+-- ReviewIQ Platform - Phase 1
+-- =============================================================================
+--
+-- Adds multi-platform support fields to the jobs table for ReviewIQ integration.
+-- Enables tracking of job origin, batch processing, execution variants, and
+-- webhook callbacks for cross-platform orchestration.
+--
+-- Prerequisite: jobs table must already exist (created by core/database.py)
+--
+-- Date: 2026-01-24
+-- =============================================================================
+
+-- =============================================================================
+-- SECTION 1: REQUESTER FIELDS
+-- Track which client/platform submitted the job and why
+-- =============================================================================
+
+-- Requester attribution: four nullable columns added in one ALTER TABLE.
+-- IF NOT EXISTS on each ADD COLUMN keeps the migration safe to re-run.
+--   requester_client_id  client identifier from the requesting platform,
+--                        intended for per-client analytics, rate limiting,
+--                        and billing
+--   requester_source     platform that submitted the job (multi-tenant tracking)
+--   scrape_purpose       "client_report" | "prospect_screening" | "market_research"
+--   requester_metadata   pass-through JSONB so platforms can attach custom data
+--                        without further schema changes
+ALTER TABLE jobs
+    ADD COLUMN IF NOT EXISTS requester_client_id VARCHAR(255),
+    ADD COLUMN IF NOT EXISTS requester_source VARCHAR(100),
+    ADD COLUMN IF NOT EXISTS scrape_purpose VARCHAR(50),
+    ADD COLUMN IF NOT EXISTS requester_metadata JSONB;
+
+COMMENT ON COLUMN jobs.requester_client_id IS
+    'Client identifier from requesting platform (e.g., "veritas_client_123")';
+COMMENT ON COLUMN jobs.requester_source IS
+    'Source platform that submitted the job (e.g., "veritasreview.com")';
+COMMENT ON COLUMN jobs.scrape_purpose IS
+    'Purpose of scrape: "client_report", "prospect_screening", "market_research"';
+COMMENT ON COLUMN jobs.requester_metadata IS
+    'Flexible JSONB for requester-specific metadata (pass-through data)';
+
+
+-- =============================================================================
+-- SECTION 2: BATCH FIELDS
+-- Support for grouped job submissions (e.g., "scrape these 50 locations")
+-- =============================================================================
+
+-- Batch membership columns; both stay NULL for a standalone job.
+--   batch_id     UUID of the owning batch (no FK here; batches table comes later)
+--   batch_index  1-indexed position within the batch, for ordering and progress
+ALTER TABLE jobs
+    ADD COLUMN IF NOT EXISTS batch_id UUID,
+    ADD COLUMN IF NOT EXISTS batch_index INTEGER;
+
+COMMENT ON COLUMN jobs.batch_id IS
+    'UUID linking to batches table (NULL for standalone jobs)';
+COMMENT ON COLUMN jobs.batch_index IS
+    'Position in batch (1-indexed), NULL for standalone jobs';
+
+
+-- =============================================================================
+-- SECTION 3: EXECUTION FIELDS
+-- Control how the job is processed (type, version, priority)
+-- =============================================================================
+
+-- Execution-control columns, added together in a single ALTER TABLE.
+--   job_type         which scraper family handles the job; defaults to
+--                    'google_reviews' for backward compatibility
+--   scraper_version  version that processed the job (e.g., "1.0.0", "2.1.3"),
+--                    kept for debugging, regression analysis, and A/B testing
+--   scraper_variant  rollout variant: "stable" | "beta" | "canary"
+--   priority         queue priority, higher first: 0=normal (default), 1=high,
+--                    2=urgent
+-- NOTE(review): priority is nullable, and PostgreSQL sorts NULLs first under
+-- DESC ordering; confirm writers never store NULL here.
+ALTER TABLE jobs
+    ADD COLUMN IF NOT EXISTS job_type VARCHAR(50) DEFAULT 'google_reviews',
+    ADD COLUMN IF NOT EXISTS scraper_version VARCHAR(50),
+    ADD COLUMN IF NOT EXISTS scraper_variant VARCHAR(20),
+    ADD COLUMN IF NOT EXISTS priority INTEGER DEFAULT 0;
+
+COMMENT ON COLUMN jobs.job_type IS
+    'Job type for multi-scraper support (default: "google_reviews")';
+COMMENT ON COLUMN jobs.scraper_version IS
+    'Scraper version that processed this job (e.g., "1.0.0")';
+COMMENT ON COLUMN jobs.scraper_variant IS
+    'Deployment variant: "stable", "beta", or "canary"';
+COMMENT ON COLUMN jobs.priority IS
+    'Queue priority: 0=normal (default), 1=high, 2=urgent';
+
+
+-- =============================================================================
+-- SECTION 4: CALLBACK FIELDS
+-- Webhook notification management (enhanced from existing webhook_url)
+-- =============================================================================
+
+-- Callback (webhook) delivery bookkeeping, added in one ALTER TABLE.
+--   callback_url       completion webhook target, distinct from existing webhook_url
+--   callback_status    delivery state: "pending" | "sent" | "failed"
+--   callback_sent_at   when the callback was successfully sent
+--   callback_attempts  delivery attempts so far (default 0), for retry logic
+-- NOTE(review): callback_sent_at is TIMESTAMP (no time zone); confirm writers
+-- store UTC, or consider TIMESTAMPTZ.
+ALTER TABLE jobs
+    ADD COLUMN IF NOT EXISTS callback_url TEXT,
+    ADD COLUMN IF NOT EXISTS callback_status VARCHAR(20),
+    ADD COLUMN IF NOT EXISTS callback_sent_at TIMESTAMP,
+    ADD COLUMN IF NOT EXISTS callback_attempts INTEGER DEFAULT 0;
+
+COMMENT ON COLUMN jobs.callback_url IS
+    'Webhook URL for job completion callbacks';
+COMMENT ON COLUMN jobs.callback_status IS
+    'Callback delivery status: "pending", "sent", "failed"';
+COMMENT ON COLUMN jobs.callback_sent_at IS
+    'Timestamp when callback was successfully delivered';
+COMMENT ON COLUMN jobs.callback_attempts IS
+    'Number of callback delivery attempts (for retry tracking)';
+
+
+-- =============================================================================
+-- SECTION 5: RESULT SUMMARY
+-- Normalized summary for cross-platform dashboards
+-- =============================================================================
+
+-- result_summary holds pre-computed metrics as JSONB so dashboards can query
+-- them without loading the full reviews_data payload, for example:
+-- {"total_reviews": 150, "avg_rating": 4.2, "sentiment": {"positive": 80, "negative": 20}}
+ALTER TABLE jobs
+    ADD COLUMN IF NOT EXISTS result_summary JSONB;
+COMMENT ON COLUMN jobs.result_summary IS 'JSONB summary for dashboards: review counts, ratings, sentiment breakdown';
+
+
+-- =============================================================================
+-- SECTION 6: INDEXES
+-- Optimized for common query patterns
+-- =============================================================================
+
+-- Per-client lookups (job history, analytics). Partial index: rows with a
+-- NULL requester_client_id are not indexed.
+CREATE INDEX IF NOT EXISTS idx_jobs_requester_client_id ON jobs USING btree (requester_client_id)
+    WHERE requester_client_id IS NOT NULL;
+
+-- Batch lookups ("all jobs in this batch"). Partial index: standalone jobs
+-- (NULL batch_id) are not indexed.
+CREATE INDEX IF NOT EXISTS idx_jobs_batch_id ON jobs USING btree (batch_id)
+    WHERE batch_id IS NOT NULL;
+
+-- Filtering by job type once more than one scraper exists. Covers every row.
+CREATE INDEX IF NOT EXISTS idx_jobs_job_type
+    ON jobs USING btree (job_type);
+
+-- Queue index for "get next job": pending rows by priority (high first), then age.
+-- status is not a key column: the partial predicate already fixes it to 'pending'.
+CREATE INDEX IF NOT EXISTS idx_jobs_priority_status
+    ON jobs(priority DESC, created_at ASC)
+    WHERE status = 'pending';
+
+-- Analytics by requester source. Partial index: rows with a NULL
+-- requester_source are not indexed.
+CREATE INDEX IF NOT EXISTS idx_jobs_requester_source ON jobs USING btree (requester_source)
+    WHERE requester_source IS NOT NULL;
+
+-- Callback retry processing. Partial index: only rows whose callback is still
+-- 'pending' or has 'failed' are indexed.
+CREATE INDEX IF NOT EXISTS idx_jobs_callback_pending ON jobs USING btree (callback_status, callback_attempts)
+    WHERE callback_status IN ('pending', 'failed');
+
+-- Scraper version analytics, keyed by (version, variant). Partial index:
+-- rows with a NULL scraper_version are not indexed.
+CREATE INDEX IF NOT EXISTS idx_jobs_scraper_version ON jobs USING btree (scraper_version, scraper_variant)
+    WHERE scraper_version IS NOT NULL;
+
+
+-- =============================================================================
+-- SECTION 7: CONSTRAINTS
+-- Data integrity for new fields
+-- =============================================================================
+
+-- Value-domain CHECK constraints for the new columns. Each one is dropped (if
+-- present) and re-added so the migration can be re-run. All actions share ONE
+-- ALTER TABLE: the swap is atomic even outside an explicit transaction, the
+-- table lock is taken once, and PostgreSQL can validate rows in a single pass.
+-- A CHECK passes when its expression is NULL, so unset columns stay allowed.
+ALTER TABLE jobs
+    -- scrape_purpose: NULL or one of the three known purposes
+    DROP CONSTRAINT IF EXISTS valid_scrape_purpose,
+    ADD CONSTRAINT valid_scrape_purpose
+        CHECK (scrape_purpose IS NULL OR scrape_purpose IN ('client_report', 'prospect_screening', 'market_research')),
+    -- scraper_variant: NULL or a known rollout variant
+    DROP CONSTRAINT IF EXISTS valid_scraper_variant,
+    ADD CONSTRAINT valid_scraper_variant
+        CHECK (scraper_variant IS NULL OR scraper_variant IN ('stable', 'beta', 'canary')),
+    -- callback_status: NULL or a known delivery state
+    DROP CONSTRAINT IF EXISTS valid_callback_status,
+    ADD CONSTRAINT valid_callback_status
+        CHECK (callback_status IS NULL OR callback_status IN ('pending', 'sent', 'failed')),
+    -- priority: within the documented 0..2 range
+    DROP CONSTRAINT IF EXISTS valid_priority,
+    ADD CONSTRAINT valid_priority CHECK (priority >= 0 AND priority <= 2),
+    -- batch_index: positive (1-indexed) whenever it is set
+    DROP CONSTRAINT IF EXISTS valid_batch_index,
+    ADD CONSTRAINT valid_batch_index CHECK (batch_index IS NULL OR batch_index > 0);
+
+
+-- =============================================================================
+-- END OF MIGRATION
+-- =============================================================================
+--
+-- Rollback commands (if needed; DROP COLUMN discards data and also removes the column's indexes/CHECKs):
+--
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS requester_client_id;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS requester_source;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS scrape_purpose;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS requester_metadata;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS batch_id;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS batch_index;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS job_type;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS scraper_version;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS scraper_variant;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS priority;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_url;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_status;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_sent_at;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_attempts;
+-- ALTER TABLE jobs DROP COLUMN IF EXISTS result_summary;
+--
+-- DROP INDEX IF EXISTS idx_jobs_requester_client_id;
+-- DROP INDEX IF EXISTS idx_jobs_batch_id;
+-- DROP INDEX IF EXISTS idx_jobs_job_type;
+-- DROP INDEX IF EXISTS idx_jobs_priority_status;
+-- DROP INDEX IF EXISTS idx_jobs_requester_source;
+-- DROP INDEX IF EXISTS idx_jobs_callback_pending;
+-- DROP INDEX IF EXISTS idx_jobs_scraper_version;
+--
+-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_scrape_purpose;
+-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_scraper_variant;
+-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_callback_status;
+-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_priority;
+-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_batch_index;
+--
+-- =============================================================================