-- whyrating-engine-legacy/migrations/versions/001_add_job_platform_fields.sql

-- =============================================================================
-- Migration: 001_add_job_platform_fields.sql
-- ReviewIQ Platform - Phase 1
-- =============================================================================
--
-- Adds multi-platform support fields to the jobs table for ReviewIQ integration.
-- Enables tracking of job origin, batch processing, execution variants, and
-- webhook callbacks for cross-platform orchestration.
--
-- Prerequisite: jobs table must already exist (created by core/database.py)
--
-- Date: 2026-01-24
-- =============================================================================

-- =============================================================================
-- SECTION 1: REQUESTER FIELDS
-- Track which client/platform submitted the job and why
-- =============================================================================

-- Requester columns: who submitted the job, from which platform, and why.
-- All four are nullable with no default, so rows that predate this migration
-- read as NULL.
--   requester_client_id  client identifier on the requesting platform
--                        (e.g., "veritas_client_123"); intended for per-client
--                        analytics, rate limiting, and billing
--   requester_source     platform that submitted the job
--                        (e.g., "veritasreview.com"); enables multi-tenant
--                        tracking and source-specific behavior
--   scrape_purpose       "client_report" | "prospect_screening" |
--                        "market_research"; for analytics and prioritization
--   requester_metadata   free-form JSONB so platforms can pass custom data
--                        through without further schema changes
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS requester_client_id VARCHAR(255),
    ADD COLUMN IF NOT EXISTS requester_source    VARCHAR(100),
    ADD COLUMN IF NOT EXISTS scrape_purpose      VARCHAR(50),
    ADD COLUMN IF NOT EXISTS requester_metadata  JSONB;

COMMENT ON COLUMN jobs.requester_client_id
    IS 'Client identifier from requesting platform (e.g., "veritas_client_123")';
COMMENT ON COLUMN jobs.requester_source
    IS 'Source platform that submitted the job (e.g., "veritasreview.com")';
COMMENT ON COLUMN jobs.scrape_purpose
    IS 'Purpose of scrape: "client_report", "prospect_screening", "market_research"';
COMMENT ON COLUMN jobs.requester_metadata
    IS 'Flexible JSONB for requester-specific metadata (pass-through data)';


-- =============================================================================
-- SECTION 2: BATCH FIELDS
-- Support for grouped job submissions (e.g., "scrape these 50 locations")
-- =============================================================================

-- Batch membership. Both columns stay NULL for a standalone job.
--   batch_id     UUID of the owning batch record; the batches table is to be
--                created in a future migration, so no foreign key is declared
--                here
--   batch_index  position within the batch (1-indexed: 1, 2, 3...), used for
--                ordered processing and progress tracking
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS batch_id    UUID,
    ADD COLUMN IF NOT EXISTS batch_index INTEGER;

COMMENT ON COLUMN jobs.batch_id
    IS 'UUID linking to batches table (NULL for standalone jobs)';
COMMENT ON COLUMN jobs.batch_index
    IS 'Position in batch (1-indexed), NULL for standalone jobs';


-- =============================================================================
-- SECTION 3: EXECUTION FIELDS
-- Control how the job is processed (type, version, priority)
-- =============================================================================

-- Execution columns: what kind of job this is, which scraper build handled it,
-- and where it sits in the queue.
--   job_type         scraper family; defaults to 'google_reviews' so writers
--                    that do not set it keep their existing meaning
--   scraper_version  version string of the scraper that processed the job
--                    (e.g., "1.0.0", "2.1.3"); for debugging, regression
--                    analysis, and A/B testing
--   scraper_variant  rollout channel: "stable" | "beta" | "canary"
--   priority         queue priority, 0=normal (default), 1=high, 2=urgent;
--                    higher values are meant to be processed first
-- NOTE(review): priority is nullable. PostgreSQL places NULLs first under a
-- DESC ordering, so an explicitly stored NULL would sort ahead of urgent jobs
-- in a "priority DESC" queue scan — confirm writers never store NULL here.
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS job_type        VARCHAR(50) DEFAULT 'google_reviews',
    ADD COLUMN IF NOT EXISTS scraper_version VARCHAR(50),
    ADD COLUMN IF NOT EXISTS scraper_variant VARCHAR(20),
    ADD COLUMN IF NOT EXISTS priority        INTEGER     DEFAULT 0;

COMMENT ON COLUMN jobs.job_type
    IS 'Job type for multi-scraper support (default: "google_reviews")';
COMMENT ON COLUMN jobs.scraper_version
    IS 'Scraper version that processed this job (e.g., "1.0.0")';
COMMENT ON COLUMN jobs.scraper_variant
    IS 'Deployment variant: "stable", "beta", or "canary"';
COMMENT ON COLUMN jobs.priority
    IS 'Queue priority: 0=normal (default), 1=high, 2=urgent';


-- =============================================================================
-- SECTION 4: CALLBACK FIELDS
-- Webhook notification management (enhanced from existing webhook_url)
-- =============================================================================

-- Callback (webhook) bookkeeping for job-completion notifications.
--   callback_url       where to deliver the completion callback; kept separate
--                      from the existing webhook_url to allow different
--                      callback patterns
--   callback_status    delivery state: "pending" | "sent" | "failed"
--   callback_sent_at   when the callback was successfully sent
--   callback_attempts  number of delivery attempts so far, starting at 0;
--                      feeds retry logic
-- NOTE(review): callback_sent_at is TIMESTAMP (without time zone) — confirm
-- that every writer stores the same zone (presumably UTC).
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS callback_url      TEXT,
    ADD COLUMN IF NOT EXISTS callback_status   VARCHAR(20),
    ADD COLUMN IF NOT EXISTS callback_sent_at  TIMESTAMP,
    ADD COLUMN IF NOT EXISTS callback_attempts INTEGER DEFAULT 0;

COMMENT ON COLUMN jobs.callback_url
    IS 'Webhook URL for job completion callbacks';
COMMENT ON COLUMN jobs.callback_status
    IS 'Callback delivery status: "pending", "sent", "failed"';
COMMENT ON COLUMN jobs.callback_sent_at
    IS 'Timestamp when callback was successfully delivered';
COMMENT ON COLUMN jobs.callback_attempts
    IS 'Number of callback delivery attempts (for retry tracking)';


-- =============================================================================
-- SECTION 5: RESULT SUMMARY
-- Normalized summary for cross-platform dashboards
-- =============================================================================

-- Pre-computed result summary, stored as JSONB so dashboard queries can read
-- headline metrics without loading the full reviews_data payload.
-- Example value:
--   {"total_reviews": 150, "avg_rating": 4.2,
--    "sentiment": {"positive": 80, "negative": 20}}
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS result_summary JSONB;

COMMENT ON COLUMN jobs.result_summary
    IS 'JSONB summary for dashboards: review counts, ratings, sentiment breakdown';


-- =============================================================================
-- SECTION 6: INDEXES
-- Optimized for common query patterns
-- =============================================================================

-- Per-client lookups (job history, analytics). Partial: rows with no client id
-- are left out of the index.
CREATE INDEX IF NOT EXISTS idx_jobs_requester_client_id
    ON jobs USING btree (requester_client_id)
    WHERE requester_client_id IS NOT NULL;

-- Fetch every job that belongs to one batch. Partial: standalone jobs
-- (batch_id IS NULL) are left out of the index.
CREATE INDEX IF NOT EXISTS idx_jobs_batch_id
    ON jobs USING btree (batch_id)
    WHERE batch_id IS NOT NULL;

-- Filter by job type once more than one scraper exists. Covers every row.
CREATE INDEX IF NOT EXISTS idx_jobs_job_type
    ON jobs USING btree (job_type);

-- Index for priority queue ordering: highest priority first, oldest first
-- within a priority level, for efficient "get next job" queries.
-- The partial predicate already limits the index to status = 'pending', so
-- status would hold the same value in every entry; it is therefore not
-- repeated as a key column. That keeps the index narrower and leaves priority
-- and created_at adjacent, matching a lookup of the form
--   WHERE status = 'pending' ORDER BY priority DESC, created_at ASC
-- NOTE: IF NOT EXISTS skips creation when an index with this name is already
-- present, so a database that ran an earlier revision of this file keeps its
-- previous definition until the index is dropped and recreated.
CREATE INDEX IF NOT EXISTS idx_jobs_priority_status
    ON jobs(priority DESC, created_at ASC)
    WHERE status = 'pending';

-- Analytics grouped or filtered by submitting platform. Partial: rows with no
-- requester_source are left out of the index.
CREATE INDEX IF NOT EXISTS idx_jobs_requester_source
    ON jobs USING btree (requester_source)
    WHERE requester_source IS NOT NULL;

-- Callback retry processing. Partial: only rows whose callback is still
-- 'pending' or has 'failed' are indexed; 'sent' and NULL rows are left out.
CREATE INDEX IF NOT EXISTS idx_jobs_callback_pending
    ON jobs USING btree (callback_status, callback_attempts)
    WHERE callback_status IN ('pending', 'failed');

-- Scraper version analytics, keyed by version and then rollout variant.
-- Partial: rows with no recorded scraper_version are left out of the index.
CREATE INDEX IF NOT EXISTS idx_jobs_scraper_version
    ON jobs USING btree (scraper_version, scraper_variant)
    WHERE scraper_version IS NOT NULL;


-- =============================================================================
-- SECTION 7: CONSTRAINTS
-- Data integrity for new fields
-- =============================================================================

-- Each CHECK below is (re)installed by ONE ALTER TABLE statement that drops
-- any earlier definition and adds the current one. Doing both actions in a
-- single statement makes the swap atomic: there is no moment at which the
-- table has no constraint, and if the new CHECK is rejected by existing rows
-- the previous constraint is left in place instead of having already been
-- dropped. DROP ... IF EXISTS keeps the statements safe to re-run.

-- Ensure valid scrape_purpose values (NULL is allowed: purpose not recorded)
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_scrape_purpose,
    ADD CONSTRAINT valid_scrape_purpose
        CHECK (scrape_purpose IS NULL OR scrape_purpose IN ('client_report', 'prospect_screening', 'market_research'));

-- Ensure valid scraper_variant values (NULL is allowed: variant not recorded)
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_scraper_variant,
    ADD CONSTRAINT valid_scraper_variant
        CHECK (scraper_variant IS NULL OR scraper_variant IN ('stable', 'beta', 'canary'));

-- Ensure valid callback_status values (NULL is allowed: no callback state)
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_callback_status,
    ADD CONSTRAINT valid_callback_status
        CHECK (callback_status IS NULL OR callback_status IN ('pending', 'sent', 'failed'));

-- Ensure valid priority range (0=normal, 1=high, 2=urgent).
-- A NULL priority still passes, because a CHECK only rejects rows for which
-- the expression evaluates to false.
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_priority,
    ADD CONSTRAINT valid_priority
        CHECK (priority >= 0 AND priority <= 2);

-- Ensure batch_index is positive when set (it is 1-indexed)
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_batch_index,
    ADD CONSTRAINT valid_batch_index
        CHECK (batch_index IS NULL OR batch_index > 0);


-- =============================================================================
-- END OF MIGRATION
-- =============================================================================
--
-- Rollback commands (if needed):
--
-- ALTER TABLE jobs DROP COLUMN IF EXISTS requester_client_id;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS requester_source;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS scrape_purpose;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS requester_metadata;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS batch_id;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS batch_index;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS job_type;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS scraper_version;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS scraper_variant;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS priority;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_url;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_status;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_sent_at;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_attempts;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS result_summary;
--
-- DROP INDEX IF EXISTS idx_jobs_requester_client_id;
-- DROP INDEX IF EXISTS idx_jobs_batch_id;
-- DROP INDEX IF EXISTS idx_jobs_job_type;
-- DROP INDEX IF EXISTS idx_jobs_priority_status;
-- DROP INDEX IF EXISTS idx_jobs_requester_source;
-- DROP INDEX IF EXISTS idx_jobs_callback_pending;
-- DROP INDEX IF EXISTS idx_jobs_scraper_version;
--
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_scrape_purpose;
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_scraper_variant;
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_callback_status;
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_priority;
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_batch_index;
--
-- =============================================================================