-- =============================================================================
-- Migration: 001_add_job_platform_fields.sql
-- ReviewIQ Platform - Phase 1
-- =============================================================================
--
-- Adds multi-platform support fields to the jobs table for ReviewIQ integration.
-- Enables tracking of job origin, batch processing, execution variants, and
-- webhook callbacks for cross-platform orchestration.
--
-- Prerequisite: jobs table must already exist (created by core/database.py)
--
-- Re-runnability: columns and indexes are guarded with IF NOT EXISTS, and each
-- CHECK constraint is dropped (IF EXISTS) before being re-added, so the script
-- can be executed more than once.
--
-- NOTE(review): this file does not open a transaction itself. If the migration
-- runner does not wrap it in one, a mid-script failure leaves the earlier
-- statements applied -- confirm how the runner executes it.
--
-- Formatting: every "--" comment must stay on its own line. A line comment runs
-- to the end of the physical line, so joining lines comments out the DDL that
-- follows.
--
-- Date: 2026-01-24
-- =============================================================================


-- =============================================================================
-- SECTION 1: REQUESTER FIELDS
-- Track which client/platform submitted the job and why
-- =============================================================================

-- Client identifier from the requesting platform (e.g., "veritas_client_123")
-- Used for per-client analytics, rate limiting, and billing
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS requester_client_id VARCHAR(255);
COMMENT ON COLUMN jobs.requester_client_id IS
    'Client identifier from requesting platform (e.g., "veritas_client_123")';

-- Source platform that submitted the job (e.g., "veritasreview.com")
-- Enables multi-tenant tracking and source-specific behavior
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS requester_source VARCHAR(100);
COMMENT ON COLUMN jobs.requester_source IS
    'Source platform that submitted the job (e.g., "veritasreview.com")';

-- Purpose of the scrape for analytics and prioritization
-- Values: "client_report" | "prospect_screening" | "market_research"
-- (enforced by the valid_scrape_purpose constraint in SECTION 7)
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS scrape_purpose VARCHAR(50);
COMMENT ON COLUMN jobs.scrape_purpose IS
    'Purpose of scrape: "client_report", "prospect_screening", "market_research"';

-- Flexible JSONB field for requester-specific metadata
-- Allows platforms to pass through custom data without schema changes
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS requester_metadata JSONB;
COMMENT ON COLUMN jobs.requester_metadata IS
    'Flexible JSONB for requester-specific metadata (pass-through data)';


-- =============================================================================
-- SECTION 2: BATCH FIELDS
-- Support for grouped job submissions (e.g., "scrape these 50 locations")
-- =============================================================================

-- Links job to a batch record (batches table to be created in future migration)
-- NULL indicates a standalone job, not part of a batch
-- No FOREIGN KEY is declared here because the referenced table does not exist yet.
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS batch_id UUID;
COMMENT ON COLUMN jobs.batch_id IS
    'UUID linking to batches table (NULL for standalone jobs)';

-- Position within the batch (1-indexed: 1, 2, 3...)
-- Used for ordered processing and progress tracking
-- (positivity enforced by the valid_batch_index constraint in SECTION 7)
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS batch_index INTEGER;
COMMENT ON COLUMN jobs.batch_index IS
    'Position in batch (1-indexed), NULL for standalone jobs';


-- =============================================================================
-- SECTION 3: EXECUTION FIELDS
-- Control how the job is processed (type, version, priority)
-- =============================================================================

-- Type of scraping job (extensible for future scrapers)
-- Default "google_reviews" maintains backward compatibility
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS job_type VARCHAR(50) DEFAULT 'google_reviews';
COMMENT ON COLUMN jobs.job_type IS
    'Job type for multi-scraper support (default: "google_reviews")';

-- Scraper version that processed the job (e.g., "1.0.0", "2.1.3")
-- Essential for debugging, regression analysis, and A/B testing
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS scraper_version VARCHAR(50);
COMMENT ON COLUMN jobs.scraper_version IS
    'Scraper version that processed this job (e.g., "1.0.0")';

-- Deployment variant used for canary/staged rollouts
-- Values: "stable" | "beta" | "canary"
-- (enforced by the valid_scraper_variant constraint in SECTION 7)
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS scraper_variant VARCHAR(20);
COMMENT ON COLUMN jobs.scraper_variant IS
    'Deployment variant: "stable", "beta", or "canary"';

-- Job priority for queue ordering
-- 0=normal (default), 1=high, 2=urgent
-- Higher priority jobs are processed first
-- NOTE(review): the column is nullable and the valid_priority CHECK passes for
-- NULL. PostgreSQL sorts NULLs first under DESC by default, so an explicit NULL
-- priority would order ahead of "urgent" -- confirm writers never store NULL.
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS priority INTEGER DEFAULT 0;
COMMENT ON COLUMN jobs.priority IS
    'Queue priority: 0=normal (default), 1=high, 2=urgent';


-- =============================================================================
-- SECTION 4: CALLBACK FIELDS
-- Webhook notification management (enhanced from existing webhook_url)
-- =============================================================================

-- Primary callback URL for job completion notifications
-- Separate from existing webhook_url to allow different callback patterns
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS callback_url TEXT;
COMMENT ON COLUMN jobs.callback_url IS
    'Webhook URL for job completion callbacks';

-- Current status of callback delivery
-- Values: "pending" | "sent" | "failed"
-- (enforced by the valid_callback_status constraint in SECTION 7)
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS callback_status VARCHAR(20);
COMMENT ON COLUMN jobs.callback_status IS
    'Callback delivery status: "pending", "sent", "failed"';

-- Timestamp when callback was successfully sent
-- NOTE(review): TIMESTAMP is "without time zone" in PostgreSQL. Confirm that
-- writers store UTC, and that this matches the jobs table's other timestamp
-- columns, before relying on it as an absolute instant.
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS callback_sent_at TIMESTAMP;
COMMENT ON COLUMN jobs.callback_sent_at IS
    'Timestamp when callback was successfully delivered';

-- Number of callback delivery attempts (for retry logic)
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS callback_attempts INTEGER DEFAULT 0;
COMMENT ON COLUMN jobs.callback_attempts IS
    'Number of callback delivery attempts (for retry tracking)';


-- =============================================================================
-- SECTION 5: RESULT SUMMARY
-- Normalized summary for cross-platform dashboards
-- =============================================================================

-- JSONB summary of results for quick dashboard queries
-- Contains pre-computed metrics without loading full reviews_data
-- Example: {"total_reviews": 150, "avg_rating": 4.2, "sentiment": {"positive": 80, "negative": 20}}
ALTER TABLE jobs
    ADD COLUMN IF NOT EXISTS result_summary JSONB;
COMMENT ON COLUMN jobs.result_summary IS
    'JSONB summary for dashboards: review counts, ratings, sentiment breakdown';


-- =============================================================================
-- SECTION 6: INDEXES
-- Optimized for common query patterns
-- =============================================================================
-- Most indexes below are partial (WHERE ...): rows that do not satisfy the
-- predicate are not stored in the index, and a query must imply the predicate
-- for the planner to consider the index.

-- Index for client-based queries (per-client job history, analytics)
CREATE INDEX IF NOT EXISTS idx_jobs_requester_client_id
    ON jobs(requester_client_id)
    WHERE requester_client_id IS NOT NULL;

-- Index for batch operations (get all jobs in a batch)
CREATE INDEX IF NOT EXISTS idx_jobs_batch_id
    ON jobs(batch_id)
    WHERE batch_id IS NOT NULL;

-- Index for job type filtering (when multiple scrapers exist)
CREATE INDEX IF NOT EXISTS idx_jobs_job_type
    ON jobs(job_type);

-- Index for priority queue ordering (high priority jobs first)
-- Composite with status for efficient "get next job" queries
-- NOTE(review): the predicate pins status to 'pending', so the status key
-- column holds a single value for every indexed row; it could be dropped from
-- the key in a follow-up migration.
CREATE INDEX IF NOT EXISTS idx_jobs_priority_status
    ON jobs(priority DESC, status, created_at ASC)
    WHERE status = 'pending';

-- Index for requester source analytics
CREATE INDEX IF NOT EXISTS idx_jobs_requester_source
    ON jobs(requester_source)
    WHERE requester_source IS NOT NULL;

-- Index for callback retry processing
-- Covers only callbacks still awaiting delivery or retry; 'sent' rows are excluded.
CREATE INDEX IF NOT EXISTS idx_jobs_callback_pending
    ON jobs(callback_status, callback_attempts)
    WHERE callback_status IN ('pending', 'failed');

-- Composite index for scraper version analytics
CREATE INDEX IF NOT EXISTS idx_jobs_scraper_version
    ON jobs(scraper_version, scraper_variant)
    WHERE scraper_version IS NOT NULL;


-- =============================================================================
-- SECTION 7: CONSTRAINTS
-- Data integrity for new fields
-- =============================================================================
-- ADD CONSTRAINT has no IF NOT EXISTS form, so each constraint is dropped
-- (IF EXISTS) and then re-added to keep the script re-runnable.
-- NOTE(review): each ADD CONSTRAINT re-validates every existing row on every
-- run -- confirm this is acceptable for the size of the jobs table.

-- Ensure valid scrape_purpose values
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_scrape_purpose;
ALTER TABLE jobs
    ADD CONSTRAINT valid_scrape_purpose
    CHECK (scrape_purpose IS NULL OR scrape_purpose IN ('client_report', 'prospect_screening', 'market_research'));

-- Ensure valid scraper_variant values
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_scraper_variant;
ALTER TABLE jobs
    ADD CONSTRAINT valid_scraper_variant
    CHECK (scraper_variant IS NULL OR scraper_variant IN ('stable', 'beta', 'canary'));

-- Ensure valid callback_status values
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_callback_status;
ALTER TABLE jobs
    ADD CONSTRAINT valid_callback_status
    CHECK (callback_status IS NULL OR callback_status IN ('pending', 'sent', 'failed'));

-- Ensure valid priority range
-- A NULL priority also passes: a CHECK is only violated when it evaluates to FALSE.
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_priority;
ALTER TABLE jobs
    ADD CONSTRAINT valid_priority
    CHECK (priority >= 0 AND priority <= 2);

-- Ensure batch_index is positive when set
ALTER TABLE jobs
    DROP CONSTRAINT IF EXISTS valid_batch_index;
ALTER TABLE jobs
    ADD CONSTRAINT valid_batch_index
    CHECK (batch_index IS NULL OR batch_index > 0);


-- =============================================================================
-- END OF MIGRATION
-- =============================================================================
--
-- Rollback commands (if needed):
-- Kept commented out on purpose; this is reference material, not part of the
-- forward migration. In PostgreSQL, dropping a column also drops the indexes
-- and table constraints that involve it, so the DROP INDEX / DROP CONSTRAINT
-- statements below act as a safety net after the column drops.
--
-- ALTER TABLE jobs DROP COLUMN IF EXISTS requester_client_id;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS requester_source;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS scrape_purpose;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS requester_metadata;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS batch_id;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS batch_index;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS job_type;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS scraper_version;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS scraper_variant;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS priority;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_url;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_status;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_sent_at;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS callback_attempts;
-- ALTER TABLE jobs DROP COLUMN IF EXISTS result_summary;
--
-- DROP INDEX IF EXISTS idx_jobs_requester_client_id;
-- DROP INDEX IF EXISTS idx_jobs_batch_id;
-- DROP INDEX IF EXISTS idx_jobs_job_type;
-- DROP INDEX IF EXISTS idx_jobs_priority_status;
-- DROP INDEX IF EXISTS idx_jobs_requester_source;
-- DROP INDEX IF EXISTS idx_jobs_callback_pending;
-- DROP INDEX IF EXISTS idx_jobs_scraper_version;
--
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_scrape_purpose;
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_scraper_variant;
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_callback_status;
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_priority;
-- ALTER TABLE jobs DROP CONSTRAINT IF EXISTS valid_batch_index;
--
-- =============================================================================