-- Migration: 002_create_batches_table.sql
-- Description: Creates the batches table for grouping multiple scrape jobs together
-- Author: ReviewIQ Platform
-- Date: 2026-01-24
--
-- The batches table allows clients to submit multiple places/jobs in a single request,
-- track aggregate progress, and receive a single callback when all jobs complete.
-- This is useful for bulk operations like "screen all 50 prospects" or
-- "refresh all locations".

-- =============================================================================
-- CREATE TABLE: batches
-- =============================================================================
-- A batch represents a collection of scrape jobs submitted together.
-- It tracks overall progress and handles consolidated callbacks.
--
-- Foreign Key Reference:
--   jobs.batch_id -> batches.id (defined in jobs table migration)
--   When a batch is created, individual jobs reference it via batch_id.
--
-- NOTE: gen_random_uuid() is built in from PostgreSQL 13 onwards; older servers
-- need the pgcrypto extension installed before this migration runs.
--
-- NOTE(review): the *_at columns are TIMESTAMP (without time zone). They are
-- presumably meant to hold UTC instants -- confirm with the writers, and
-- consider TIMESTAMPTZ in a follow-up migration if so.
-- =============================================================================
CREATE TABLE batches (
    -- Primary key
    id                  UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Requester identification
    -- Tracks which client/system submitted this batch and why
    requester_client_id VARCHAR(255),   -- Client identifier (e.g., "acme-corp", "internal-audit")
    requester_source    VARCHAR(100),   -- Source system (e.g., "salesforce", "hubspot", "api")
    scrape_purpose      VARCHAR(50),    -- Purpose code (e.g., "screening", "monitoring", "audit")

    -- Batch metadata
    name                VARCHAR(255),   -- Human-readable name (e.g., "Q1 Prospect Screening")

    -- Progress counters. They are NOT NULL and non-negative so that arithmetic
    -- such as "completed_jobs + failed_jobs = total_jobs" can never evaluate
    -- to NULL or be skewed by a negative count.
    total_jobs          INTEGER NOT NULL DEFAULT 0      -- Total number of jobs in this batch
        CONSTRAINT batches_total_jobs_check CHECK (total_jobs >= 0),
    completed_jobs      INTEGER NOT NULL DEFAULT 0      -- Count of successfully completed jobs
        CONSTRAINT batches_completed_jobs_check CHECK (completed_jobs >= 0),
    failed_jobs         INTEGER NOT NULL DEFAULT 0      -- Count of failed jobs
        CONSTRAINT batches_failed_jobs_check CHECK (failed_jobs >= 0),

    -- NOT NULL so that status filters never silently miss a batch.
    status              VARCHAR(20) NOT NULL DEFAULT 'pending',  -- Batch status: pending, running, completed

    -- Callback configuration
    -- When all jobs complete, optionally notify a webhook endpoint
    callback_url        TEXT,           -- Webhook URL to call on batch completion
    callback_status     VARCHAR(20),    -- Callback result: pending, success, failed
    callback_sent_at    TIMESTAMP,      -- When the callback was sent

    -- Timestamps
    created_at          TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,  -- When batch was created
    completed_at        TIMESTAMP,      -- When batch finished (all jobs done)

    -- Flexible metadata storage
    -- Allows clients to attach arbitrary data for their own tracking needs
    metadata            JSONB           -- Custom client data (e.g., {"campaign_id": "123"})
);

-- =============================================================================
-- INDEXES
-- =============================================================================
-- These indexes optimize common query patterns:

-- Index for looking up batches by client
-- Used when clients query "show me all my batches"
-- NOTE(review): requester_client_id is also the leading column of
-- idx_batches_client_status_created below, which can serve the same lookups;
-- this index is likely redundant. Kept so the set of index names is unchanged.
CREATE INDEX idx_batches_requester_client_id
    ON batches (requester_client_id);

-- Index for filtering by status
-- Used for dashboards showing pending/running/completed batches
CREATE INDEX idx_batches_status
    ON batches (status);

-- Index for time-based queries
-- Used for "recent batches", cleanup jobs, and analytics
CREATE INDEX idx_batches_created_at
    ON batches (created_at);

-- Composite index for common dashboard query pattern
-- Optimizes: "show me pending batches for client X ordered by creation time"
CREATE INDEX idx_batches_client_status_created
    ON batches (requester_client_id, status, created_at DESC);

-- =============================================================================
-- COMMENTS
-- =============================================================================
-- Catalog descriptions, visible through psql (\d+ batches) and schema tools.
COMMENT ON TABLE batches IS 'Groups multiple scrape jobs for batch processing with aggregate tracking and callbacks';

COMMENT ON COLUMN batches.id IS 'Unique identifier for the batch (UUID)';
COMMENT ON COLUMN batches.requester_client_id IS 'Identifier of the client who submitted this batch';
COMMENT ON COLUMN batches.requester_source IS 'Source system that originated the request (e.g., salesforce, api)';
COMMENT ON COLUMN batches.scrape_purpose IS 'Purpose of the scrape (screening, monitoring, audit, etc.)';
COMMENT ON COLUMN batches.name IS 'Human-readable batch name for display purposes';
COMMENT ON COLUMN batches.total_jobs IS 'Total number of jobs in this batch';
COMMENT ON COLUMN batches.completed_jobs IS 'Number of jobs that completed successfully';
COMMENT ON COLUMN batches.failed_jobs IS 'Number of jobs that failed';
COMMENT ON COLUMN batches.status IS 'Current batch status: pending, running, or completed';
COMMENT ON COLUMN batches.callback_url IS 'Webhook URL to notify when batch completes';
COMMENT ON COLUMN batches.callback_status IS 'Result of callback attempt: pending, success, or failed';
COMMENT ON COLUMN batches.callback_sent_at IS 'Timestamp when callback was sent';
COMMENT ON COLUMN batches.created_at IS 'When the batch was created';
COMMENT ON COLUMN batches.completed_at IS 'When the batch finished processing';
COMMENT ON COLUMN batches.metadata IS 'Arbitrary JSON metadata for client-specific needs';