Migrations created: - 001_add_job_platform_fields.sql: Add 15 new columns to jobs table - Requester tracking (client_id, source, purpose, metadata) - Batch support (batch_id, batch_index) - Execution tracking (job_type, scraper_version, variant, priority) - Webhook callbacks (url, status, sent_at, attempts) - Result summary (JSONB for cross-type dashboard) - 7 indexes for query performance - 5 CHECK constraints for data validation - 002_create_batches_table.sql: Batch job grouping - Tracks batch progress (total/completed/failed) - Batch-level callbacks - Requester association - 003_create_scraper_registry.sql: Scraper version management - Version routing (stable/beta/canary variants) - A/B traffic splitting (traffic_pct) - Priority-based routing - Seeds google_reviews v1.0.0 as stable default - 004_create_api_keys.sql: API authentication - Secure key storage (SHA-256 hashes, not plaintext) - Scopes-based permissions - Rate limiting support - Key lifecycle (expiry, active status) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
95 lines
5.2 KiB
SQL
95 lines
5.2 KiB
SQL
-- Migration: 002_create_batches_table.sql
-- Description: Creates the batches table for grouping multiple scrape jobs together
-- Author: ReviewIQ Platform
-- Date: 2026-01-24
--
-- The batches table allows clients to submit multiple places/jobs in a single request,
-- track aggregate progress, and receive a single callback when all jobs complete.
-- This is useful for bulk operations like "screen all 50 prospects" or "refresh all locations".
|
|
|
|
-- =============================================================================
-- CREATE TABLE: batches
-- =============================================================================
-- A batch represents a collection of scrape jobs submitted together.
-- It tracks overall progress and handles consolidated callbacks.
--
-- Foreign Key Reference:
--   jobs.batch_id -> batches.id (defined in jobs table migration)
--   When a batch is created, individual jobs reference it via batch_id.
-- =============================================================================
|
|
|
|
-- Creates the batches table: one row per group of scrape jobs submitted together.
--
-- IF NOT EXISTS makes the statement a no-op when the table is already present,
-- so the migration can be re-run safely.
-- Columns that carry a DEFAULT and are used in progress/status logic are declared
-- NOT NULL so that arithmetic and comparisons on them never evaluate to NULL.
CREATE TABLE IF NOT EXISTS batches (
    -- Primary key
    -- gen_random_uuid() is built in from PostgreSQL 13; older servers need the
    -- pgcrypto extension to provide it.
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Requester identification
    -- Tracks which client/system submitted this batch and why
    requester_client_id VARCHAR(255),   -- Client identifier (e.g., "acme-corp", "internal-audit")
    requester_source    VARCHAR(100),   -- Source system (e.g., "salesforce", "hubspot", "api")
    scrape_purpose      VARCHAR(50),    -- Purpose code (e.g., "screening", "monitoring", "audit")

    -- Batch metadata
    name           VARCHAR(255),                          -- Human-readable name (e.g., "Q1 Prospect Screening")
    total_jobs     INTEGER     NOT NULL DEFAULT 0,        -- Total number of jobs in this batch
    completed_jobs INTEGER     NOT NULL DEFAULT 0,        -- Count of successfully completed jobs
    failed_jobs    INTEGER     NOT NULL DEFAULT 0,        -- Count of failed jobs
    -- Batch status: pending, running, completed
    -- NOTE(review): the documented values are not enforced here; add a CHECK once
    -- the complete set of statuses written by the application is confirmed.
    status         VARCHAR(20) NOT NULL DEFAULT 'pending',

    -- Callback configuration
    -- When all jobs complete, optionally notify a webhook endpoint
    callback_url     TEXT,          -- Webhook URL to call on batch completion
    callback_status  VARCHAR(20),   -- Callback result: pending, success, failed
    callback_sent_at TIMESTAMP,     -- When the callback was sent

    -- Timestamps
    -- NOTE(review): TIMESTAMP stores no time zone; presumably all writers use UTC -- confirm.
    created_at   TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,  -- When batch was created
    completed_at TIMESTAMP,                                     -- When batch finished (all jobs done)

    -- Flexible metadata storage
    -- Allows clients to attach arbitrary data for their own tracking needs
    metadata JSONB,                 -- Custom client data (e.g., {"campaign_id": "123"})

    -- Data validation: job counters are counts and can never be negative.
    CONSTRAINT chk_batches_total_jobs_non_negative     CHECK (total_jobs >= 0),
    CONSTRAINT chk_batches_completed_jobs_non_negative CHECK (completed_jobs >= 0),
    CONSTRAINT chk_batches_failed_jobs_non_negative    CHECK (failed_jobs >= 0)
);
|
|
|
|
-- =============================================================================
-- INDEXES
-- =============================================================================
-- These indexes optimize common query patterns:
|
|
|
|
-- Index for looking up batches by client
-- Used when clients query "show me all my batches"
-- IF NOT EXISTS makes this a no-op when the index is already present, so the
-- migration can be re-run.
-- NOTE(review): requester_client_id is also the leading column of
-- idx_batches_client_status_created, which can likely serve the same lookups;
-- confirm with EXPLAIN before deciding whether this index is still needed.
CREATE INDEX IF NOT EXISTS idx_batches_requester_client_id
    ON batches (requester_client_id);
|
|
|
|
-- Index for filtering by status
-- Used for dashboards showing pending/running/completed batches
-- IF NOT EXISTS makes this a no-op when the index is already present, so the
-- migration can be re-run.
CREATE INDEX IF NOT EXISTS idx_batches_status
    ON batches (status);
|
|
|
|
-- Index for time-based queries
-- Used for "recent batches", cleanup jobs, and analytics
-- IF NOT EXISTS makes this a no-op when the index is already present, so the
-- migration can be re-run.
CREATE INDEX IF NOT EXISTS idx_batches_created_at
    ON batches (created_at);
|
|
|
|
-- Composite index for common dashboard query pattern
-- Optimizes: "show me pending batches for client X ordered by creation time"
-- Column order is deliberate: the two equality filters (client, status) come
-- first and created_at DESC last, matching a newest-first listing.
-- IF NOT EXISTS makes this a no-op when the index is already present, so the
-- migration can be re-run.
CREATE INDEX IF NOT EXISTS idx_batches_client_status_created
    ON batches (requester_client_id, status, created_at DESC);
|
|
|
|
-- =============================================================================
-- COMMENTS
-- =============================================================================
|
|
|
|
-- Catalog descriptions for the batches table and each of its columns.
-- These are stored in the database and shown by tools such as psql's \d+.

-- Table
COMMENT ON TABLE batches
    IS 'Groups multiple scrape jobs for batch processing with aggregate tracking and callbacks';

-- Identity
COMMENT ON COLUMN batches.id
    IS 'Unique identifier for the batch (UUID)';

-- Requester identification
COMMENT ON COLUMN batches.requester_client_id
    IS 'Identifier of the client who submitted this batch';
COMMENT ON COLUMN batches.requester_source
    IS 'Source system that originated the request (e.g., salesforce, api)';
COMMENT ON COLUMN batches.scrape_purpose
    IS 'Purpose of the scrape (screening, monitoring, audit, etc.)';

-- Batch metadata and progress
COMMENT ON COLUMN batches.name
    IS 'Human-readable batch name for display purposes';
COMMENT ON COLUMN batches.total_jobs
    IS 'Total number of jobs in this batch';
COMMENT ON COLUMN batches.completed_jobs
    IS 'Number of jobs that completed successfully';
COMMENT ON COLUMN batches.failed_jobs
    IS 'Number of jobs that failed';
COMMENT ON COLUMN batches.status
    IS 'Current batch status: pending, running, or completed';

-- Callback configuration
COMMENT ON COLUMN batches.callback_url
    IS 'Webhook URL to notify when batch completes';
COMMENT ON COLUMN batches.callback_status
    IS 'Result of callback attempt: pending, success, or failed';
COMMENT ON COLUMN batches.callback_sent_at
    IS 'Timestamp when callback was sent';

-- Timestamps
COMMENT ON COLUMN batches.created_at
    IS 'When the batch was created';
COMMENT ON COLUMN batches.completed_at
    IS 'When the batch finished processing';

-- Client-supplied metadata
COMMENT ON COLUMN batches.metadata
    IS 'Arbitrary JSON metadata for client-specific needs';
|