Phase 1: Database migrations for platform features
Migrations created:

- 001_add_job_platform_fields.sql: Add 15 new columns to jobs table
  - Requester tracking (client_id, source, purpose, metadata)
  - Batch support (batch_id, batch_index)
  - Execution tracking (job_type, scraper_version, variant, priority)
  - Webhook callbacks (url, status, sent_at, attempts)
  - Result summary (JSONB for cross-type dashboard)
  - 7 indexes for query performance
  - 5 CHECK constraints for data validation
- 002_create_batches_table.sql: Batch job grouping
  - Tracks batch progress (total/completed/failed)
  - Batch-level callbacks
  - Requester association
- 003_create_scraper_registry.sql: Scraper version management
  - Version routing (stable/beta/canary variants)
  - A/B traffic splitting (traffic_pct)
  - Priority-based routing
  - Seeds google_reviews v1.0.0 as stable default
- 004_create_api_keys.sql: API authentication
  - Secure key storage (SHA-256 hashes, not plaintext)
  - Scopes-based permissions
  - Rate limiting support
  - Key lifecycle (expiry, active status)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
79
migrations/versions/004_create_api_keys.sql
Normal file
79
migrations/versions/004_create_api_keys.sql
Normal file
@@ -0,0 +1,79 @@
|
||||
-- Migration: 004_create_api_keys.sql
|
||||
-- Description: Create api_keys table for API authentication
|
||||
-- Created: 2026-01-24
|
||||
--
|
||||
-- Security Model:
|
||||
-- ================
|
||||
-- API keys are NEVER stored in plain text. When a new API key is generated:
|
||||
-- 1. A random key is generated (e.g., "riq_abc123xyz...")
|
||||
-- 2. The full key is returned to the user ONCE and never stored
|
||||
-- 3. We store only the SHA-256 hash of the key (key_hash)
|
||||
-- 4. We store the first 8 characters (key_prefix) for identification in logs/UI
|
||||
--
|
||||
-- Authentication Flow:
|
||||
-- 1. Client sends API key in Authorization header
|
||||
-- 2. Server hashes the received key with SHA-256
|
||||
-- 3. Server looks up the hash in this table
|
||||
-- 4. If found and is_active=true and not expired, request is authenticated
|
||||
--
|
||||
-- This approach ensures that even if the database is compromised,
|
||||
-- attackers cannot recover the actual API keys.
|
||||
|
||||
-- ============================================================================
|
||||
-- CREATE TABLE
|
||||
-- ============================================================================
|
||||
-- api_keys: one row per issued API key. Only the SHA-256 hash of a key is
-- persisted (key_hash); the plaintext key is never written to this table.
--
-- NOTE(review): gen_random_uuid() is built in from PostgreSQL 13; older
-- servers need the pgcrypto extension -- confirm the target version.
-- NOTE(review): the timestamp columns are TIMESTAMP (without time zone);
-- presumably the application writes and compares them in UTC -- confirm.
CREATE TABLE api_keys (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Key identification (security: we store the hash, not the actual key).
    -- The UNIQUE constraint also provides the index used for hash lookups.
    key_hash        VARCHAR(64)  NOT NULL UNIQUE,  -- SHA-256 hash of API key (64 hex chars)
    key_prefix      VARCHAR(8)   NOT NULL,         -- First 8 chars for identification in UI/logs
    name            VARCHAR(255) NOT NULL,         -- Human-readable name, e.g., "Veritas Production Key"

    -- Client association
    client_id       VARCHAR(255) NOT NULL,         -- External client identifier, e.g., "veritas_client_123"

    -- Permissions (PostgreSQL array of allowed scopes).
    -- NOT NULL so "no scopes" has exactly one representation: the empty array.
    scopes          TEXT[]       NOT NULL DEFAULT '{}',  -- e.g., {"jobs:read", "jobs:write", "admin"}

    -- Rate limiting: maximum requests per minute for this key.
    -- Deliberately nullable: NULL is documented as "use the system default".
    -- Zero and negative limits are rejected by the CHECK constraint below.
    rate_limit_rpm  INTEGER      DEFAULT 60,

    -- Status: set to false to revoke without deleting.
    -- NOT NULL so a key is always either active or revoked, never unknown.
    is_active       BOOLEAN      NOT NULL DEFAULT true,

    -- Timestamps
    created_at      TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_used_at    TIMESTAMP,                     -- Updated on each successful authentication
    expires_at      TIMESTAMP,                     -- NULL means the key never expires

    -- Extensible metadata (for future use: IP allowlists, custom limits, etc.)
    metadata        JSONB,

    -- A CHECK passes when its expression is NULL, so NULL remains allowed.
    CONSTRAINT api_keys_rate_limit_rpm_check CHECK (rate_limit_rpm > 0)
);
|
||||
|
||||
-- ============================================================================
|
||||
-- INDEXES
|
||||
-- ============================================================================
|
||||
|
||||
-- Primary lookup: this is the critical path for every API request.
-- When authenticating, we hash the provided key and look it up by key_hash.
--
-- No separate index is created for this. The UNIQUE constraint on
-- api_keys.key_hash is already backed by a unique B-tree index (named
-- api_keys_key_hash_key by default), and the planner uses it for equality
-- lookups. A second, non-unique index on the same column would be an exact
-- duplicate: extra storage and extra work on every write, with no read benefit.
|
||||
|
||||
-- Per-client lookups: supports admin operations that select by client_id,
-- such as "list all keys for client X" or "revoke all keys for client X".
CREATE INDEX idx_api_keys_client_id
    ON api_keys USING btree (client_id);
|
||||
|
||||
-- Active-keys partial index: contains only rows where is_active = true, so
-- revoked keys take up no space in it. It can only serve queries whose WHERE
-- clause implies is_active = true; it cannot help find inactive keys.
-- NOTE(review): every entry in this index has the same key value (true), so
-- it is only worthwhile if active keys are a small share of the table --
-- confirm against real query patterns.
CREATE INDEX idx_api_keys_active
    ON api_keys USING btree (is_active)
    WHERE is_active = true;
|
||||
|
||||
-- ============================================================================
|
||||
-- COMMENTS
|
||||
-- ============================================================================
|
||||
-- Catalog descriptions for the table and its less obvious columns.
COMMENT ON TABLE api_keys
    IS 'API keys for authenticating external clients. Keys are stored as SHA-256 hashes for security.';

COMMENT ON COLUMN api_keys.key_hash
    IS 'SHA-256 hash of the API key. The actual key is never stored.';

COMMENT ON COLUMN api_keys.key_prefix
    IS 'First 8 characters of the key for identification in UI and logs.';

COMMENT ON COLUMN api_keys.scopes
    IS 'Array of permission scopes: jobs:read, jobs:write, admin, etc.';

COMMENT ON COLUMN api_keys.rate_limit_rpm
    IS 'Rate limit in requests per minute. NULL uses system default.';

COMMENT ON COLUMN api_keys.metadata
    IS 'Extensible JSON metadata: IP allowlists, usage notes, etc.';
|
||||
Reference in New Issue
Block a user