Files
whyrating-engine-legacy/migrations/versions/004_create_api_keys.sql
Alejandro Gutiérrez 2412996c54 Phase 1: Database migrations for platform features
Migrations created:
- 001_add_job_platform_fields.sql: Add 15 new columns to jobs table
  - Requester tracking (client_id, source, purpose, metadata)
  - Batch support (batch_id, batch_index)
  - Execution tracking (job_type, scraper_version, variant, priority)
  - Webhook callbacks (url, status, sent_at, attempts)
  - Result summary (JSONB for cross-type dashboard)
  - 7 indexes for query performance
  - 5 CHECK constraints for data validation

- 002_create_batches_table.sql: Batch job grouping
  - Tracks batch progress (total/completed/failed)
  - Batch-level callbacks
  - Requester association

- 003_create_scraper_registry.sql: Scraper version management
  - Version routing (stable/beta/canary variants)
  - A/B traffic splitting (traffic_pct)
  - Priority-based routing
  - Seeds google_reviews v1.0.0 as stable default

- 004_create_api_keys.sql: API authentication
  - Secure key storage (SHA-256 hashes, not plaintext)
  - Scopes-based permissions
  - Rate limiting support
  - Key lifecycle (expiry, active status)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 15:24:28 +00:00

80 lines
4.0 KiB
SQL

-- Migration: 004_create_api_keys.sql
-- Description: Create api_keys table for API authentication
-- Created: 2026-01-24
--
-- Security Model:
-- ================
-- API keys are NEVER stored in plain text. When a new API key is generated:
-- 1. A random key is generated (e.g., "riq_abc123xyz...")
-- 2. The full key is returned to the user ONCE and never stored
-- 3. We store only the SHA-256 hash of the key (key_hash)
-- 4. We store the first 8 characters (key_prefix) for identification in logs/UI
--
-- Authentication Flow:
-- 1. Client sends API key in Authorization header
-- 2. Server hashes the received key with SHA-256
-- 3. Server looks up the hash in this table
-- 4. If found and is_active=true and not expired, request is authenticated
--
-- This approach ensures that even if the database is compromised,
-- attackers cannot recover the actual API keys.
-- ============================================================================
-- CREATE TABLE
-- ============================================================================
-- api_keys: one row per issued API key. Only the SHA-256 hash of a key is
-- persisted (key_hash); the plaintext key itself is never stored here.
--
-- Columns that carry a DEFAULT and are read as plain values during
-- authentication/authorization (scopes, is_active, created_at) are declared
-- NOT NULL so an explicit NULL cannot create a row that is neither active
-- nor revoked, or whose scope list is NULL rather than empty.
CREATE TABLE api_keys (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Key identification (security: we store the hash, not the actual key)
    key_hash VARCHAR(64) NOT NULL UNIQUE,   -- SHA-256 hash of API key (64 hex chars)
    key_prefix VARCHAR(8) NOT NULL,         -- First 8 chars for identification in UI/logs
    name VARCHAR(255) NOT NULL,             -- Human-readable name, e.g., "Veritas Production Key"

    -- Client association
    client_id VARCHAR(255) NOT NULL,        -- External client identifier, e.g., "veritas_client_123"

    -- Permissions (PostgreSQL array of allowed scopes); empty array = no scopes
    scopes TEXT[] NOT NULL DEFAULT '{}',    -- e.g., {"jobs:read", "jobs:write", "admin"}

    -- Rate limiting: maximum requests per minute for this key.
    -- Deliberately nullable: the column comment documents NULL as
    -- "use the system default". Omitting the column on INSERT yields 60.
    rate_limit_rpm INTEGER DEFAULT 60,

    -- Status: set to false to revoke a key without deleting its row
    is_active BOOLEAN NOT NULL DEFAULT true,

    -- Timestamps
    -- NOTE(review): TIMESTAMP is "without time zone" in PostgreSQL; presumably
    -- these values are written in UTC -- confirm with the application code.
    -- The type is left unchanged here so existing readers/writers keep working.
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_used_at TIMESTAMP,                 -- Updated on each successful authentication
    expires_at TIMESTAMP,                   -- NULL means the key never expires

    -- Extensible metadata (for future use: IP allowlists, custom limits, etc.)
    metadata JSONB,

    -- Reject negative limits. A CHECK passes when its expression evaluates to
    -- NULL, so a NULL rate_limit_rpm (system default) is still accepted.
    CONSTRAINT api_keys_rate_limit_rpm_check CHECK (rate_limit_rpm >= 0)
);
-- ============================================================================
-- INDEXES
-- ============================================================================
-- Primary lookup path: this is the critical path for every API request.
-- When authenticating, the provided key is hashed and looked up by key_hash.
-- No explicit index is created for it here: the UNIQUE constraint on
-- api_keys.key_hash already makes PostgreSQL build a unique B-tree index on
-- that column, and a second index on the same column would only add write
-- overhead and storage without making lookups any faster.
-- Per-client lookups: backs admin operations that select by client_id,
-- such as listing every key of a client or revoking all of a client's keys.
CREATE INDEX idx_api_keys_client_id
    ON api_keys (client_id);
-- Active-key filter: a partial index that contains only the rows whose
-- is_active is true, so revoked (inactive) keys occupy no space in it.
CREATE INDEX idx_api_keys_active
    ON api_keys (is_active)
    WHERE is_active = true;
-- ============================================================================
-- COMMENTS
-- ============================================================================
-- Descriptions attached to the table and to selected columns; they are stored
-- in the database alongside the schema objects they describe.
COMMENT ON TABLE api_keys
    IS 'API keys for authenticating external clients. Keys are stored as SHA-256 hashes for security.';

COMMENT ON COLUMN api_keys.key_hash
    IS 'SHA-256 hash of the API key. The actual key is never stored.';

COMMENT ON COLUMN api_keys.key_prefix
    IS 'First 8 characters of the key for identification in UI and logs.';

COMMENT ON COLUMN api_keys.scopes
    IS 'Array of permission scopes: jobs:read, jobs:write, admin, etc.';

COMMENT ON COLUMN api_keys.rate_limit_rpm
    IS 'Rate limit in requests per minute. NULL uses system default.';

COMMENT ON COLUMN api_keys.metadata
    IS 'Extensible JSON metadata: IP allowlists, usage notes, etc.';