feat: Add decoupled pipeline schema with separate PostgreSQL namespace

- Create consolidated migration (005_create_pipeline_schema.sql) with 'pipeline' schema for all classification tables - Update pipeline repositories to use schema prefix (pipeline.*) - Add run_migrations() method to DatabaseManager - Add CLI tool for running versioned migrations Tables created in pipeline schema: - reviews_raw, reviews_enriched (Stage 1) - review_spans (Stage 2) - issues, issue_spans, issue_events (Stage 3) - fact_timeseries (Stage 4) - urt_domains, urt_categories (taxonomy lookup) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-24 18:17:20 +00:00
parent 7d720f5378
commit 03ed7029e2
4 changed files with 710 additions and 23 deletions
--- a/migrations/versions/005_create_pipeline_schema.sql
+++ b/migrations/versions/005_create_pipeline_schema.sql
@@ -0,0 +1,544 @@
+-- =============================================================================
+-- Migration: 005_create_pipeline_schema.sql
+-- ReviewIQ Pipeline - Decoupled Schema
+-- =============================================================================
+--
+-- Creates a separate 'pipeline' schema for all review classification tables.
+-- This keeps the pipeline tables decoupled from the main scraper schema while
+-- sharing the same database.
+--
+-- Tables created:
+--   pipeline.reviews_raw       - Immutable audit log of scraped reviews
+--   pipeline.reviews_enriched  - Normalized/classified reviews
+--   pipeline.review_spans      - Extracted semantic spans
+--   pipeline.issues            - Aggregated issues from negative spans
+--   pipeline.issue_spans       - Issue-to-span linking
+--   pipeline.issue_events      - Audit log for issue changes
+--   pipeline.fact_timeseries   - Pre-aggregated metrics for dashboards
+--   pipeline.urt_domains       - URT taxonomy domains
+--   pipeline.urt_categories    - URT taxonomy categories
+--
+-- Soft FK: pipeline.reviews_raw.job_id -> public.jobs.job_id (optional)
+--
+-- Date: 2026-01-24
+-- =============================================================================
+
+-- Create the pipeline schema: the namespace that holds every object defined
+-- in this migration. IF NOT EXISTS makes the statement a no-op when the schema
+-- is already present, so the migration can be re-run.
+CREATE SCHEMA IF NOT EXISTS pipeline;
+
+COMMENT ON SCHEMA pipeline IS 'ReviewIQ Pipeline - LLM-powered review classification and aggregation';
+
+
+-- =============================================================================
+-- SECTION 1: ENUM TYPES (in pipeline schema)
+-- =============================================================================
+
+-- CREATE TYPE has no IF NOT EXISTS form, so each enum is created inside its own
+-- DO block that ignores duplicate_object. Re-running the migration therefore
+-- leaves an already-existing type untouched.
+-- NOTE(review): within this migration only issue_state, subject_type and
+-- bucket_type are used as column types; the remaining enums are not referenced
+-- by any table defined here - confirm whether they are meant to be applied.
+
+-- Valence codes
+DO $valence$
+BEGIN
+    CREATE TYPE pipeline.valence_type AS ENUM ('V+', 'V-', 'V0', 'V±');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$valence$;
+
+-- Intensity codes
+DO $intensity$
+BEGIN
+    CREATE TYPE pipeline.intensity_type AS ENUM ('I1', 'I2', 'I3');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$intensity$;
+
+-- Specificity codes
+DO $specificity$
+BEGIN
+    CREATE TYPE pipeline.specificity_type AS ENUM ('S1', 'S2', 'S3');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$specificity$;
+
+-- Actionability codes
+DO $actionability$
+BEGIN
+    CREATE TYPE pipeline.actionability_type AS ENUM ('A1', 'A2', 'A3');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$actionability$;
+
+-- Temporal codes
+DO $temporal$
+BEGIN
+    CREATE TYPE pipeline.temporal_type AS ENUM ('TC', 'TR', 'TH', 'TF');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$temporal$;
+
+-- Evidence codes
+DO $evidence$
+BEGIN
+    CREATE TYPE pipeline.evidence_type AS ENUM ('ES', 'EI', 'EC');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$evidence$;
+
+-- Comparative codes
+DO $comparative$
+BEGIN
+    CREATE TYPE pipeline.comparative_type AS ENUM ('CR-N', 'CR-B', 'CR-W', 'CR-S');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$comparative$;
+
+-- Issue lifecycle states (column type of pipeline.issues.state)
+DO $issue_state$
+BEGIN
+    CREATE TYPE pipeline.issue_state AS ENUM ('open', 'resolved', 'ignored', 'merged');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$issue_state$;
+
+-- Fact subject kinds (column type of pipeline.fact_timeseries.subject_type)
+DO $subject$
+BEGIN
+    CREATE TYPE pipeline.subject_type AS ENUM ('overall', 'urt_code', 'domain', 'issue');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$subject$;
+
+-- Fact time buckets (column type of pipeline.fact_timeseries.bucket_type)
+DO $bucket$
+BEGIN
+    CREATE TYPE pipeline.bucket_type AS ENUM ('day', 'week', 'month');
+EXCEPTION WHEN duplicate_object THEN
+    NULL;
+END
+$bucket$;
+
+
+-- =============================================================================
+-- SECTION 2: URT TAXONOMY LOOKUP TABLES
+-- =============================================================================
+
+-- Top-level URT domains, keyed by a single-letter code.
+CREATE TABLE IF NOT EXISTS pipeline.urt_domains (
+    code        CHAR(1)     PRIMARY KEY,
+    name        VARCHAR(50) NOT NULL,
+    description TEXT
+);
+
+COMMENT ON TABLE pipeline.urt_domains IS 'URT v5.1 top-level domains';
+
+-- Seed the seven domains. ON CONFLICT ... DO NOTHING keeps rows that already
+-- exist unchanged, so re-running never overwrites a stored name/description.
+INSERT INTO pipeline.urt_domains (code, name, description)
+VALUES
+    ('O', 'Offering', 'Product/service quality, features, variety'),
+    ('P', 'Price', 'Value, pricing, promotions, payment'),
+    ('J', 'Journey', 'Timing, process, convenience, accessibility'),
+    ('E', 'Environment', 'Physical space, ambiance, cleanliness, digital UX'),
+    ('A', 'Attitude', 'Staff behavior, helpfulness, professionalism'),
+    ('V', 'Voice', 'Brand, communication, marketing, transparency'),
+    ('R', 'Relationship', 'Loyalty, trust, consistency, personalization')
+ON CONFLICT (code) DO NOTHING;
+
+-- Tier-2 categories; each one belongs to exactly one domain via domain_code.
+CREATE TABLE IF NOT EXISTS pipeline.urt_categories (
+    code        VARCHAR(5)   PRIMARY KEY,
+    domain_code CHAR(1)      NOT NULL REFERENCES pipeline.urt_domains(code),
+    name        VARCHAR(100) NOT NULL,
+    description TEXT
+);
+
+COMMENT ON TABLE pipeline.urt_categories IS 'URT v5.1 Tier-2 categories';
+
+-- Seed four categories per domain (description is left NULL for these rows).
+INSERT INTO pipeline.urt_categories (code, domain_code, name)
+VALUES
+    -- O: Offering
+    ('O1', 'O', 'Core Product/Service'),
+    ('O2', 'O', 'Product Features'),
+    ('O3', 'O', 'Variety & Selection'),
+    ('O4', 'O', 'Customization'),
+    -- P: Price
+    ('P1', 'P', 'Value Perception'),
+    ('P2', 'P', 'Pricing Structure'),
+    ('P3', 'P', 'Promotions & Deals'),
+    ('P4', 'P', 'Payment Process'),
+    -- J: Journey
+    ('J1', 'J', 'Wait Times'),
+    ('J2', 'J', 'Booking & Reservations'),
+    ('J3', 'J', 'Navigation & Convenience'),
+    ('J4', 'J', 'Accessibility'),
+    -- E: Environment
+    ('E1', 'E', 'Physical Environment'),
+    ('E2', 'E', 'Ambiance & Atmosphere'),
+    ('E3', 'E', 'Cleanliness'),
+    ('E4', 'E', 'Digital Experience'),
+    -- A: Attitude
+    ('A1', 'A', 'Friendliness'),
+    ('A2', 'A', 'Helpfulness'),
+    ('A3', 'A', 'Professionalism'),
+    ('A4', 'A', 'Knowledge & Expertise'),
+    -- V: Voice
+    ('V1', 'V', 'Brand Identity'),
+    ('V2', 'V', 'Communication'),
+    ('V3', 'V', 'Marketing'),
+    ('V4', 'V', 'Transparency'),
+    -- R: Relationship
+    ('R1', 'R', 'Loyalty'),
+    ('R2', 'R', 'Trust'),
+    ('R3', 'R', 'Consistency'),
+    ('R4', 'R', 'Personalization')
+ON CONFLICT (code) DO NOTHING;
+
+
+-- =============================================================================
+-- SECTION 3: STAGE 1 - RAW & ENRICHED REVIEWS
+-- =============================================================================
+
+-- Raw reviews as scraped from the source (intended as an immutable audit log;
+-- immutability is a convention, nothing in this DDL enforces it).
+-- One row per (source, review_id, review_version).
+CREATE TABLE IF NOT EXISTS pipeline.reviews_raw (
+    id             BIGSERIAL                PRIMARY KEY,
+
+    -- Scraper job that produced the row. Soft link to public.jobs.job_id:
+    -- nullable and deliberately without a FOREIGN KEY constraint.
+    job_id         UUID,
+
+    source         VARCHAR(20)              NOT NULL DEFAULT 'google',
+    review_id      VARCHAR(255)             NOT NULL,
+    place_id       VARCHAR(255)             NOT NULL,
+    raw_payload    JSONB                    NOT NULL DEFAULT '{}',
+    review_text    TEXT,
+    rating         SMALLINT                 NOT NULL CHECK (rating BETWEEN 1 AND 5),
+    review_time    TIMESTAMP WITH TIME ZONE NOT NULL,
+    reviewer_name  VARCHAR(255)             NOT NULL,
+    reviewer_id    VARCHAR(255),
+    review_version INTEGER                  NOT NULL DEFAULT 1,
+    pulled_at      TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+    created_at     TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+
+    CONSTRAINT reviews_raw_unique UNIQUE (source, review_id, review_version)
+);
+
+-- Lookup indexes. The job_id index is partial: rows without a job are skipped.
+CREATE INDEX IF NOT EXISTS idx_reviews_raw_job_id
+    ON pipeline.reviews_raw(job_id)
+    WHERE job_id IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_reviews_raw_place_id
+    ON pipeline.reviews_raw(place_id);
+CREATE INDEX IF NOT EXISTS idx_reviews_raw_review_time
+    ON pipeline.reviews_raw(review_time);
+CREATE INDEX IF NOT EXISTS idx_reviews_raw_pulled_at
+    ON pipeline.reviews_raw(pulled_at);
+
+COMMENT ON TABLE pipeline.reviews_raw IS 'Immutable raw review data as scraped from source';
+COMMENT ON COLUMN pipeline.reviews_raw.job_id IS 'Optional link to public.jobs.job_id for traceability';
+
+
+-- Enriched reviews table (mutable, updated by classification).
+-- One row per (source, review_id, review_version); raw_id optionally points
+-- back at the pipeline.reviews_raw row it was derived from.
+-- NOTE(review): nothing in this DDL enforces that only one version per
+-- (source, review_id) has is_latest = TRUE - presumably maintained by the
+-- writer; confirm.
+CREATE TABLE IF NOT EXISTS pipeline.reviews_enriched (
+    id BIGSERIAL PRIMARY KEY,
+    source VARCHAR(20) NOT NULL DEFAULT 'google',
+    review_id VARCHAR(255) NOT NULL,
+    review_version INTEGER NOT NULL DEFAULT 1,
+    is_latest BOOLEAN NOT NULL DEFAULT TRUE,
+    raw_id BIGINT REFERENCES pipeline.reviews_raw(id),
+
+    -- Tenant context
+    business_id VARCHAR(255) NOT NULL,
+    place_id VARCHAR(255) NOT NULL,
+
+    -- Content
+    text TEXT NOT NULL,
+    text_normalized TEXT NOT NULL,
+    rating SMALLINT NOT NULL CHECK (rating BETWEEN 1 AND 5),
+    review_time TIMESTAMP WITH TIME ZONE NOT NULL,
+
+    -- Normalization fields
+    language VARCHAR(10) NOT NULL DEFAULT 'en',
+    taxonomy_version VARCHAR(20) NOT NULL DEFAULT 'v5.1',
+
+    -- Classification fields (NULL until Stage 2).
+    -- These are plain VARCHAR columns; the pipeline.*_type enums are not
+    -- applied to them, so the database does not validate the stored codes.
+    urt_primary VARCHAR(10),
+    urt_secondary VARCHAR(10)[] DEFAULT '{}',
+    valence VARCHAR(5),
+    intensity VARCHAR(5),
+    comparative VARCHAR(10),
+    staff_mentions VARCHAR(255)[] DEFAULT '{}',
+    quotes JSONB DEFAULT '{}',
+    embedding REAL[] DEFAULT '{}',
+    trust_score REAL,
+    classification_model VARCHAR(100),
+    classification_confidence JSONB DEFAULT '{}',
+    processed_at TIMESTAMP WITH TIME ZONE,
+
+    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+
+    -- Also the target of the composite foreign key on pipeline.review_spans.
+    CONSTRAINT reviews_enriched_unique UNIQUE (source, review_id, review_version)
+);
+
+-- Indexes for reviews_enriched.
+-- The urt_primary and valence indexes are partial (classified rows only);
+-- idx_reviews_enriched_unclassified covers the opposite set: latest rows that
+-- have no urt_primary yet, newest review first.
+CREATE INDEX IF NOT EXISTS idx_reviews_enriched_business_id ON pipeline.reviews_enriched(business_id);
+CREATE INDEX IF NOT EXISTS idx_reviews_enriched_place_id ON pipeline.reviews_enriched(place_id);
+CREATE INDEX IF NOT EXISTS idx_reviews_enriched_review_time ON pipeline.reviews_enriched(review_time);
+CREATE INDEX IF NOT EXISTS idx_reviews_enriched_urt_primary ON pipeline.reviews_enriched(urt_primary) WHERE urt_primary IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_reviews_enriched_unclassified ON pipeline.reviews_enriched(review_time DESC) WHERE urt_primary IS NULL AND is_latest = TRUE;
+CREATE INDEX IF NOT EXISTS idx_reviews_enriched_valence ON pipeline.reviews_enriched(valence) WHERE valence IS NOT NULL;
+
+COMMENT ON TABLE pipeline.reviews_enriched IS 'Enriched reviews with normalization and classification';
+
+
+-- =============================================================================
+-- SECTION 4: STAGE 2 - REVIEW SPANS
+-- =============================================================================
+
+-- Semantic spans extracted from a review, one row per span.
+-- Each span belongs to exactly one enriched review version through the
+-- composite foreign key fk_review; span_id is the externally visible unique
+-- identifier, id is the surrogate key.
+CREATE TABLE IF NOT EXISTS pipeline.review_spans (
+    id BIGSERIAL PRIMARY KEY,
+    span_id VARCHAR(50) NOT NULL UNIQUE,
+
+    -- Context
+    business_id VARCHAR(255) NOT NULL,
+    place_id VARCHAR(255) NOT NULL,
+    source VARCHAR(20) NOT NULL DEFAULT 'google',
+    review_id VARCHAR(255) NOT NULL,
+    review_version INTEGER NOT NULL DEFAULT 1,
+
+    -- Position (span_end must be strictly greater than span_start, so a span
+    -- can never be empty)
+    span_index INTEGER NOT NULL CHECK (span_index >= 0),
+    span_text TEXT NOT NULL,
+    span_start INTEGER NOT NULL CHECK (span_start >= 0),
+    span_end INTEGER NOT NULL CHECK (span_end > span_start),
+
+    -- Classification profile
+    profile VARCHAR(20) NOT NULL DEFAULT 'standard',
+
+    -- Core URT classification (stored as plain VARCHAR codes)
+    urt_primary VARCHAR(10) NOT NULL,
+    urt_secondary VARCHAR(10)[] DEFAULT '{}',
+    valence VARCHAR(5) NOT NULL,
+    intensity VARCHAR(5) NOT NULL,
+    comparative VARCHAR(10) NOT NULL DEFAULT 'CR-N',
+
+    -- Extended classification (standard/full profile)
+    specificity VARCHAR(5),
+    actionability VARCHAR(5),
+    temporal VARCHAR(5),
+    evidence VARCHAR(5),
+
+    -- Entity extraction
+    entity VARCHAR(255),
+    entity_type VARCHAR(20),
+    entity_normalized VARCHAR(255),
+
+    -- Causal relations (full profile).
+    -- related_span_id carries no foreign key constraint.
+    relation_type VARCHAR(20),
+    related_span_id VARCHAR(50),
+    causal_chain JSONB,
+
+    -- Flags
+    is_primary BOOLEAN NOT NULL DEFAULT FALSE,
+    is_active BOOLEAN NOT NULL DEFAULT TRUE,
+
+    -- Time reference
+    review_time TIMESTAMP WITH TIME ZONE NOT NULL,
+
+    -- Metadata
+    confidence VARCHAR(10) NOT NULL DEFAULT 'medium',
+    usn VARCHAR(100) NOT NULL,
+    taxonomy_version VARCHAR(20) NOT NULL,
+    model_version VARCHAR(100) NOT NULL,
+    ingest_batch_id VARCHAR(50) NOT NULL,
+
+    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+
+    -- Foreign key to review
+    CONSTRAINT fk_review FOREIGN KEY (source, review_id, review_version)
+        REFERENCES pipeline.reviews_enriched(source, review_id, review_version)
+);
+
+-- Indexes for review_spans
+CREATE INDEX IF NOT EXISTS idx_spans_business_id ON pipeline.review_spans(business_id);
+CREATE INDEX IF NOT EXISTS idx_spans_place_id ON pipeline.review_spans(place_id);
+CREATE INDEX IF NOT EXISTS idx_spans_review_time ON pipeline.review_spans(review_time);
+CREATE INDEX IF NOT EXISTS idx_spans_urt_primary ON pipeline.review_spans(urt_primary);
+CREATE INDEX IF NOT EXISTS idx_spans_valence ON pipeline.review_spans(valence);
+CREATE INDEX IF NOT EXISTS idx_spans_intensity ON pipeline.review_spans(intensity);
+CREATE INDEX IF NOT EXISTS idx_spans_is_active ON pipeline.review_spans(is_active) WHERE is_active = TRUE;
+CREATE INDEX IF NOT EXISTS idx_spans_is_primary ON pipeline.review_spans(is_primary) WHERE is_primary = TRUE;
+CREATE INDEX IF NOT EXISTS idx_spans_entity_normalized ON pipeline.review_spans(entity_normalized) WHERE entity_normalized IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_spans_batch ON pipeline.review_spans(ingest_batch_id);
+
+-- Index on the referencing columns of fk_review. PostgreSQL does not create
+-- one automatically for a foreign key; without it, fetching the spans of a
+-- review and the FK check on update/delete of the parent row both have to
+-- scan the whole table.
+CREATE INDEX IF NOT EXISTS idx_spans_review ON pipeline.review_spans(source, review_id, review_version);
+
+-- Index for the Stage 3 routing query: active negative/mixed spans, newest
+-- review first. Routing state itself is recorded in pipeline.issue_spans and
+-- cannot be part of this predicate, so "unrouted" has to be filtered by the
+-- query that uses the index.
+CREATE INDEX IF NOT EXISTS idx_spans_unrouted_negative ON pipeline.review_spans(review_time DESC)
+    WHERE is_active = TRUE AND valence IN ('V-', 'V±');
+
+COMMENT ON TABLE pipeline.review_spans IS 'Extracted semantic spans with URT classification from reviews';
+
+
+-- =============================================================================
+-- SECTION 5: STAGE 3 - ISSUES
+-- =============================================================================
+
+-- Issues table: one row per aggregated issue. issue_id is the externally
+-- visible unique identifier that issue_spans and issue_events reference;
+-- id is the surrogate key.
+CREATE TABLE IF NOT EXISTS pipeline.issues (
+    id BIGSERIAL PRIMARY KEY,
+    issue_id VARCHAR(50) NOT NULL UNIQUE,
+
+    -- Context
+    business_id VARCHAR(255) NOT NULL,
+    place_id VARCHAR(255) NOT NULL,
+
+    -- Classification
+    -- NOTE(review): domain has no foreign key to pipeline.urt_domains(code) -
+    -- confirm whether that is intentional.
+    primary_subcode VARCHAR(10) NOT NULL,
+    domain CHAR(1) NOT NULL,
+
+    -- State
+    state pipeline.issue_state NOT NULL DEFAULT 'open',
+    priority_score REAL NOT NULL DEFAULT 1.0,
+    confidence_score REAL NOT NULL DEFAULT 1.0,
+
+    -- Aggregates (stored values; the DDL defines no trigger that maintains them)
+    span_count INTEGER NOT NULL DEFAULT 1,
+    max_intensity VARCHAR(5) NOT NULL DEFAULT 'I1',
+
+    -- Entity (optional - for entity-specific issues)
+    entity VARCHAR(255),
+    entity_normalized VARCHAR(255),
+
+    -- Metadata
+    -- updated_at only gets its default on insert; later changes must set it
+    -- explicitly.
+    taxonomy_version VARCHAR(20) NOT NULL,
+    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+);
+
+-- Issue-span links (many spans per issue; the UNIQUE constraint on span_id
+-- means each span is linked to at most one issue).
+-- NOTE(review): span_id and (source, review_id, review_version) carry no
+-- foreign keys to pipeline.review_spans / pipeline.reviews_enriched - confirm
+-- whether these are meant to be soft references.
+CREATE TABLE IF NOT EXISTS pipeline.issue_spans (
+    id BIGSERIAL PRIMARY KEY,
+    issue_id VARCHAR(50) NOT NULL REFERENCES pipeline.issues(issue_id),
+    span_id VARCHAR(50) NOT NULL UNIQUE,
+
+    -- Review reference
+    source VARCHAR(20) NOT NULL DEFAULT 'google',
+    review_id VARCHAR(255) NOT NULL,
+    review_version INTEGER NOT NULL DEFAULT 1,
+
+    -- Match info
+    is_primary_match BOOLEAN NOT NULL DEFAULT TRUE,
+    intensity VARCHAR(5) NOT NULL,
+    review_time TIMESTAMP WITH TIME ZONE NOT NULL,
+
+    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+);
+
+-- Issue events (audit log): one row per recorded change to an issue.
+-- span_id is optional and has no foreign key; old_value/new_value are free text.
+CREATE TABLE IF NOT EXISTS pipeline.issue_events (
+    id BIGSERIAL PRIMARY KEY,
+    issue_id VARCHAR(50) NOT NULL REFERENCES pipeline.issues(issue_id),
+    event_type VARCHAR(50) NOT NULL,
+    span_id VARCHAR(50),
+    old_value TEXT,
+    new_value TEXT,
+    metadata JSONB,
+    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+);
+
+-- Indexes for issues
+-- idx_issues_priority is partial: it only covers issues whose state is 'open'.
+CREATE INDEX IF NOT EXISTS idx_issues_business_id ON pipeline.issues(business_id);
+CREATE INDEX IF NOT EXISTS idx_issues_place_id ON pipeline.issues(place_id);
+CREATE INDEX IF NOT EXISTS idx_issues_state ON pipeline.issues(state);
+CREATE INDEX IF NOT EXISTS idx_issues_primary_subcode ON pipeline.issues(primary_subcode);
+CREATE INDEX IF NOT EXISTS idx_issues_domain ON pipeline.issues(domain);
+CREATE INDEX IF NOT EXISTS idx_issues_entity_normalized ON pipeline.issues(entity_normalized) WHERE entity_normalized IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_issues_priority ON pipeline.issues(priority_score DESC) WHERE state = 'open';
+CREATE INDEX IF NOT EXISTS idx_issues_created ON pipeline.issues(created_at);
+CREATE INDEX IF NOT EXISTS idx_issues_updated ON pipeline.issues(updated_at);
+
+-- Indexes for issue_spans (span_id is already indexed by its UNIQUE constraint)
+CREATE INDEX IF NOT EXISTS idx_issue_spans_issue_id ON pipeline.issue_spans(issue_id);
+CREATE INDEX IF NOT EXISTS idx_issue_spans_review_time ON pipeline.issue_spans(review_time);
+
+-- Indexes for issue_events
+CREATE INDEX IF NOT EXISTS idx_issue_events_issue_id ON pipeline.issue_events(issue_id);
+CREATE INDEX IF NOT EXISTS idx_issue_events_created ON pipeline.issue_events(created_at);
+CREATE INDEX IF NOT EXISTS idx_issue_events_type ON pipeline.issue_events(event_type);
+
+COMMENT ON TABLE pipeline.issues IS 'Aggregated issues derived from negative/mixed spans';
+COMMENT ON TABLE pipeline.issue_spans IS 'Links between issues and their source spans';
+COMMENT ON TABLE pipeline.issue_events IS 'Audit log for issue state changes';
+
+
+-- =============================================================================
+-- SECTION 6: STAGE 4 - FACT TIMESERIES
+-- =============================================================================
+
+-- Pre-aggregated metrics, one row per combination of the dimension keys
+-- (see fact_timeseries_unique, which is also the upsert conflict target).
+CREATE TABLE IF NOT EXISTS pipeline.fact_timeseries (
+    id                      BIGSERIAL                PRIMARY KEY,
+
+    -- Dimension keys
+    business_id             VARCHAR(255)             NOT NULL,
+    place_id                VARCHAR(255)             NOT NULL,  -- Or 'ALL' for rollup
+    period_date             DATE                     NOT NULL,
+    bucket_type             pipeline.bucket_type     NOT NULL DEFAULT 'day',
+    subject_type            pipeline.subject_type    NOT NULL DEFAULT 'urt_code',
+    subject_id              VARCHAR(50)              NOT NULL,  -- URT code, domain letter, or issue_id
+    taxonomy_version        VARCHAR(20)              NOT NULL,
+
+    -- Core counts
+    review_count            INTEGER                  NOT NULL DEFAULT 0,
+    span_count              INTEGER                  NOT NULL DEFAULT 0,
+
+    -- Counts per valence
+    negative_count          INTEGER                  NOT NULL DEFAULT 0,
+    positive_count          INTEGER                  NOT NULL DEFAULT 0,
+    neutral_count           INTEGER                  NOT NULL DEFAULT 0,
+    mixed_count             INTEGER                  NOT NULL DEFAULT 0,
+
+    -- Strength scores
+    strength_score          REAL                     NOT NULL DEFAULT 0.0,
+    negative_strength       REAL                     NOT NULL DEFAULT 0.0,
+    positive_strength       REAL                     NOT NULL DEFAULT 0.0,
+
+    -- Rating (nullable)
+    avg_rating              REAL,
+
+    -- Counts per intensity level
+    i1_count                INTEGER                  NOT NULL DEFAULT 0,
+    i2_count                INTEGER                  NOT NULL DEFAULT 0,
+    i3_count                INTEGER                  NOT NULL DEFAULT 0,
+
+    -- Comparative counts
+    cr_better               INTEGER                  NOT NULL DEFAULT 0,
+    cr_worse                INTEGER                  NOT NULL DEFAULT 0,
+    cr_same                 INTEGER                  NOT NULL DEFAULT 0,
+
+    -- Trust-weighted metrics
+    trust_weighted_strength REAL                     NOT NULL DEFAULT 0.0,
+    trust_weighted_negative REAL                     NOT NULL DEFAULT 0.0,
+
+    -- Metadata
+    computed_at             TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+    created_at              TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+
+    -- Natural key of a fact row; used as the conflict target for upserts
+    CONSTRAINT fact_timeseries_unique UNIQUE (
+        business_id, place_id, period_date, bucket_type,
+        subject_type, subject_id, taxonomy_version
+    )
+);
+
+-- Single-column indexes
+CREATE INDEX IF NOT EXISTS idx_facts_business_id
+    ON pipeline.fact_timeseries(business_id);
+CREATE INDEX IF NOT EXISTS idx_facts_place_id
+    ON pipeline.fact_timeseries(place_id);
+CREATE INDEX IF NOT EXISTS idx_facts_period
+    ON pipeline.fact_timeseries(period_date);
+CREATE INDEX IF NOT EXISTS idx_facts_bucket
+    ON pipeline.fact_timeseries(bucket_type);
+CREATE INDEX IF NOT EXISTS idx_facts_subject_type
+    ON pipeline.fact_timeseries(subject_type);
+CREATE INDEX IF NOT EXISTS idx_facts_subject_id
+    ON pipeline.fact_timeseries(subject_id);
+
+-- Composite index for dashboard reads: business + place + bucket, newest period first
+CREATE INDEX IF NOT EXISTS idx_facts_dashboard
+    ON pipeline.fact_timeseries(business_id, place_id, bucket_type, period_date DESC);
+
+-- Partial index for per-code trends (rows with subject_type = 'urt_code' only)
+CREATE INDEX IF NOT EXISTS idx_facts_code_trend
+    ON pipeline.fact_timeseries(business_id, subject_id, bucket_type, period_date DESC)
+    WHERE subject_type = 'urt_code';
+
+-- Partial index for domain aggregates (rows with subject_type = 'domain' only)
+CREATE INDEX IF NOT EXISTS idx_facts_domain
+    ON pipeline.fact_timeseries(business_id, subject_id, bucket_type, period_date DESC)
+    WHERE subject_type = 'domain';
+
+COMMENT ON TABLE pipeline.fact_timeseries IS 'Pre-aggregated time series facts for dashboard queries';
+
+
+-- =============================================================================
+-- SECTION 7: HELPER VIEWS
+-- =============================================================================
+
+-- Both views list their columns explicitly instead of using "*".
+-- CREATE OR REPLACE VIEW requires the new query to produce the existing
+-- columns in the same order and only allows appending new ones. With "i.*"
+-- followed by an extra column, any column later added to pipeline.issues would
+-- shift total_spans and make a re-run of this migration fail. The explicit
+-- lists below reproduce the table column order defined in this file, so the
+-- resulting views are identical on a fresh install and stay stable on re-runs.
+
+-- View for latest enriched reviews only
+CREATE OR REPLACE VIEW pipeline.reviews_latest AS
+SELECT
+    e.id,
+    e.source,
+    e.review_id,
+    e.review_version,
+    e.is_latest,
+    e.raw_id,
+    e.business_id,
+    e.place_id,
+    e.text,
+    e.text_normalized,
+    e.rating,
+    e.review_time,
+    e.language,
+    e.taxonomy_version,
+    e.urt_primary,
+    e.urt_secondary,
+    e.valence,
+    e.intensity,
+    e.comparative,
+    e.staff_mentions,
+    e.quotes,
+    e.embedding,
+    e.trust_score,
+    e.classification_model,
+    e.classification_confidence,
+    e.processed_at,
+    e.created_at
+FROM pipeline.reviews_enriched AS e
+WHERE e.is_latest = TRUE;
+
+-- View for open issues with span counts.
+-- total_spans counts the linked pipeline.issue_spans rows (0 when there are
+-- none, because of the LEFT JOIN); it is computed at query time and is
+-- independent of the stored issues.span_count column.
+-- Grouping by the primary key i.id is sufficient for the other i.* columns.
+CREATE OR REPLACE VIEW pipeline.issues_open AS
+SELECT
+    i.id,
+    i.issue_id,
+    i.business_id,
+    i.place_id,
+    i.primary_subcode,
+    i.domain,
+    i.state,
+    i.priority_score,
+    i.confidence_score,
+    i.span_count,
+    i.max_intensity,
+    i.entity,
+    i.entity_normalized,
+    i.taxonomy_version,
+    i.created_at,
+    i.updated_at,
+    COUNT(s.id) AS total_spans
+FROM pipeline.issues AS i
+LEFT JOIN pipeline.issue_spans AS s
+    ON s.issue_id = i.issue_id
+WHERE i.state = 'open'
+GROUP BY i.id;
+
+COMMENT ON VIEW pipeline.reviews_latest IS 'Latest version of each review';
+COMMENT ON VIEW pipeline.issues_open IS 'Open issues with total span counts';