feat(broker): filename-tracked migration runner replaces drizzle's migrator
drizzle's _journal.json drifted to idx=11 while the file system had 25 .sql files; the prod drizzle.__drizzle_migrations table was further behind with 3 rows. The runtime migrator silently skipped anything outside the journal, so every new schema change required psql -f by hand. The new runner tracks applied files in mesh.__cmh_migrations (filename PK + sha256 + applied_at). On startup it bootstraps the tracking table inline, lists migrations/*.sql lexicographically, filters out already-applied files, and runs the rest in order, each file in its own transaction, under the existing pg_advisory_lock. SHA mismatches on already-applied files emit a warning but don't fail (cosmetic edits are common); production drift detection lives elsewhere. Bootstrap script at apps/broker/scripts/bootstrap-cmh-migrations.ts computes file hashes and seeds the tracking table — already run against prod with all 25 current files registered as applied. Future deploys pick up only truly new migrations. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
87
apps/broker/scripts/bootstrap-cmh-migrations.ts
Normal file
87
apps/broker/scripts/bootstrap-cmh-migrations.ts
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
/**
|
||||||
|
* One-shot bootstrap for the new mesh.__cmh_migrations tracking table.
|
||||||
|
*
|
||||||
|
* Run this against an EXISTING prod DB exactly once before deploying
|
||||||
|
* the new runtime migrator. It:
|
||||||
|
* 1. Creates mesh.__cmh_migrations if it doesn't exist
|
||||||
|
* 2. Hashes every .sql file in packages/db/migrations
|
||||||
|
* 3. Inserts a row per file (filename + sha256) with applied_at = NOW()
|
||||||
|
* 4. ON CONFLICT (filename) DO NOTHING — safe to re-run
|
||||||
|
*
|
||||||
|
* The script does NOT execute any migration SQL — it only seeds the
|
||||||
|
* tracking table to reflect the schema state that was previously
|
||||||
|
* applied by drizzle (or by hand). After this runs, the broker's
|
||||||
|
* startup migrator will treat 0000..N as already-applied and only
|
||||||
|
* apply truly new files going forward.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* DATABASE_URL=... bun apps/broker/scripts/bootstrap-cmh-migrations.ts
|
||||||
|
*
|
||||||
|
* Safe to run multiple times. Output prints per-file status.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import postgres from "postgres";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
||||||
|
import { createHash } from "node:crypto";
|
||||||
|
|
||||||
|
/**
 * Seed mesh.__cmh_migrations with one row per migration file found on disk.
 *
 * Reads DATABASE_URL from the environment, picks the first existing
 * migrations folder from a fixed candidate list, creates the tracking
 * schema/table if missing, then inserts (filename, sha256) for every
 * `*.sql` file. Rows that already exist are left untouched
 * (ON CONFLICT DO NOTHING), so the script is safe to re-run.
 * No migration SQL is executed.
 *
 * Exits the process with code 2 when DATABASE_URL is unset or when no
 * candidate migrations folder exists. Database errors reject the returned
 * promise after the connection has been closed.
 */
async function main() {
  const url = process.env.DATABASE_URL;
  if (!url) {
    console.error("DATABASE_URL not set");
    process.exit(2);
  }

  // Probed in order; the first path that exists wins.
  // NOTE(review): presumably these cover cwd = apps/broker, cwd = repo root,
  // and the container image layout — confirm against the deploy setup.
  const candidates = [
    join(process.cwd(), "..", "..", "packages", "db", "migrations"),
    join(process.cwd(), "packages", "db", "migrations"),
    "/app/migrations",
  ];
  const folder = candidates.find((p) => existsSync(p));
  if (!folder) {
    console.error("migrations folder not found");
    process.exit(2);
  }

  const files = readdirSync(folder).filter((f) => f.endsWith(".sql")).sort();
  console.log(`bootstrap · ${files.length} files at ${folder}`);

  // Single connection; server NOTICE messages (e.g. "already exists") are muted.
  const sql = postgres(url, { max: 1, onnotice: () => {} });
  try {
    // Multi-statement DDL, hence unsafe(). The text is a fixed literal.
    await sql.unsafe(`
      CREATE SCHEMA IF NOT EXISTS mesh;
      CREATE TABLE IF NOT EXISTS mesh.__cmh_migrations (
        filename TEXT PRIMARY KEY,
        sha256 TEXT NOT NULL,
        applied_at TIMESTAMP NOT NULL DEFAULT NOW()
      );
    `);

    let inserted = 0;
    let skipped = 0;
    for (const f of files) {
      // Hash the utf8-decoded text: the runtime migrator hashes the same way,
      // so the stored digest compares equal on later startups.
      const content = readFileSync(join(folder, f), "utf8");
      const sha = createHash("sha256").update(content).digest("hex");
      // RETURNING yields a row only when the INSERT actually happened;
      // a conflicting (already tracked) filename returns zero rows.
      const result = await sql`
        INSERT INTO mesh.__cmh_migrations (filename, sha256)
        VALUES (${f}, ${sha})
        ON CONFLICT (filename) DO NOTHING
        RETURNING filename
      `;
      if (result.length > 0) {
        inserted += 1;
        console.log(`  + ${f}  ${sha.slice(0, 12)}…`);
      } else {
        skipped += 1;
        // Report skipped files too, so the output really is per-file.
        console.log(`  = ${f}  already tracked`);
      }
    }
    console.log(`bootstrap done · ${inserted} inserted, ${skipped} already tracked`);
  } finally {
    // Always release the connection, even when a query above threw.
    await sql.end({ timeout: 5 });
  }
}
|
||||||
|
|
||||||
|
main().catch((e) => {
|
||||||
|
console.error("bootstrap failed:", e);
|
||||||
|
process.exit(1);
|
||||||
|
});
|
||||||
@@ -1,30 +1,57 @@
|
|||||||
/**
|
/**
|
||||||
* Runtime migrations on broker startup.
|
* Runtime migrations on broker startup.
|
||||||
*
|
*
|
||||||
* Runs pending drizzle migrations against DATABASE_URL before the broker
|
* Replaced drizzle's migrator with a filename-tracked runner because
|
||||||
* listens. Uses pg_try_advisory_lock with retry+timeout so a stuck old
|
* drizzle's _journal.json drifted on the filesystem (last entry was
|
||||||
* instance can't block new deploys indefinitely (the original
|
* idx=11; idx 12-24 were never recorded), and the prod
|
||||||
* pg_advisory_lock version matched the "stuck 12h" symptom perfectly —
|
* drizzle.__drizzle_migrations table was even further behind (3 rows
|
||||||
* an old container held the lock and the new deploy waited forever).
|
* for 25 files). The runtime migrator silently skipped anything
|
||||||
|
* outside the journal, so every new schema change required `psql -f`
|
||||||
|
* by hand.
|
||||||
*
|
*
|
||||||
* If migrations fail OR the lock can't be acquired within the timeout,
|
* The new runner tracks applied files in `mesh.__cmh_migrations`
|
||||||
* the process exits non-zero so the orchestrator (Coolify healthcheck)
|
* (filename + sha256 + applied_at). On startup:
|
||||||
* sees the container as broken and doesn't route traffic to it.
|
* 1. Acquire advisory lock (unchanged)
|
||||||
|
* 2. CREATE TABLE IF NOT EXISTS for the tracking table
|
||||||
|
* 3. Read applied filenames from the table
|
||||||
|
* 4. List `migrations/*.sql` lexicographically; filter out applied
|
||||||
|
* 5. For each unapplied: BEGIN; execute file; INSERT row; COMMIT
|
||||||
|
* 6. For each applied: optionally verify sha matches; warn (don't
|
||||||
|
* fail) on mismatch — devs reformat migrations sometimes
|
||||||
|
*
|
||||||
|
* Bootstrap: run `apps/broker/scripts/bootstrap-cmh-migrations.ts`
|
||||||
|
* against an existing prod DB to seed the tracking table with the
|
||||||
|
* currently-applied set. Without that, the runner would try to
|
||||||
|
* re-apply 0000-0024 and fail on duplicate-table errors.
|
||||||
|
*
|
||||||
|
* Failure modes (all exit non-zero so Coolify healthcheck fails closed):
|
||||||
|
* - DATABASE_URL missing
|
||||||
|
* - lock acquisition timeout
|
||||||
|
* - migration SQL error mid-application
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { drizzle } from "drizzle-orm/postgres-js";
|
|
||||||
import { migrate } from "drizzle-orm/postgres-js/migrator";
|
|
||||||
import postgres from "postgres";
|
import postgres from "postgres";
|
||||||
import { join } from "node:path";
|
import { join } from "node:path";
|
||||||
import { existsSync, readdirSync } from "node:fs";
|
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
||||||
|
import { createHash } from "node:crypto";
|
||||||
|
|
||||||
const LOCK_ID = 74737_73831; // "cmsh" ascii — stable magic constant
|
const LOCK_ID = 74737_73831; // "cmsh" ascii — stable magic constant
|
||||||
|
|
||||||
/** Max total time to wait for the advisory lock before giving up. */
|
|
||||||
const LOCK_ACQUIRE_TIMEOUT_MS = 60_000;
|
const LOCK_ACQUIRE_TIMEOUT_MS = 60_000;
|
||||||
/** Poll interval when lock is held by another instance. */
|
|
||||||
const LOCK_RETRY_INTERVAL_MS = 2_000;
|
const LOCK_RETRY_INTERVAL_MS = 2_000;
|
||||||
|
|
||||||
|
const TRACKING_TABLE_DDL = `
|
||||||
|
CREATE SCHEMA IF NOT EXISTS mesh;
|
||||||
|
CREATE TABLE IF NOT EXISTS mesh.__cmh_migrations (
|
||||||
|
filename TEXT PRIMARY KEY,
|
||||||
|
sha256 TEXT NOT NULL,
|
||||||
|
applied_at TIMESTAMP NOT NULL DEFAULT NOW()
|
||||||
|
);
|
||||||
|
`;
|
||||||
|
|
||||||
|
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param content - text to hash
 * @returns the digest as a hex string
 */
function sha256Hex(content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content);
  return hasher.digest("hex");
}
|
||||||
|
|
||||||
export async function runMigrationsOnStartup(): Promise<void> {
|
export async function runMigrationsOnStartup(): Promise<void> {
|
||||||
const url = process.env.DATABASE_URL;
|
const url = process.env.DATABASE_URL;
|
||||||
if (!url) {
|
if (!url) {
|
||||||
@@ -43,20 +70,17 @@ export async function runMigrationsOnStartup(): Promise<void> {
|
|||||||
console.error("[migrate] migrations folder not found — skipping. Searched:", candidates);
|
console.error("[migrate] migrations folder not found — skipping. Searched:", candidates);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const count = readdirSync(migrationsFolder).filter((f) => f.endsWith(".sql")).length;
|
|
||||||
console.log(`[migrate] ${count} migration files at ${migrationsFolder}`);
|
|
||||||
|
|
||||||
const sql = postgres(url, {
|
const allFiles = readdirSync(migrationsFolder)
|
||||||
max: 1,
|
.filter((f) => f.endsWith(".sql"))
|
||||||
onnotice: () => { /* quiet */ },
|
.sort(); // lexicographic = numeric for 0000_*..9999_*
|
||||||
});
|
console.log(`[migrate] ${allFiles.length} migration files at ${migrationsFolder}`);
|
||||||
|
|
||||||
|
const sql = postgres(url, { max: 1, onnotice: () => {} });
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// SET doesn't accept parameterized values ($1) — use unsafe() for
|
|
||||||
// the literal. The value is a hardcoded constant, not user input.
|
|
||||||
await sql.unsafe(`SET lock_timeout = '${LOCK_ACQUIRE_TIMEOUT_MS}ms'`);
|
await sql.unsafe(`SET lock_timeout = '${LOCK_ACQUIRE_TIMEOUT_MS}ms'`);
|
||||||
|
|
||||||
// Try to grab the advisory lock; poll if someone else holds it.
|
|
||||||
const deadline = Date.now() + LOCK_ACQUIRE_TIMEOUT_MS;
|
const deadline = Date.now() + LOCK_ACQUIRE_TIMEOUT_MS;
|
||||||
let locked = false;
|
let locked = false;
|
||||||
while (Date.now() < deadline) {
|
while (Date.now() < deadline) {
|
||||||
@@ -76,10 +100,58 @@ export async function runMigrationsOnStartup(): Promise<void> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const db = drizzle(sql);
|
// Bootstrap the tracking table itself. Idempotent.
|
||||||
const start = Date.now();
|
await sql.unsafe(TRACKING_TABLE_DDL);
|
||||||
await migrate(db, { migrationsFolder });
|
|
||||||
console.log(`[migrate] ok (${Date.now() - start}ms)`);
|
const applied = await sql<{ filename: string; sha256: string }[]>`
|
||||||
|
SELECT filename, sha256 FROM mesh.__cmh_migrations
|
||||||
|
`;
|
||||||
|
const appliedMap = new Map(applied.map((r) => [r.filename, r.sha256]));
|
||||||
|
|
||||||
|
const pending: Array<{ filename: string; sha: string; content: string }> = [];
|
||||||
|
for (const filename of allFiles) {
|
||||||
|
const path = join(migrationsFolder, filename);
|
||||||
|
const content = readFileSync(path, "utf8");
|
||||||
|
const sha = sha256Hex(content);
|
||||||
|
const knownSha = appliedMap.get(filename);
|
||||||
|
if (!knownSha) {
|
||||||
|
pending.push({ filename, sha, content });
|
||||||
|
} else if (knownSha !== sha) {
|
||||||
|
// File content changed after application. Don't re-run; warn.
|
||||||
|
// Hard-fail would block legit cosmetic edits (whitespace,
|
||||||
|
// comments). Production drift detection lives elsewhere.
|
||||||
|
console.warn(
|
||||||
|
`[migrate] sha mismatch for ${filename} — file modified post-apply (was ${knownSha.slice(0, 12)}…, now ${sha.slice(0, 12)}…)`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pending.length === 0) {
|
||||||
|
console.log(`[migrate] up to date · ${applied.length} applied`);
|
||||||
|
} else {
|
||||||
|
console.log(`[migrate] applying ${pending.length} pending: ${pending.map((p) => p.filename).join(", ")}`);
|
||||||
|
for (const m of pending) {
|
||||||
|
const start = Date.now();
|
||||||
|
try {
|
||||||
|
await sql.begin(async (tx) => {
|
||||||
|
// drizzle migrations use `--> statement-breakpoint` to
|
||||||
|
// separate statements; postgres-js can run a multi-stmt
|
||||||
|
// script via .unsafe(), but transactional rollback wraps
|
||||||
|
// everything as one unit which is what we want.
|
||||||
|
await tx.unsafe(m.content);
|
||||||
|
await tx`
|
||||||
|
INSERT INTO mesh.__cmh_migrations (filename, sha256)
|
||||||
|
VALUES (${m.filename}, ${m.sha})
|
||||||
|
`;
|
||||||
|
});
|
||||||
|
console.log(`[migrate] ✓ ${m.filename} (${Date.now() - start}ms)`);
|
||||||
|
} catch (e) {
|
||||||
|
console.error(`[migrate] ✗ ${m.filename}:`, e instanceof Error ? e.message : e);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
console.log(`[migrate] ok`);
|
||||||
|
}
|
||||||
} finally {
|
} finally {
|
||||||
await sql`SELECT pg_advisory_unlock(${LOCK_ID})`;
|
await sql`SELECT pg_advisory_unlock(${LOCK_ID})`;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user