feat(broker): encrypt env vars at rest, restore on reboot
Some checks failed
CI / Lint (push) Has been cancelled
CI / Typecheck (push) Has been cancelled
CI / Broker tests (Postgres) (push) Has been cancelled
CI / Docker build (linux/amd64) (push) Has been cancelled

- broker-crypto.ts: AES-256-GCM encrypt/decrypt with BROKER_ENCRYPTION_KEY
- mcp_deploy stores env as _encryptedEnv in mesh.service.config (no plaintext in DB)
- boot restore: decrypts _encryptedEnv and re-spawns services via service-manager
- auto-generates ephemeral key if BROKER_ENCRYPTION_KEY not set (logs warning)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alejandro Gutiérrez
2026-04-08 12:25:48 +01:00
parent 75ca892ea7
commit 070a3b7422
3 changed files with 122 additions and 2 deletions

View File

@@ -0,0 +1,68 @@
/**
* Broker-side symmetric encryption for persisting resolved env vars.
*
* Uses Node's built-in crypto (AES-256-GCM). The key comes from
* BROKER_ENCRYPTION_KEY env var (64 hex chars = 32 bytes). If not set,
* a random key is generated and logged on first use — operator should
* persist it to survive broker restarts.
*
* This is NOT the same as peer-side E2E crypto (libsodium). This is
* platform-level encryption-at-rest, same model as Heroku/Coolify/AWS.
*/
import { createCipheriv, createDecipheriv, randomBytes } from "node:crypto";
import { env } from "./env";
import { log } from "./logger";
const ALGO = "aes-256-gcm";
const IV_LEN = 12;
const TAG_LEN = 16;
let _key: Buffer | null = null;
function getKey(): Buffer {
if (_key) return _key;
if (env.BROKER_ENCRYPTION_KEY && env.BROKER_ENCRYPTION_KEY.length === 64) {
_key = Buffer.from(env.BROKER_ENCRYPTION_KEY, "hex");
} else {
_key = randomBytes(32);
log.warn("BROKER_ENCRYPTION_KEY not set — generated ephemeral key. " +
"Set BROKER_ENCRYPTION_KEY=" + _key.toString("hex") + " to persist across restarts.");
}
return _key;
}
/**
* Encrypt a JSON-serializable value. Returns a base64 string containing
* IV + ciphertext + auth tag.
*/
export function encryptForStorage(plaintext: string): string {
const key = getKey();
const iv = randomBytes(IV_LEN);
const cipher = createCipheriv(ALGO, key, iv);
const encrypted = Buffer.concat([cipher.update(plaintext, "utf8"), cipher.final()]);
const tag = cipher.getAuthTag();
// Pack: IV (12) + tag (16) + ciphertext
return Buffer.concat([iv, tag, encrypted]).toString("base64");
}
/**
* Decrypt a value produced by encryptForStorage. Returns the plaintext
* string, or null if decryption fails (wrong key, tampered).
*/
export function decryptFromStorage(packed: string): string | null {
try {
const key = getKey();
const buf = Buffer.from(packed, "base64");
const iv = buf.subarray(0, IV_LEN);
const tag = buf.subarray(IV_LEN, IV_LEN + TAG_LEN);
const ciphertext = buf.subarray(IV_LEN + TAG_LEN);
const decipher = createDecipheriv(ALGO, key, iv);
decipher.setAuthTag(tag);
const decrypted = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
return decrypted.toString("utf8");
} catch {
return null;
}
}

View File

@@ -29,6 +29,7 @@ const envSchema = z.object({
NEO4J_USER: z.string().default("neo4j"),
NEO4J_PASSWORD: z.string().default("changeme"),
CLAUDEMESH_SERVICES_DIR: z.string().default("/var/claudemesh/services"),
BROKER_ENCRYPTION_KEY: z.string().default(""), // 64 hex chars (32 bytes). Auto-generated if empty.
MAX_SERVICES_PER_MESH: z.coerce.number().int().positive().default(20),
MAX_SERVICE_ZIP_BYTES: z.coerce.number().int().positive().default(50 * 1024 * 1024),
NODE_ENV: z

View File

@@ -18,7 +18,7 @@ import { WebSocketServer, type WebSocket } from "ws";
import { and, eq, isNull, sql } from "drizzle-orm";
import { env } from "./env";
import { db } from "./db";
import { messageQueue, scheduledMessage as scheduledMessageTable, meshWebhook, peerState } from "@turbostarter/db/schema/mesh";
import { mesh, messageQueue, scheduledMessage as scheduledMessageTable, meshWebhook, peerState } from "@turbostarter/db/schema/mesh";
import {
claimTask,
completeTask,
@@ -79,6 +79,7 @@ import {
getService,
listDbMeshServices,
deleteService,
getRunningServices,
} from "./broker";
import * as serviceManager from "./service-manager";
import { ensureBucket, meshBucketName, minioClient } from "./minio";
@@ -3178,12 +3179,19 @@ function handleConnection(ws: WebSocket): void {
sendError(ws, "limit", `max ${env.MAX_SERVICES_PER_MESH} services per mesh`, undefined, md._reqId);
break;
}
// Encrypt env vars at rest (broker-side AES-256-GCM)
const deployConfig = { ...(md.config ?? {}) };
if (deployConfig.env && Object.keys(deployConfig.env).length > 0) {
const { encryptForStorage } = await import("./broker-crypto");
deployConfig._encryptedEnv = encryptForStorage(JSON.stringify(deployConfig.env));
delete deployConfig.env; // don't store plaintext in DB
}
await upsertService(conn.meshId, md.server_name, {
type: "mcp", sourceType: md.source.type, description: `MCP server: ${md.server_name}`,
sourceFileId: md.source.type === "zip" ? md.source.file_id : undefined,
sourceGitUrl: md.source.type === "git" ? md.source.url : undefined,
sourceGitBranch: md.source.type === "git" ? md.source.branch : undefined,
runtime: md.config?.runtime, status: "building", config: md.config ?? {},
runtime: md.config?.runtime, status: "building", config: deployConfig,
scope: md.scope ?? "peer", deployedBy: conn.memberId, deployedByName: conn.displayName,
});
sendToPeer(presenceId, { type: "mcp_deploy_status", server_name: md.server_name, status: "building", _reqId: md._reqId } as any);
@@ -3564,6 +3572,49 @@ function main(): void {
startDbHealth();
serviceManager.startHealthChecks();
// Restore managed services that were running before broker restart
(async () => {
try {
const { decryptFromStorage } = await import("./broker-crypto");
// Get all meshes with running services
const allMeshes = await db.select({ id: mesh.id }).from(mesh);
for (const m of allMeshes) {
const running = await getRunningServices(m.id);
if (running.length === 0) continue;
log.info("restoring services for mesh", { mesh_id: m.id, count: running.length });
for (const svc of running) {
try {
const config = (svc.config as Record<string, unknown>) ?? {};
// Decrypt env vars if stored encrypted
let resolvedEnv: Record<string, string> | undefined;
if (config._encryptedEnv) {
const decrypted = decryptFromStorage(config._encryptedEnv as string);
if (decrypted) {
resolvedEnv = JSON.parse(decrypted);
} else {
log.warn("failed to decrypt env for service", { service: svc.name });
}
}
const sourcePath = `${env.CLAUDEMESH_SERVICES_DIR}/${m.id}/${svc.name}/source`;
await serviceManager.deploy({
meshId: m.id,
name: svc.name,
sourcePath,
config: { runtime: svc.runtime as any, ...(config.memory_mb ? { memory_mb: config.memory_mb as number } : {}) },
resolvedEnv,
});
log.info("service restored", { service: svc.name, mesh_id: m.id });
} catch (e) {
log.error("service restore failed", { service: svc.name, error: e instanceof Error ? e.message : String(e) });
await updateServiceStatus(m.id, svc.name, "failed");
}
}
}
} catch (e) {
log.error("service restore error", { error: e instanceof Error ? e.message : String(e) });
}
})();
// Ensure audit log table exists and load hash chain state
ensureAuditLogTable()
.then(() => loadLastHashes())