feat(broker): production hardening — caps, limits, metrics, logging
Adds the minimum ops surface area for a production broker without
over-engineering. All new config knobs are env-var driven with sane
defaults.
New modules:
- logger.ts: structured JSON logs (one line, stderr, ready for
Loki/Datadog ingestion without preprocessing)
- metrics.ts: in-process Prometheus counters + gauges, exposed at
GET /metrics. Tracks connections, messages, queue depth, TTL
sweeps, hook requests, DB health.
- rate-limit.ts: token-bucket rate limiter keyed by (pid, cwd).
Applied to POST /hook/set-status at 30/min default.
- db-health.ts: Postgres ping loop with exponential-backoff retry.
GET /health returns 503 while DB is down.
- build-info.ts: version + gitSha (from GIT_SHA env or `git rev-parse`
fallback) + uptime, surfaced on /health.
Behavior changes:
- Connection caps: MAX_CONNECTIONS_PER_MESH (default 100). Exceed →
close(1008, "capacity") + metric increment.
- Message size: MAX_MESSAGE_BYTES (default 65536). WS applies it via
`ws.maxPayload`. Hook POST bodies cap out with 413.
- Structured logs everywhere replacing the old `log()` helper.
- Env validation stricter: DATABASE_URL required + regex-checked for
postgres:// prefix.
New endpoints:
- GET /health → {status, db, version, gitSha, uptime}. 503 if DB down.
- GET /metrics → Prometheus text format.
Verified: 21/21 tests still pass. Hit /health + /metrics live —
gitSha resolves correctly via `git rev-parse --short HEAD` in dev.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,18 +4,26 @@ import { z } from "zod";
|
||||
* Broker environment config.
|
||||
*
|
||||
* Validated at startup with Zod. Fails fast with a useful error if any
|
||||
* required var is missing or malformed. Defaults mirror the values
|
||||
* proven out in the claude-intercom prototype so local dev works
|
||||
* without a .env file.
|
||||
* required var is missing or malformed.
|
||||
*/
|
||||
const envSchema = z.object({
|
||||
BROKER_PORT: z.coerce.number().int().positive().default(7900),
|
||||
DATABASE_URL: z.string().min(1, "DATABASE_URL is required"),
|
||||
DATABASE_URL: z
|
||||
.string()
|
||||
.min(1, "DATABASE_URL is required")
|
||||
.refine(
|
||||
(u) => /^postgres(ql)?:\/\//.test(u),
|
||||
"DATABASE_URL must be a postgres:// or postgresql:// connection string",
|
||||
),
|
||||
STATUS_TTL_SECONDS: z.coerce.number().int().positive().default(60),
|
||||
HOOK_FRESH_WINDOW_SECONDS: z.coerce.number().int().positive().default(30),
|
||||
MAX_CONNECTIONS_PER_MESH: z.coerce.number().int().positive().default(100),
|
||||
MAX_MESSAGE_BYTES: z.coerce.number().int().positive().default(65_536),
|
||||
HOOK_RATE_LIMIT_PER_MIN: z.coerce.number().int().positive().default(30),
|
||||
NODE_ENV: z
|
||||
.enum(["development", "production", "test"])
|
||||
.default("development"),
|
||||
GIT_SHA: z.string().optional(),
|
||||
});
|
||||
|
||||
export type BrokerEnv = z.infer<typeof envSchema>;
|
||||
|
||||
Reference in New Issue
Block a user