feat(broker): production hardening — caps, limits, metrics, logging
Adds the minimum ops surface area for a production broker without
over-engineering. All new config knobs are env-var driven with sane
defaults.
New modules:
- logger.ts: structured JSON logs (one line, stderr, ready for
Loki/Datadog ingestion without preprocessing)
- metrics.ts: in-process Prometheus counters + gauges, exposed at
GET /metrics. Tracks connections, messages, queue depth, TTL
sweeps, hook requests, DB health.
- rate-limit.ts: token-bucket rate limiter keyed by (pid, cwd).
Applied to POST /hook/set-status at 30/min default.
- db-health.ts: Postgres ping loop with exponential-backoff retry.
GET /health returns 503 while DB is down.
- build-info.ts: version + gitSha (from GIT_SHA env or `git rev-parse`
fallback) + uptime, surfaced on /health.
Behavior changes:
- Connection caps: MAX_CONNECTIONS_PER_MESH (default 100). Exceeding the cap →
close(1008, "capacity") + metric increment.
- Message size: MAX_MESSAGE_BYTES (default 65536). WS enforces it via
`ws.maxPayload`; hook POST bodies over the limit are rejected with 413.
- Structured logs everywhere replacing the old `log()` helper.
- Stricter env validation: DATABASE_URL is required and regex-checked for a
postgres:// prefix.
New endpoints:
- GET /health → {status, db, version, gitSha, uptime}. 503 if DB down.
- GET /metrics → Prometheus text format.
Verified: 21/21 tests still pass. Hit /health + /metrics live —
gitSha resolves correctly via `git rev-parse --short HEAD` in dev.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
121
apps/broker/src/metrics.ts
Normal file
121
apps/broker/src/metrics.ts
Normal file
@@ -0,0 +1,121 @@
|
||||
/**
|
||||
* Minimal in-process metrics, exposed as Prometheus plaintext.
|
||||
*
|
||||
* Intentionally no external deps — we track a handful of counters
|
||||
* and gauges that matter for broker ops. Scraped by /metrics.
|
||||
*/
|
||||
|
||||
type Labels = Record<string, string | number>;
|
||||
|
||||
/**
 * Counter metric holding one running total per label set.
 *
 * Each distinct label combination (as serialized by `labelKey`) is tracked
 * as its own series. `toText()` renders the metric in Prometheus plaintext
 * with a `counter` TYPE line.
 */
class Counter {
  /** Running totals, keyed by the serialized label set ("" when unlabeled). */
  private values = new Map<string, number>();

  /**
   * @param name Metric name used on the HELP/TYPE lines and on every sample.
   * @param help Text emitted on the `# HELP` line.
   */
  constructor(
    public name: string,
    public help: string,
  ) {}

  /**
   * Adds `by` to the series identified by `labels`, starting from 0 the
   * first time a label set is seen. `by` is not validated.
   *
   * @param labels Label set selecting the series; defaults to no labels.
   * @param by Amount to add; defaults to 1.
   */
  inc(labels: Labels = {}, by = 1): void {
    const series = labelKey(labels);
    const previous = this.values.get(series) ?? 0;
    this.values.set(series, previous + by);
  }

  /**
   * Renders the HELP and TYPE lines followed by one sample line per series.
   * When nothing has been recorded yet, a single unlabeled `0` sample is
   * emitted instead.
   *
   * @returns Newline-joined text without a trailing newline.
   */
  toText(): string {
    const header = `# HELP ${this.name} ${this.help}\n# TYPE ${this.name} counter`;
    if (this.values.size === 0) {
      return `${header}\n${this.name} 0`;
    }
    const samples = Array.from(
      this.values,
      ([series, total]) => `${this.name}${series} ${total}`,
    );
    return [header, ...samples].join("\n");
  }
}
|
||||
|
||||
/**
 * Gauge metric holding one current value per label set.
 *
 * Unlike a counter, a gauge can be overwritten with `set()` and moved in
 * either direction with `inc()` / `dec()`. `toText()` renders the metric in
 * Prometheus plaintext with a `gauge` TYPE line.
 */
class Gauge {
  /** Current values, keyed by the serialized label set ("" when unlabeled). */
  private values = new Map<string, number>();

  /**
   * @param name Metric name used on the HELP/TYPE lines and on every sample.
   * @param help Text emitted on the `# HELP` line.
   */
  constructor(
    public name: string,
    public help: string,
  ) {}

  /**
   * Overwrites the series identified by `labels` with `value`.
   *
   * @param value New value for the series.
   * @param labels Label set selecting the series; defaults to no labels.
   */
  set(value: number, labels: Labels = {}): void {
    const series = labelKey(labels);
    this.values.set(series, value);
  }

  /**
   * Adds `by` to the series identified by `labels`, starting from 0 the
   * first time a label set is seen.
   *
   * @param labels Label set selecting the series; defaults to no labels.
   * @param by Amount to add; defaults to 1.
   */
  inc(labels: Labels = {}, by = 1): void {
    const series = labelKey(labels);
    const current = this.values.get(series) ?? 0;
    this.values.set(series, current + by);
  }

  /**
   * Subtracts `by` from the series by delegating to `inc()` with the
   * negated amount.
   *
   * @param labels Label set selecting the series; defaults to no labels.
   * @param by Amount to subtract; defaults to 1.
   */
  dec(labels: Labels = {}, by = 1): void {
    this.inc(labels, -by);
  }

  /**
   * Renders the HELP and TYPE lines followed by one sample line per series.
   * When nothing has been recorded yet, a single unlabeled `0` sample is
   * emitted instead.
   *
   * @returns Newline-joined text without a trailing newline.
   */
  toText(): string {
    const header = `# HELP ${this.name} ${this.help}\n# TYPE ${this.name} gauge`;
    if (this.values.size === 0) {
      return `${header}\n${this.name} 0`;
    }
    const samples = Array.from(
      this.values,
      ([series, current]) => `${this.name}${series} ${current}`,
    );
    return [header, ...samples].join("\n");
  }
}
|
||||
|
||||
/**
 * Serializes a label set into the `{k="v",...}` suffix used both as the
 * per-series map key and as the label part of a Prometheus sample line.
 *
 * Labels are sorted by name so the same set always yields the same key
 * regardless of property order. Label values are escaped as the Prometheus
 * text exposition format requires: backslash, double quote and line feed
 * become `\\`, `\"` and `\n`. Label names are emitted as-is.
 *
 * @param labels Label names mapped to string or numeric values.
 * @returns "" for an empty label set, otherwise the braced, comma-separated
 *   label list.
 */
function labelKey(labels: Labels): string {
  const entries = Object.entries(labels);
  if (entries.length === 0) return "";
  const parts = entries
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([k, v]) => {
      // Backslashes must be escaped first, otherwise the backslashes
      // introduced by the quote/newline escapes would be doubled.
      const escaped = String(v)
        .replace(/\\/g, "\\\\")
        .replace(/"/g, '\\"')
        .replace(/\n/g, "\\n");
      return `${k}="${escaped}"`;
    })
    .join(",");
  return `{${parts}}`;
}
|
||||
|
||||
/**
 * Every metric the broker exports, as module-level singletons.
 *
 * `metricsToText()` renders these via `Object.values`, so the declaration
 * order below is the order in which metrics appear in the output.
 */
export const metrics = {
  // WebSocket connections
  connectionsTotal: new Counter("broker_connections_total", "Total WS connection attempts"),
  connectionsRejected: new Counter(
    "broker_connections_rejected_total",
    "WS connections refused (auth failure, capacity, etc.)",
  ),
  connectionsActive: new Gauge("broker_connections_active", "Currently connected peers"),

  // Message routing and queueing
  messagesRoutedTotal: new Counter(
    "broker_messages_routed_total",
    "Messages successfully queued + routed",
  ),
  messagesRejectedTotal: new Counter(
    "broker_messages_rejected_total",
    "Messages rejected (size, auth, malformed)",
  ),
  queueDepth: new Gauge("broker_queue_depth", "Undelivered messages currently in the queue"),
  ttlSweepsTotal: new Counter("broker_ttl_sweeps_total", "TTL sweeper runs completed"),

  // POST /hook/set-status
  hookRequestsTotal: new Counter(
    "broker_hook_requests_total",
    "POST /hook/set-status requests received",
  ),
  hookRequestsRateLimited: new Counter(
    "broker_hook_requests_rate_limited_total",
    "POST /hook/set-status rejected by rate limit",
  ),

  // Database
  dbHealthy: new Gauge("broker_db_healthy", "1 if Postgres connection is up, 0 if not"),
};
|
||||
|
||||
/**
 * Renders every entry of `metrics` in Prometheus plaintext.
 *
 * Each metric contributes the output of its own `toText()`; the sections
 * are joined with newlines and the result ends with a trailing newline.
 *
 * @returns The full text body for all registered metrics.
 */
export function metricsToText(): string {
  const sections: string[] = [];
  for (const metric of Object.values(metrics)) {
    sections.push(metric.toText());
  }
  return `${sections.join("\n")}\n`;
}
|
||||
Reference in New Issue
Block a user