feat: turbostarter boilerplate

Production-ready Next.js boilerplate with:

- Runtime env validation (fail-fast on missing vars)
- Feature-gated config (S3, Stripe, email, OAuth)
- Docker + Coolify deployment pipeline
- PostgreSQL + pgvector, MinIO S3, Better Auth
- TypeScript strict mode (no ignoreBuildErrors)
- i18n (en/es), AI modules, billing, monitoring

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 17:29:12 +00:00
commit 3527e732d4
1618 changed files with 338230 additions and 0 deletions
--- a/packages/cognitive-context/src/tokens.ts
+++ b/packages/cognitive-context/src/tokens.ts
@@ -0,0 +1,306 @@
+/**
+ * Token counting and budget management module
+ *
+ * Uses js-tiktoken with cl100k_base encoding (GPT-4 compatible)
+ * for accurate token counting.
+ */
+
+import { readFile } from 'node:fs/promises';
+import { getEncoding, Tiktoken } from 'js-tiktoken';
+import type { TokenBudget, TokenReport } from './types.js';
+
+// ============================================
+// Encoder Instance (cached for performance)
+// ============================================
+
+let encoderInstance: Tiktoken | null = null;
+
+/**
+ * Return the shared cl100k_base encoder, building it on first use.
+ *
+ * The instance is stored in the module-level `encoderInstance` variable so
+ * that later calls reuse it instead of calling `getEncoding` again.
+ */
+function getEncoder(): Tiktoken {
+  if (encoderInstance !== null) {
+    return encoderInstance;
+  }
+
+  const created = getEncoding('cl100k_base');
+  encoderInstance = created;
+  return created;
+}
+
+// ============================================
+// Core Token Counting
+// ============================================
+
+/**
+ * Count the number of tokens in a text string.
+ *
+ * Special-token markers that appear literally in the text (for example
+ * `<|endoftext|>`) are tokenized as ordinary text rather than rejected, so
+ * documents that merely mention them still get an exact count.
+ *
+ * If the encoder throws for any reason, a warning is logged and the count is
+ * estimated as one token per four characters (rounded up).
+ *
+ * @param text - The text to count tokens for
+ * @returns The number of tokens; 0 for an empty string
+ *
+ * @example
+ * const count = countTokens('Hello, world!');
+ * console.log(count); // 4
+ */
+export function countTokens(text: string): number {
+  if (!text) {
+    return 0;
+  }
+
+  try {
+    // Empty allowed/disallowed lists: by default the encoder throws when the
+    // input contains a special-token marker, which would force the rough
+    // character-based estimate below for otherwise valid text.
+    return getEncoder().encode(text, [], []).length;
+  } catch (error) {
+    // Handle encoding errors gracefully - estimate based on characters
+    // Rough estimate: ~4 characters per token for English text
+    console.warn('Token encoding failed, using character-based estimate:', error);
+    return Math.ceil(text.length / 4);
+  }
+}
+
+/**
+ * Count tokens in a file
+ *
+ * Reads the file as UTF-8 and builds the report with `createTokenReport`, so
+ * the percentage follows the same rule as reports built from in-memory
+ * content: it is 0 when the budget is not positive, and also 0 for the
+ * default `Infinity` budget.
+ *
+ * If the file cannot be read, a warning is logged and a report with 0 tokens
+ * is returned instead of rejecting (best-effort behaviour).
+ *
+ * @param filePath - Path to the file to count tokens for
+ * @param budget - Optional budget to compare against (defaults to Infinity)
+ * @returns TokenReport with file statistics
+ *
+ * @example
+ * const report = await countFileTokens('./README.md', 1000);
+ * console.log(report.overBudget); // false if under 1000 tokens
+ */
+export async function countFileTokens(
+  filePath: string,
+  budget: number = Infinity
+): Promise<TokenReport> {
+  let content: string;
+
+  // Only the read is guarded, so the "Failed to read file" warning cannot be
+  // triggered by anything other than an actual read failure.
+  try {
+    content = await readFile(filePath, 'utf-8');
+  } catch (error) {
+    // If file can't be read, return a report indicating the error
+    // with 0 tokens (can't count what we can't read)
+    const message = error instanceof Error ? error.message : 'Unknown error';
+    console.warn(`Failed to read file ${filePath}: ${message}`);
+
+    return {
+      file: filePath,
+      tokens: 0,
+      budget,
+      overBudget: false,
+      percentage: 0,
+    };
+  }
+
+  return createTokenReport(filePath, content, budget);
+}
+
+// ============================================
+// Budget Validation
+// ============================================
+
+/**
+ * Validate token reports against a budget configuration
+ *
+ * Every report already flagged `overBudget` is a violation. In addition, when
+ * the sum of all report token counts exceeds `budget.total`, a synthetic
+ * report with the file name `[TOTAL]` is appended to the violations.
+ *
+ * @param reports - Array of TokenReport objects to validate
+ * @param budget - TokenBudget configuration with limits
+ * @returns Validation result with valid flag and any violations
+ *
+ * @example
+ * const reports = [
+ *   { file: 'SUMMARY.md', tokens: 250, budget: 300, overBudget: false, percentage: 83 },
+ *   { file: 'wisdom/guide.md', tokens: 2000, budget: 1500, overBudget: true, percentage: 133 },
+ * ];
+ * const result = validateBudget(reports, budget);
+ * console.log(result.valid); // false
+ * console.log(result.violations); // [{ file: 'wisdom/guide.md', ... }]
+ */
+export function validateBudget(
+  reports: TokenReport[],
+  budget: TokenBudget
+): { valid: boolean; violations: TokenReport[] } {
+  // Check individual file budgets
+  const violations: TokenReport[] = reports.filter((report) => report.overBudget);
+
+  // Check total budget
+  const totalTokens = reports.reduce((sum, r) => sum + r.tokens, 0);
+  if (totalTokens > budget.total) {
+    // Add a synthetic report for total budget violation
+    violations.push({
+      file: '[TOTAL]',
+      tokens: totalTokens,
+      budget: budget.total,
+      overBudget: true,
+      // A non-positive total would make the ratio Infinity or negative;
+      // report 0 in that case, as createTokenReport does.
+      percentage:
+        budget.total > 0 ? Math.round((totalTokens / budget.total) * 100) : 0,
+    });
+  }
+
+  return {
+    valid: violations.length === 0,
+    violations,
+  };
+}
+
+// ============================================
+// Text Truncation
+// ============================================
+
+/**
+ * Truncate text to fit within a token limit
+ *
+ * Finds the longest prefix of `text` whose token count does not exceed
+ * `maxTokens` using a binary search over the prefix length, then trims that
+ * prefix back to a sentence or word boundary when one is close to the end.
+ * The search assumes the token count does not decrease as the prefix grows.
+ *
+ * @param text - The text to truncate
+ * @param maxTokens - Maximum number of tokens allowed
+ * @returns The original text if it already fits, an empty string if `text`
+ *   is empty or `maxTokens` is not positive, otherwise the truncated prefix
+ *
+ * @example
+ * const longText = 'This is a very long text...';
+ * const truncated = truncateToTokenLimit(longText, 10);
+ * console.log(countTokens(truncated) <= 10); // true
+ */
+export function truncateToTokenLimit(text: string, maxTokens: number): string {
+  if (!text || maxTokens <= 0) {
+    return '';
+  }
+
+  if (countTokens(text) <= maxTokens) {
+    return text;
+  }
+
+  // Binary search for the largest prefix length that still fits.
+  // Invariant: the prefix of length `low` fits (length 0 trivially does).
+  let low = 0;
+  let high = text.length;
+
+  while (low < high) {
+    // Round up so the loop always makes progress when low + 1 === high
+    const mid = Math.floor((low + high + 1) / 2);
+
+    if (countTokens(text.slice(0, mid)) <= maxTokens) {
+      low = mid;
+    } else {
+      high = mid - 1;
+    }
+  }
+
+  let result = text.slice(0, low);
+
+  // The cut is made in UTF-16 code units, so it can land between the two
+  // halves of a surrogate pair; drop a dangling high surrogate.
+  const lastUnit = result.charCodeAt(result.length - 1);
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+    result = result.slice(0, -1);
+  }
+
+  // Try to find a clean break point (sentence or word boundary)
+  const cleanBreak = findCleanBreakPoint(result);
+
+  return cleanBreak || result;
+}
+
+/**
+ * Find a clean break point in text (sentence or word boundary)
+ *
+ * The text is cut after its last `.`, `!` or `?` when that keeps more than
+ * 70% of the input; otherwise it is cut before its last whitespace character
+ * when that keeps more than 80%. If neither cut qualifies, the text is
+ * returned unchanged. Both searches work across line breaks.
+ *
+ * @param text - The text to trim
+ * @returns The trimmed text, or `text` itself when no suitable boundary exists
+ */
+function findCleanBreakPoint(text: string): string {
+  if (!text) return '';
+
+  // `[\s\S]` rather than `.` so the kept prefix may span several lines;
+  // `.` does not match newlines and would limit the search to the first line.
+  const sentence = /^([\s\S]+[.!?])[^.!?]*$/.exec(text)?.[1];
+  if (sentence !== undefined && sentence.length > text.length * 0.7) {
+    return sentence;
+  }
+
+  // Fall back to last word boundary
+  const words = /^([\s\S]+)\s+\S*$/.exec(text)?.[1];
+  if (words !== undefined && words.length > text.length * 0.8) {
+    return words;
+  }
+
+  // If no good break point, return as-is
+  return text;
+}
+
+// ============================================
+// Utility Functions
+// ============================================
+
+/**
+ * Build a TokenReport from content that is already in memory.
+ *
+ * The percentage is the token count relative to the budget, rounded to the
+ * nearest integer; it is reported as 0 when the budget is not positive.
+ *
+ * @param file - File path recorded in the report
+ * @param content - Text whose tokens are counted
+ * @param budget - Token budget for this file
+ * @returns TokenReport describing the content against the budget
+ */
+export function createTokenReport(
+  file: string,
+  content: string,
+  budget: number
+): TokenReport {
+  const tokens = countTokens(content);
+  const overBudget = tokens > budget;
+
+  let percentage = 0;
+  if (budget > 0) {
+    percentage = Math.round((tokens / budget) * 100);
+  }
+
+  return { file, tokens, budget, overBudget, percentage };
+}
+
+/**
+ * Pick the budget that applies to a file, based on its path.
+ *
+ * The lower-cased path is checked for the substrings `summary`,
+ * `capabilities` and `wisdom`, in that order; the first one found decides
+ * the budget. Paths matching none of them get the total budget.
+ *
+ * @param filePath - The file path
+ * @param budget - TokenBudget configuration
+ * @returns The applicable budget for this file
+ */
+export function getBudgetForFile(filePath: string, budget: TokenBudget): number {
+  const normalized = filePath.toLowerCase();
+
+  // Ordered: earlier entries win when a path contains several keywords.
+  const rules: ReadonlyArray<readonly [string, number]> = [
+    ['summary', budget.summary],
+    ['capabilities', budget.capabilities],
+    ['wisdom', budget.wisdomPerFile],
+  ];
+
+  for (const [keyword, limit] of rules) {
+    if (normalized.includes(keyword)) {
+      return limit;
+    }
+  }
+
+  return budget.total;
+}
+
+/**
+ * Render a token count as a short display string.
+ *
+ * Counts of 1000 or more are shown in thousands with one decimal place and
+ * a `k` suffix; smaller counts are shown as-is.
+ *
+ * @param tokens - Number of tokens
+ * @returns Formatted string (e.g., "1.2k" for 1200)
+ */
+export function formatTokenCount(tokens: number): string {
+  const useThousands = tokens >= 1000;
+  return useThousands ? (tokens / 1000).toFixed(1) + 'k' : String(tokens);
+}
+
+/**
+ * Discard the cached encoder instance (useful for testing).
+ *
+ * Clears the module-level `encoderInstance`, so the next `getEncoder()` call
+ * creates a new encoder.
+ */
+export function resetEncoder(): void {
+  encoderInstance = null;
+}