Files
whyrating/packages/cognitive-context/src/tokens.test.ts
2026-02-04 01:55:00 +01:00

375 lines
12 KiB
TypeScript

/**
* Tests for token counting module
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { writeFile, unlink, mkdir, rmdir } from 'node:fs/promises';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import {
countTokens,
countFileTokens,
validateBudget,
truncateToTokenLimit,
createTokenReport,
getBudgetForFile,
formatTokenCount,
resetEncoder,
} from './tokens.js';
import type { TokenBudget, TokenReport } from './types.js';
// ============================================
// Test Setup
// ============================================
describe('tokens', () => {
let testDir: string;
beforeEach(async () => {
resetEncoder();
testDir = join(tmpdir(), `cognitive-test-${Date.now()}`);
await mkdir(testDir, { recursive: true });
});
afterEach(async () => {
try {
// Clean up test files
const files = ['test.txt', 'empty.txt', 'summary.md'];
for (const file of files) {
try {
await unlink(join(testDir, file));
} catch {
// Ignore if file doesn't exist
}
}
await rmdir(testDir);
} catch {
// Ignore cleanup errors
}
});
// ============================================
// countTokens Tests
// ============================================
describe('countTokens', () => {
it('should count tokens in a simple string', () => {
const text = 'Hello, world!';
const count = countTokens(text);
// cl100k_base tokenizes "Hello, world!" into about 4 tokens
expect(count).toBeGreaterThan(0);
expect(count).toBeLessThan(10);
});
it('should return 0 for empty string', () => {
expect(countTokens('')).toBe(0);
});
it('should return 0 for null/undefined-like input', () => {
// @ts-expect-error Testing edge case
expect(countTokens(null)).toBe(0);
// @ts-expect-error Testing edge case
expect(countTokens(undefined)).toBe(0);
});
it('should handle long text', () => {
const longText = 'This is a test sentence. '.repeat(100);
const count = countTokens(longText);
// Should be roughly proportional to length
expect(count).toBeGreaterThan(100);
expect(count).toBeLessThan(1000);
});
it('should handle special characters and unicode', () => {
const text = 'Hello! 你好! مرحبا! 🌍';
const count = countTokens(text);
// Should handle unicode without throwing
expect(count).toBeGreaterThan(0);
});
it('should handle code snippets', () => {
const code = `
function hello() {
console.log("Hello, world!");
return 42;
}
`;
const count = countTokens(code);
expect(count).toBeGreaterThan(10);
});
});
// ============================================
// countFileTokens Tests
// ============================================
describe('countFileTokens', () => {
it('should count tokens in a file', async () => {
const filePath = join(testDir, 'test.txt');
const content = 'This is test content for token counting.';
await writeFile(filePath, content, 'utf-8');
const report = await countFileTokens(filePath, 100);
expect(report.file).toBe(filePath);
expect(report.tokens).toBeGreaterThan(0);
expect(report.budget).toBe(100);
expect(report.overBudget).toBe(false);
expect(report.percentage).toBeLessThan(100);
});
it('should detect when file is over budget', async () => {
const filePath = join(testDir, 'test.txt');
const content = 'This is a longer test content. '.repeat(50);
await writeFile(filePath, content, 'utf-8');
const report = await countFileTokens(filePath, 10);
expect(report.overBudget).toBe(true);
expect(report.percentage).toBeGreaterThan(100);
});
it('should handle non-existent file gracefully', async () => {
const report = await countFileTokens('/non/existent/file.txt', 100);
expect(report.tokens).toBe(0);
expect(report.overBudget).toBe(false);
});
it('should handle empty file', async () => {
const filePath = join(testDir, 'empty.txt');
await writeFile(filePath, '', 'utf-8');
const report = await countFileTokens(filePath, 100);
expect(report.tokens).toBe(0);
expect(report.overBudget).toBe(false);
expect(report.percentage).toBe(0);
});
it('should use Infinity as default budget', async () => {
const filePath = join(testDir, 'test.txt');
await writeFile(filePath, 'Some content', 'utf-8');
const report = await countFileTokens(filePath);
expect(report.budget).toBe(Infinity);
expect(report.overBudget).toBe(false);
expect(report.percentage).toBe(0);
});
});
// ============================================
// validateBudget Tests
// ============================================
describe('validateBudget', () => {
const defaultBudget: TokenBudget = {
summary: 300,
capabilities: 2000,
wisdomPerFile: 1500,
total: 5000,
};
it('should return valid when all reports are within budget', () => {
const reports: TokenReport[] = [
{ file: 'SUMMARY.md', tokens: 200, budget: 300, overBudget: false, percentage: 67 },
{ file: 'capabilities.yaml', tokens: 1000, budget: 2000, overBudget: false, percentage: 50 },
];
const result = validateBudget(reports, defaultBudget);
expect(result.valid).toBe(true);
expect(result.violations).toHaveLength(0);
});
it('should detect individual file budget violations', () => {
const reports: TokenReport[] = [
{ file: 'SUMMARY.md', tokens: 400, budget: 300, overBudget: true, percentage: 133 },
{ file: 'capabilities.yaml', tokens: 1000, budget: 2000, overBudget: false, percentage: 50 },
];
const result = validateBudget(reports, defaultBudget);
expect(result.valid).toBe(false);
expect(result.violations).toHaveLength(1);
expect(result.violations[0].file).toBe('SUMMARY.md');
});
it('should detect total budget violations', () => {
const reports: TokenReport[] = [
{ file: 'file1.md', tokens: 3000, budget: 5000, overBudget: false, percentage: 60 },
{ file: 'file2.md', tokens: 3000, budget: 5000, overBudget: false, percentage: 60 },
];
const result = validateBudget(reports, defaultBudget);
expect(result.valid).toBe(false);
// Should have total violation
const totalViolation = result.violations.find((v) => v.file === '[TOTAL]');
expect(totalViolation).toBeDefined();
expect(totalViolation?.tokens).toBe(6000);
});
it('should handle empty reports array', () => {
const result = validateBudget([], defaultBudget);
expect(result.valid).toBe(true);
expect(result.violations).toHaveLength(0);
});
it('should detect multiple violations', () => {
const reports: TokenReport[] = [
{ file: 'SUMMARY.md', tokens: 500, budget: 300, overBudget: true, percentage: 167 },
{ file: 'wisdom/guide.md', tokens: 2000, budget: 1500, overBudget: true, percentage: 133 },
];
const result = validateBudget(reports, defaultBudget);
expect(result.valid).toBe(false);
expect(result.violations.length).toBeGreaterThanOrEqual(2);
});
});
// ============================================
// truncateToTokenLimit Tests
// ============================================
describe('truncateToTokenLimit', () => {
it('should return text unchanged if within limit', () => {
const text = 'Hello, world!';
const result = truncateToTokenLimit(text, 100);
expect(result).toBe(text);
});
it('should truncate text to fit within limit', () => {
const text = 'This is a test sentence. '.repeat(100);
const result = truncateToTokenLimit(text, 50);
const resultTokens = countTokens(result);
expect(resultTokens).toBeLessThanOrEqual(50);
expect(result.length).toBeLessThan(text.length);
});
it('should return empty string for empty input', () => {
expect(truncateToTokenLimit('', 100)).toBe('');
});
it('should return empty string for zero max tokens', () => {
expect(truncateToTokenLimit('Hello', 0)).toBe('');
});
it('should return empty string for negative max tokens', () => {
expect(truncateToTokenLimit('Hello', -10)).toBe('');
});
it('should try to break at sentence boundaries', () => {
const text = 'First sentence. Second sentence. Third sentence.';
const result = truncateToTokenLimit(text, 8);
// Should end with a period if it found a sentence boundary
if (result.length > 10) {
expect(result.endsWith('.') || result.endsWith('sentence')).toBe(true);
}
});
it('should handle single word exceeding limit', () => {
const text = 'supercalifragilisticexpialidocious';
const result = truncateToTokenLimit(text, 1);
// Should still return something (partial word)
expect(countTokens(result)).toBeLessThanOrEqual(1);
});
});
// ============================================
// createTokenReport Tests
// ============================================
describe('createTokenReport', () => {
it('should create a report with correct values', () => {
const report = createTokenReport('test.md', 'Hello world', 100);
expect(report.file).toBe('test.md');
expect(report.tokens).toBeGreaterThan(0);
expect(report.budget).toBe(100);
expect(report.overBudget).toBe(false);
expect(report.percentage).toBeLessThan(100);
});
it('should detect over budget content', () => {
const content = 'This is content. '.repeat(100);
const report = createTokenReport('test.md', content, 10);
expect(report.overBudget).toBe(true);
expect(report.percentage).toBeGreaterThan(100);
});
it('should handle zero budget', () => {
const report = createTokenReport('test.md', 'content', 0);
expect(report.percentage).toBe(0);
});
});
// ============================================
// getBudgetForFile Tests
// ============================================
describe('getBudgetForFile', () => {
const budget: TokenBudget = {
summary: 300,
capabilities: 2000,
wisdomPerFile: 1500,
total: 20000,
};
it('should return summary budget for summary files', () => {
expect(getBudgetForFile('SUMMARY.md', budget)).toBe(300);
expect(getBudgetForFile('.context/SUMMARY.md', budget)).toBe(300);
expect(getBudgetForFile('docs/summary.txt', budget)).toBe(300);
});
it('should return capabilities budget for capabilities files', () => {
expect(getBudgetForFile('capabilities.yaml', budget)).toBe(2000);
expect(getBudgetForFile('.context/capabilities.yml', budget)).toBe(2000);
});
it('should return wisdom budget for wisdom files', () => {
expect(getBudgetForFile('wisdom/guide.md', budget)).toBe(1500);
expect(getBudgetForFile('.context/wisdom/patterns.md', budget)).toBe(1500);
});
it('should return total budget for unknown files', () => {
expect(getBudgetForFile('random.md', budget)).toBe(20000);
expect(getBudgetForFile('docs/other.txt', budget)).toBe(20000);
});
});
// ============================================
// formatTokenCount Tests
// ============================================
describe('formatTokenCount', () => {
it('should format small numbers as-is', () => {
expect(formatTokenCount(100)).toBe('100');
expect(formatTokenCount(999)).toBe('999');
});
it('should format thousands with k suffix', () => {
expect(formatTokenCount(1000)).toBe('1.0k');
expect(formatTokenCount(1500)).toBe('1.5k');
expect(formatTokenCount(10000)).toBe('10.0k');
});
it('should handle zero', () => {
expect(formatTokenCount(0)).toBe('0');
});
});
});