598 lines
16 KiB
TypeScript
598 lines
16 KiB
TypeScript
/**
 * Completeness Validator
 *
 * Validates that documented capabilities match extracted code entities.
 * Ensures the knowledge graph stays in sync with actual code.
 */
|
|
|
|
import { readFile } from 'node:fs/promises';
|
|
import { parse as parseYaml } from 'yaml';
|
|
import type {
|
|
KnowledgeGraph,
|
|
ValidationResult,
|
|
ValidationWarning,
|
|
EntityType,
|
|
} from './types.js';
|
|
|
|
// ============================================
// Types
// ============================================
|
|
|
|
/**
 * One flattened leaf of the capabilities YAML: a string value together with
 * the key path that led to it.
 */
export interface CapabilityEntry {
  /** Leaf value taken from the YAML: a file path or a package reference. */
  path: string;

  /** Last segment of the YAML key path (e.g., 'line', 'bar'). */
  key: string;

  /** Dot-joined key path from the YAML root (e.g., 'ui_components.charts.line'). */
  qualifiedPath: string;

  /** Entity type inferred for this entry; absent when nothing could be inferred. */
  type?: EntityType;
}
|
|
|
|
/**
 * Result of parsing a capabilities.yaml document.
 */
export interface DocumentedCapabilities {
  /** Every string leaf of the document, flattened into a single list. */
  entries: CapabilityEntry[];

  /** The parsed YAML document, kept unmodified for reference. */
  raw: Record<string, unknown>;

  /** Names of the top-level categories found in the document. */
  categories: string[];

  /** Path of the file the document was loaded from, when known. */
  sourcePath?: string;
}
|
|
|
|
/**
 * Options accepted by the validation entry points.
 * Every field is optional; omitted fields fall back to the module defaults.
 */
export interface ValidateOptions {
  /**
   * Minimum confidence score to consider an entity match (0-100)
   * @default 80
   */
  minConfidence?: number;

  /**
   * Whether to include warnings for partial matches
   * @default true
   */
  includePartialMatches?: boolean;

  /**
   * Top-level categories to skip during validation
   * @default ['framework', 'packages_available', 'patterns', 'data_models']
   */
  skipCategories?: string[];

  /**
   * Whether to be strict about entity type matching
   * @default false
   */
  strictTypeMatching?: boolean;
}
|
|
|
|
// ============================================
// Constants
// ============================================
|
|
|
|
/**
 * Fallback values used for any validation option the caller leaves out.
 */
const DEFAULT_OPTIONS: Required<ValidateOptions> = {
  minConfidence: 80,
  includePartialMatches: true,
  skipCategories: [
    'framework',
    'packages_available',
    'patterns',
    'data_models',
  ],
  strictTypeMatching: false,
};
|
|
|
|
/**
 * Categories that typically contain code entities.
 * Reserved for future use in auto-categorization.
 */
const _CODE_CATEGORIES = [
  'ui_components',
  'hooks',
  'utilities',
  'schemas',
  'api',
];
// Reference the binding so it is not reported as unused while it is reserved.
void _CODE_CATEGORIES;
|
|
|
|
// ============================================
// Helper Functions
// ============================================
|
|
|
|
/**
 * Walk a parsed YAML value depth-first and collect every string leaf as a
 * capability entry.
 *
 * @param obj - Current YAML node
 * @param parentPath - Dot-joined key path leading to `obj` ('' at the root)
 * @param entries - Accumulator that is appended to and returned
 * @returns The same `entries` array, with any leaves found under `obj` added
 */
function flattenCapabilities(
  obj: unknown,
  parentPath: string = '',
  entries: CapabilityEntry[] = []
): CapabilityEntry[] {
  // String leaf: the value itself is the file path / package reference.
  if (typeof obj === 'string') {
    const segments = parentPath.split('.');
    const lastSegment = segments[segments.length - 1];
    entries.push({
      path: obj,
      key: lastSegment || parentPath,
      qualifiedPath: parentPath,
      type: inferEntityType(parentPath, obj),
    });
    return entries;
  }

  // Nothing to collect from null/undefined, other scalars, or arrays
  // (arrays are treated as plain item lists rather than file references).
  if (obj === null || typeof obj !== 'object' || Array.isArray(obj)) {
    return entries;
  }

  Object.entries(obj).forEach(([childKey, childValue]) => {
    // Keys starting with an underscore are metadata (e.g. _docs), not capabilities.
    if (childKey.startsWith('_')) {
      return;
    }
    const childPath = parentPath ? `${parentPath}.${childKey}` : childKey;
    flattenCapabilities(childValue, childPath, entries);
  });

  return entries;
}
|
|
|
|
/**
 * Infer an entity type from a capability's qualified YAML path and file path.
 *
 * Checks run in a fixed order and the first hit wins:
 * component, hook, schema, endpoint, utility.
 *
 * @param qualifiedPath - Dot-joined YAML key path (e.g. 'ui_components.charts.line')
 * @param filePath - File path or package reference stored at that key
 * @returns The inferred type, or `undefined` when no rule matches
 */
function inferEntityType(qualifiedPath: string, filePath: string): EntityType | undefined {
  const pathLower = qualifiedPath.toLowerCase();
  const filePathLower = filePath.toLowerCase();

  // Last path segment, accepting either separator style.
  const fileName = filePath.split(/[\\/]/).pop() ?? '';
  // A hook file is named `useXxx`, `use-xxx` or `use_xxx`. A plain substring
  // test would also flag names such as `user.ts` or `house.ts`.
  const looksLikeHookFile = /^use[A-Z0-9_-]/.test(fileName);

  // 'ui_components' already contains 'component', so one test covers both.
  if (pathLower.includes('component')) {
    return 'component';
  }
  if (pathLower.includes('hook') || looksLikeHookFile) {
    return 'hook';
  }
  if (pathLower.includes('schema') || filePathLower.includes('schema')) {
    return 'schema';
  }
  if (pathLower.includes('api') || pathLower.includes('endpoint')) {
    return 'endpoint';
  }
  if (pathLower.includes('util') || pathLower.includes('helper')) {
    return 'utility';
  }

  return undefined;
}
|
|
|
|
/**
 * Derive an entity name from a file path: take the file name, drop a
 * JS/TS extension, and turn kebab-case into PascalCase.
 *
 * Example: 'liquid-render/components/data-table.tsx' -> 'DataTable'.
 */
function extractEntityName(filePath: string): string {
  // Everything after the final '/', or the whole string when there is none.
  const baseName = filePath.slice(filePath.lastIndexOf('/') + 1);
  const stem = baseName.replace(/\.(tsx?|jsx?|js)$/, '');

  let pascalName = '';
  for (const word of stem.split('-')) {
    pascalName += word.charAt(0).toUpperCase() + word.slice(1);
  }
  return pascalName;
}
|
|
|
|
/**
 * Normalize an entity name for comparison.
 *
 * The name is split into words at camelCase boundaries and at any
 * non-alphanumeric separator, lowercased, and joined back without separators,
 * so 'DataTable', 'data-table' and 'data_table' all become 'datatable'.
 * A leading word `use|get|set|is|has` and a trailing word
 * `component|hook|util|helper|schema` are dropped, but only when they are
 * whole words and at least one other word remains. 'useAuth' becomes 'auth',
 * while 'User' stays 'user'.
 *
 * @param name - Raw entity name, file stem or YAML key
 * @returns Lowercase comparison key; '' when the name has no letters or digits
 */
function normalizeEntityName(name: string): string {
  const words = name
    // Mark case boundaries before lowercasing discards them.
    .replace(/([a-z0-9])([A-Z])/g, '$1-$2')
    // Split an acronym from the word that follows it (HTMLParser -> HTML-Parser).
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1-$2')
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u)
    .filter(word => word.length > 0);

  const firstWord = words[0];
  if (
    words.length > 1 &&
    firstWord !== undefined &&
    /^(use|get|set|is|has)$/.test(firstWord)
  ) {
    words.shift();
  }

  const lastWord = words[words.length - 1];
  if (
    words.length > 1 &&
    lastWord !== undefined &&
    /^(component|hook|util|helper|schema)$/.test(lastWord)
  ) {
    words.pop();
  }

  return words.join('');
}
|
|
|
|
/**
 * Score how alike two names are on a 0-100 scale, after normalizing both.
 *
 * - Equal normalized names score 100.
 * - If one normalized name contains the other, the score is the length ratio
 *   shorter/longer as a rounded percentage.
 * - Otherwise the score is derived from the Levenshtein edit distance:
 *   (maxLen - distance) / maxLen, as a rounded percentage.
 */
function calculateSimilarity(str1: string, str2: string): number {
  const a = normalizeEntityName(str1);
  const b = normalizeEntityName(str2);

  if (a === b) {
    return 100;
  }

  const aIsLonger = a.length > b.length;
  const longer = aIsLonger ? a : b;
  const shorter = aIsLonger ? b : a;

  // Containment can only hold with the longer string as the container.
  // An empty string is contained in everything, so both inputs are non-empty
  // beyond this point.
  if (longer.includes(shorter)) {
    return Math.round((shorter.length / longer.length) * 100);
  }

  // Levenshtein distance keeping only the previous and current DP rows.
  let previousRow = Array.from({ length: b.length + 1 }, (_, j) => j);
  for (let i = 1; i <= a.length; i++) {
    const currentRow = [i];
    for (let j = 1; j <= b.length; j++) {
      const substitutionCost = a[i - 1] === b[j - 1] ? 0 : 1;
      currentRow[j] = Math.min(
        previousRow[j] + 1,
        currentRow[j - 1] + 1,
        previousRow[j - 1] + substitutionCost
      );
    }
    previousRow = currentRow;
  }

  const distance = previousRow[b.length];
  const maxLen = longer.length;
  return Math.round(((maxLen - distance) / maxLen) * 100);
}
|
|
|
|
/**
 * Pick the documented entry that best matches a code entity name.
 *
 * Each entry is scored twice, once against the name derived from its file
 * path and once against its YAML key, and the higher score is used. Entries
 * scoring below `minConfidence` are ignored; on a tie the earlier entry wins.
 *
 * @returns The winning entry with its score, or `null` when nothing qualifies
 */
function findBestMatch(
  entityName: string,
  documentedEntries: CapabilityEntry[],
  minConfidence: number
): { entry: CapabilityEntry; confidence: number } | null {
  type Match = { entry: CapabilityEntry; confidence: number };

  return documentedEntries.reduce<Match | null>((best, entry) => {
    const byFileName = calculateSimilarity(entityName, extractEntityName(entry.path));
    const byKey = calculateSimilarity(entityName, entry.key);
    const confidence = Math.max(byFileName, byKey);

    const qualifies = confidence >= minConfidence;
    const beatsBest = !best || confidence > best.confidence;
    return qualifies && beatsBest ? { entry, confidence } : best;
  }, null);
}
|
|
|
|
/**
 * Turn issue counts into a completeness percentage.
 *
 * @param totalEntities - Number of entities being validated
 * @param missingCount - Entities present in code but undocumented
 * @param staleCount - Documented entries with no matching code
 * @returns Whole number from 0 to 100; 100 when there is nothing to validate
 */
function calculateScore(
  totalEntities: number,
  missingCount: number,
  staleCount: number
): number {
  if (totalEntities === 0) {
    // Nothing to validate
    return 100;
  }

  const penalty = ((missingCount + staleCount) / totalEntities) * 100;
  const cappedAtHundred = Math.min(100, Math.round(100 - penalty));
  return Math.max(0, cappedAtHundred);
}
|
|
|
|
/**
 * Build human-readable follow-up suggestions from the validation findings.
 *
 * Up to three missing or stale names are listed inline; longer lists are
 * summarized by count. When nothing needs attention a single "all good"
 * message is returned.
 */
function generateSuggestions(
  missing: string[],
  stale: string[],
  warnings: ValidationWarning[]
): string[] {
  const suggestions: string[] = [];

  if (missing.length > 0) {
    suggestions.push(
      missing.length <= 3
        ? `Add documentation for: ${missing.join(', ')}`
        : `Add documentation for ${missing.length} undocumented entities (run with --verbose for full list)`
    );
  }

  if (stale.length > 0) {
    suggestions.push(
      stale.length <= 3
        ? `Remove or update stale entries: ${stale.join(', ')}`
        : `Clean up ${stale.length} stale documentation entries`
    );
  }

  const hasWarningOfType = (kind: string): boolean =>
    warnings.some(warning => warning.type === kind);

  if (hasWarningOfType('low-confidence')) {
    suggestions.push(
      'Review entities with low-confidence matches - names may have drifted'
    );
  }

  if (hasWarningOfType('incomplete')) {
    suggestions.push(
      'Some documented entries reference files that could not be verified'
    );
  }

  if (suggestions.length === 0) {
    suggestions.push('Documentation is complete and up-to-date!');
  }

  return suggestions;
}
|
|
|
|
/**
 * Tell whether entries under a top-level category take part in validation.
 *
 * @returns `true` unless `category` appears in `skipCategories`
 */
function shouldValidateCategory(category: string, skipCategories: string[]): boolean {
  return skipCategories.every(skipped => skipped !== category);
}
|
|
|
|
// ============================================
// YAML Parsing
// ============================================
|
|
|
|
/**
 * Parse the text of a capabilities YAML document into structured form.
 *
 * An empty or comment-only document parses to `null`, and a document may
 * also have an array or scalar at its root. Neither is a category mapping,
 * so both are treated as "no capabilities" and yield an empty result
 * instead of throwing or producing nonsense categories.
 *
 * @param content - YAML source text
 * @param sourcePath - Optional path recorded on the result for reference
 * @returns Flattened entries, the raw mapping, and the top-level category
 *   names (keys starting with '_' are excluded from `categories`)
 * @throws Whatever the YAML parser throws for malformed input
 */
export function parseCapabilitiesYaml(
  content: string,
  sourcePath?: string
): DocumentedCapabilities {
  const parsed: unknown = parseYaml(content);

  // Only a mapping at the document root can carry categories.
  const isMapping =
    typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
  const raw: Record<string, unknown> = isMapping
    ? (parsed as Record<string, unknown>)
    : {};

  const categories = Object.keys(raw).filter(k => !k.startsWith('_'));
  const entries = flattenCapabilities(raw);

  return {
    entries,
    raw,
    categories,
    sourcePath,
  };
}
|
|
|
|
/**
 * Read a capabilities.yaml file from disk and parse it.
 *
 * A missing file (ENOENT) is not an error: an empty capabilities object that
 * still records `filePath` is returned. Any other read or parse failure is
 * rethrown as an `Error` whose message starts with
 * "Failed to load capabilities file:".
 *
 * @param filePath - Path to the YAML file
 */
export async function loadCapabilitiesFile(
  filePath: string
): Promise<DocumentedCapabilities> {
  try {
    const yamlText = await readFile(filePath, 'utf-8');
    return parseCapabilitiesYaml(yamlText, filePath);
  } catch (error) {
    const { code } = error as NodeJS.ErrnoException;

    if (code !== 'ENOENT') {
      throw new Error(
        `Failed to load capabilities file: ${error instanceof Error ? error.message : String(error)}`
      );
    }

    // File doesn't exist - return empty capabilities
    const empty: DocumentedCapabilities = {
      entries: [],
      raw: {},
      categories: [],
      sourcePath: filePath,
    };
    return empty;
  }
}
|
|
|
|
// ============================================
// Main Validation Functions
// ============================================
|
|
|
|
/**
 * Validate that documented capabilities match the knowledge graph.
 *
 * Every code entity is matched against the documented entries whose
 * top-level category is not skipped. Entities without a match are reported
 * as `missing`. Documented entries that no entity matched are reported as
 * `stale`, unless their path looks like a package reference (it starts with
 * '@' or contains no '/'). The score is computed over the number of distinct
 * code entity names plus documented qualified paths.
 *
 * @param knowledgeGraph - Extracted entities from code
 * @param documentedCapabilities - Parsed capabilities.yaml content
 * @param options - Validation options; omitted or `undefined` fields use the defaults
 * @returns Validation result with score, missing/stale entities, warnings, and suggestions
 */
export function validateCompleteness(
  knowledgeGraph: KnowledgeGraph,
  documentedCapabilities: DocumentedCapabilities,
  options: ValidateOptions = {}
): ValidationResult {
  // Resolve field by field so an explicitly passed `undefined` cannot
  // overwrite a default, which an object spread would allow.
  const opts: Required<ValidateOptions> = {
    minConfidence: options.minConfidence ?? DEFAULT_OPTIONS.minConfidence,
    includePartialMatches:
      options.includePartialMatches ?? DEFAULT_OPTIONS.includePartialMatches,
    skipCategories: options.skipCategories ?? DEFAULT_OPTIONS.skipCategories,
    strictTypeMatching: options.strictTypeMatching ?? DEFAULT_OPTIONS.strictTypeMatching,
  };

  const missing: string[] = [];
  const stale: string[] = [];
  const warnings: ValidationWarning[] = [];

  // Get entities from the knowledge graph
  const codeEntities = Object.values(knowledgeGraph.entities);

  // Keep only documented entries whose top-level category is validated
  const codeRelatedEntries = documentedCapabilities.entries.filter(entry => {
    const category = entry.qualifiedPath.split('.')[0];
    return shouldValidateCategory(category, opts.skipCategories);
  });

  // Qualified paths of documented entries that matched at least one entity
  const matchedDocumentedEntries = new Set<string>();

  // Check each code entity against documentation
  for (const entity of codeEntities) {
    const match = findBestMatch(entity.name, codeRelatedEntries, opts.minConfidence);

    if (!match) {
      // Entity in code but not documented
      missing.push(entity.name);
      continue;
    }

    matchedDocumentedEntries.add(match.entry.qualifiedPath);

    // Any match below 100 is a partial match; findBestMatch has already
    // enforced the lower bound. Reported only when the caller opted in.
    if (opts.includePartialMatches && match.confidence < 100) {
      warnings.push({
        type: 'low-confidence',
        entity: entity.name,
        message: `Matched to '${match.entry.qualifiedPath}' with ${match.confidence}% confidence`,
      });
    }

    // Check type mismatch if strict matching is enabled
    if (opts.strictTypeMatching && match.entry.type && match.entry.type !== entity.type) {
      warnings.push({
        type: 'incomplete',
        entity: entity.name,
        message: `Type mismatch: documented as '${match.entry.type}', code is '${entity.type}'`,
      });
    }
  }

  // Find stale documentation (documented but not in code)
  for (const entry of codeRelatedEntries) {
    if (matchedDocumentedEntries.has(entry.qualifiedPath)) {
      continue;
    }

    // Package references (scoped names or bare names without '/') are not
    // files in this code base, so they are never reported as stale.
    const isPackageRef = entry.path.startsWith('@') || !entry.path.includes('/');
    if (isPackageRef) {
      continue;
    }

    stale.push(entry.qualifiedPath);
    warnings.push({
      type: 'outdated',
      entity: entry.qualifiedPath,
      message: `Documented entry '${entry.path}' not found in extracted code`,
    });
  }

  // Score against the union of code entity names and documented paths
  const totalEntities = new Set([
    ...codeEntities.map(e => e.name),
    ...codeRelatedEntries.map(e => e.qualifiedPath),
  ]).size;

  const score = calculateScore(totalEntities, missing.length, stale.length);
  const suggestions = generateSuggestions(missing, stale, warnings);

  return {
    score,
    missing,
    stale,
    warnings,
    suggestions,
    validatedAt: new Date(),
  };
}
|
|
|
|
/**
 * Load a capabilities file and validate it against a knowledge graph.
 *
 * Shorthand for calling `loadCapabilitiesFile` followed by
 * `validateCompleteness`.
 *
 * @param filePath - Path to capabilities.yaml file
 * @param knowledgeGraph - Extracted knowledge graph
 * @param options - Validation options
 * @returns Validation result
 */
export async function validateCapabilitiesFile(
  filePath: string,
  knowledgeGraph: KnowledgeGraph,
  options: ValidateOptions = {}
): Promise<ValidationResult> {
  const documented = await loadCapabilitiesFile(filePath);
  const result = validateCompleteness(knowledgeGraph, documented, options);
  return result;
}
|
|
|
|
/**
 * Build the result used when there is no data to validate: a perfect score,
 * no findings, and a single explanatory suggestion. Each call returns fresh
 * arrays and a fresh timestamp.
 */
export function createEmptyValidationResult(): ValidationResult {
  const emptyResult: ValidationResult = {
    score: 100,
    missing: [],
    stale: [],
    warnings: [],
    suggestions: ['No entities to validate'],
    validatedAt: new Date(),
  };
  return emptyResult;
}
|
|
|
|
/**
 * Report whether a validation result contains findings.
 *
 * Only `missing` and `stale` count as issues; warnings alone do not.
 */
export function hasValidationIssues(result: ValidationResult): boolean {
  const isClean = result.missing.length === 0 && result.stale.length === 0;
  return !isClean;
}
|
|
|
|
/**
 * Render a validation result as plain text, one item per line.
 *
 * The missing and stale sections show at most 10 items and the warnings
 * section at most 5; anything beyond that is summarized as "... and N more".
 * Empty sections are omitted, while the score and suggestions are always
 * printed.
 */
export function formatValidationResult(result: ValidationResult): string {
  const out: string[] = [`Completeness Score: ${result.score}%`, ''];

  // Emit one capped section followed by a blank line; skipped when empty.
  const addSection = <T>(
    heading: string,
    items: T[],
    limit: number,
    render: (item: T) => string
  ): void => {
    if (items.length === 0) {
      return;
    }
    out.push(heading);
    items.slice(0, limit).forEach(item => {
      out.push(render(item));
    });
    if (items.length > limit) {
      out.push(` ... and ${items.length - limit} more`);
    }
    out.push('');
  };

  addSection(
    `Missing from documentation (${result.missing.length}):`,
    result.missing,
    10,
    entity => ` - ${entity}`
  );
  addSection(
    `Stale documentation (${result.stale.length}):`,
    result.stale,
    10,
    entry => ` - ${entry}`
  );
  addSection(
    `Warnings (${result.warnings.length}):`,
    result.warnings,
    5,
    warning => ` [${warning.type}] ${warning.entity}: ${warning.message}`
  );

  out.push('Suggestions:');
  for (const suggestion of result.suggestions) {
    out.push(` - ${suggestion}`);
  }

  return out.join('\n');
}
|