/** * Clustering Analysis Simulation Tests * * Tests community detection algorithms and semantic clustering quality * in RuVector's latent space. * * Target Metrics: * - Louvain algorithm (optimal) * - Modularity Q >0.75 (target: 0.758) * - Semantic purity: 87.2% * - Hierarchical levels: 3 * - Community detection quality */ import { describe, it, expect, beforeAll } from 'vitest'; import { clusteringAnalysisScenario } from '../../scenarios/latent-space/clustering-analysis'; import type { SimulationReport } from '../../types'; describe('ClusteringAnalysis', () => { let report: SimulationReport; beforeAll(async () => { report = await clusteringAnalysisScenario.run(clusteringAnalysisScenario.config); }, 90000); // 90s timeout describe('Optimal Algorithm', () => { it('should select Louvain as best', () => { const best = report.summary.bestAlgorithm; expect(best.algorithm).toBe('louvain'); }); it('should test Louvain algorithm', () => { const algorithms = clusteringAnalysisScenario.config.algorithms; const louvain = algorithms.find(a => a.name === 'louvain'); expect(louvain).toBeDefined(); }); it('should test Label Propagation', () => { const algorithms = clusteringAnalysisScenario.config.algorithms; const lp = algorithms.find(a => a.name === 'label-propagation'); expect(lp).toBeDefined(); }); it('should test multiple algorithms', () => { const algorithms = clusteringAnalysisScenario.config.algorithms; expect(algorithms.length).toBeGreaterThanOrEqual(3); }); }); describe('Modularity Score', () => { it('should achieve Q >0.75', () => { const avgModularity = report.summary.avgModularity; expect(avgModularity).toBeGreaterThan(0.75); }); it('should target Q=0.758', () => { const avgModularity = report.summary.avgModularity; expect(avgModularity).toBeCloseTo(0.758, 0.05); }); it('should have positive modularity', () => { const results = report.detailedResults as any[]; results.forEach(r => { expect(r.metrics.modularityScore).toBeGreaterThan(0); }); }); it('should not exceed 1.0', () => { const results = report.detailedResults as any[]; results.forEach(r => { expect(r.metrics.modularityScore).toBeLessThanOrEqual(1.0); }); }); }); describe('Semantic Purity', () => { it('should achieve >85% semantic purity', () => { const purity = report.summary.semanticPurity; expect(purity).toBeGreaterThan(0.85); }); it('should target 87.2% semantic purity', () => { const purity = report.summary.semanticPurity; expect(purity).toBeCloseTo(0.872, 0.03); }); it('should align graph clusters with embeddings', () => { const results = report.detailedResults as any[]; results.forEach(r => { expect(r.metrics.embeddingClusterOverlap).toBeGreaterThan(0.7); }); }); }); describe('Community Structure', () => { it('should detect multiple communities', () => { const results = report.detailedResults as any[]; results.forEach(r => { expect(r.metrics.numCommunities).toBeGreaterThan(1); }); }); it('should have balanced distribution', () => { const results = report.detailedResults as any[]; results.forEach(r => { expect(Array.isArray(r.metrics.communityDistribution)).toBe(true); }); }); it('should track community sizes', () => { const metrics = report.metrics.communityStructure; expect(metrics.avgNumCommunities).toBeGreaterThan(0); }); }); describe('Hierarchical Properties', () => { it('should have hierarchical depth', () => { const results = report.detailedResults as any[]; results.forEach(r => { expect(r.metrics.hierarchyDepth).toBeGreaterThan(0); }); }); it('should target 3 hierarchical levels', () => { const hierarchy = report.metrics.hierarchicalProperties; if (hierarchy && hierarchy.avgDepth) { expect(hierarchy.avgDepth).toBeCloseTo(3, 1); } }); it('should track dendrogram balance', () => { const results = report.detailedResults as any[]; results.forEach(r => { if (r.metrics.dendrogramBalance) { expect(r.metrics.dendrogramBalance).toBeGreaterThan(0); } }); }); it('should record merging pattern', () => { const results = report.detailedResults as any[]; results.forEach(r => { expect(Array.isArray(r.metrics.mergingPattern)).toBe(true); }); }); }); describe('Semantic Alignment', () => { it('should measure cross-modal alignment', () => { const results = report.detailedResults as any[]; results.forEach(r => { if (r.metrics.crossModalAlignment) { expect(r.metrics.crossModalAlignment).toBeGreaterThan(0.7); } }); }); it('should validate semantic categories', () => { const categories = clusteringAnalysisScenario.config.semanticCategories; expect(categories).toContain('text'); expect(categories).toContain('code'); }); }); describe('Agent Collaboration', () => { it('should identify collaboration clusters', () => { const results = report.detailedResults as any[]; results.forEach(r => { expect(r.metrics.collaborationClusters).toBeGreaterThanOrEqual(0); }); }); it('should measure task specialization', () => { const results = report.detailedResults as any[]; results.forEach(r => { if (r.metrics.taskSpecialization) { expect(r.metrics.taskSpecialization).toBeGreaterThan(0.6); } }); }); it('should track communication efficiency', () => { const results = report.detailedResults as any[]; results.forEach(r => { if (r.metrics.communicationEfficiency) { expect(r.metrics.communicationEfficiency).toBeGreaterThan(0.7); } }); }); }); describe('Algorithm Comparison', () => { it('should compare Louvain vs Label Propagation', () => { const louvain = (report.detailedResults as any[]).find(r => r.algorithm === 'louvain'); const lp = (report.detailedResults as any[]).find(r => r.algorithm === 'label-propagation'); if (louvain && lp) { expect(louvain.metrics.modularityScore).toBeGreaterThan(0); expect(lp.metrics.modularityScore).toBeGreaterThan(0); } }); it('should test Leiden algorithm', () => { const leiden = (report.detailedResults as any[]).find(r => r.algorithm === 'leiden'); if (leiden) { expect(leiden.metrics.modularityScore).toBeGreaterThan(0.7); } }); it('should test spectral clustering', () => { const spectral = (report.detailedResults as any[]).find(r => r.algorithm === 'spectral'); if (spectral) { expect(spectral.metrics.numCommunities).toBeGreaterThan(0); } }); }); describe('Graph Density Impact', () => { it('should test multiple densities', () => { const densities = clusteringAnalysisScenario.config.graphDensities; expect(densities.length).toBeGreaterThanOrEqual(3); }); it('should handle sparse graphs', () => { const sparse = (report.detailedResults as any[]).filter(r => r.graphDensity === 0.01); sparse.forEach(r => { expect(r.metrics.modularityScore).toBeGreaterThan(0); }); }); it('should handle dense graphs', () => { const dense = (report.detailedResults as any[]).filter(r => r.graphDensity === 0.1); dense.forEach(r => { expect(r.metrics.modularityScore).toBeGreaterThan(0); }); }); }); describe('Scalability', () => { it('should scale to 100k nodes', () => { const sizes = clusteringAnalysisScenario.config.vectorCounts; expect(sizes).toContain(100000); }); it('should maintain quality at scale', () => { const large = (report.detailedResults as any[]).filter(r => r.vectorCount === 100000); large.forEach(r => { expect(r.metrics.modularityScore).toBeGreaterThan(0.70); }); }); it('should track detection time', () => { const results = report.detailedResults as any[]; results.forEach(r => { expect(r.detectionTimeMs).toBeGreaterThan(0); }); }); }); describe('Report Generation', () => { it('should generate analysis report', () => { expect(report.analysis).toBeDefined(); expect(report.analysis).toContain('Clustering'); }); it('should provide recommendations', () => { expect(report.recommendations).toBeDefined(); expect(report.recommendations.some(r => r.includes('Louvain'))).toBe(true); }); it('should generate visualizations', () => { expect(report.artifacts.dendrograms).toBeDefined(); expect(report.artifacts.communityVisualizations).toBeDefined(); expect(report.artifacts.modularityCharts).toBeDefined(); }); it('should complete within timeout', () => { expect(report.executionTimeMs).toBeLessThan(90000); }); }); });