282 lines
9.0 KiB
TypeScript
282 lines
9.0 KiB
TypeScript
/**
|
|
* Clustering Analysis Simulation Tests
|
|
*
|
|
* Tests community detection algorithms and semantic clustering quality
|
|
* in RuVector's latent space.
|
|
*
|
|
* Target Metrics:
|
|
* - Louvain algorithm (optimal)
|
|
* - Modularity Q >0.75 (target: 0.758)
|
|
* - Semantic purity: 87.2%
|
|
* - Hierarchical levels: 3
|
|
* - Community detection quality
|
|
*/
|
|
|
|
import { describe, it, expect, beforeAll } from 'vitest';
|
|
import { clusteringAnalysisScenario } from '../../scenarios/latent-space/clustering-analysis';
|
|
import type { SimulationReport } from '../../types';
|
|
|
|
describe('ClusteringAnalysis', () => {
|
|
let report: SimulationReport;
|
|
|
|
beforeAll(async () => {
|
|
report = await clusteringAnalysisScenario.run(clusteringAnalysisScenario.config);
|
|
}, 90000); // 90s timeout
|
|
|
|
describe('Optimal Algorithm', () => {
|
|
it('should select Louvain as best', () => {
|
|
const best = report.summary.bestAlgorithm;
|
|
expect(best.algorithm).toBe('louvain');
|
|
});
|
|
|
|
it('should test Louvain algorithm', () => {
|
|
const algorithms = clusteringAnalysisScenario.config.algorithms;
|
|
const louvain = algorithms.find(a => a.name === 'louvain');
|
|
expect(louvain).toBeDefined();
|
|
});
|
|
|
|
it('should test Label Propagation', () => {
|
|
const algorithms = clusteringAnalysisScenario.config.algorithms;
|
|
const lp = algorithms.find(a => a.name === 'label-propagation');
|
|
expect(lp).toBeDefined();
|
|
});
|
|
|
|
it('should test multiple algorithms', () => {
|
|
const algorithms = clusteringAnalysisScenario.config.algorithms;
|
|
expect(algorithms.length).toBeGreaterThanOrEqual(3);
|
|
});
|
|
});
|
|
|
|
describe('Modularity Score', () => {
|
|
it('should achieve Q >0.75', () => {
|
|
const avgModularity = report.summary.avgModularity;
|
|
expect(avgModularity).toBeGreaterThan(0.75);
|
|
});
|
|
|
|
it('should target Q=0.758', () => {
|
|
const avgModularity = report.summary.avgModularity;
|
|
expect(avgModularity).toBeCloseTo(0.758, 0.05);
|
|
});
|
|
|
|
it('should have positive modularity', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
expect(r.metrics.modularityScore).toBeGreaterThan(0);
|
|
});
|
|
});
|
|
|
|
it('should not exceed 1.0', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
expect(r.metrics.modularityScore).toBeLessThanOrEqual(1.0);
|
|
});
|
|
});
|
|
});
|
|
|
|
describe('Semantic Purity', () => {
|
|
it('should achieve >85% semantic purity', () => {
|
|
const purity = report.summary.semanticPurity;
|
|
expect(purity).toBeGreaterThan(0.85);
|
|
});
|
|
|
|
it('should target 87.2% semantic purity', () => {
|
|
const purity = report.summary.semanticPurity;
|
|
expect(purity).toBeCloseTo(0.872, 0.03);
|
|
});
|
|
|
|
it('should align graph clusters with embeddings', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
expect(r.metrics.embeddingClusterOverlap).toBeGreaterThan(0.7);
|
|
});
|
|
});
|
|
});
|
|
|
|
describe('Community Structure', () => {
|
|
it('should detect multiple communities', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
expect(r.metrics.numCommunities).toBeGreaterThan(1);
|
|
});
|
|
});
|
|
|
|
it('should have balanced distribution', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
expect(Array.isArray(r.metrics.communityDistribution)).toBe(true);
|
|
});
|
|
});
|
|
|
|
it('should track community sizes', () => {
|
|
const metrics = report.metrics.communityStructure;
|
|
expect(metrics.avgNumCommunities).toBeGreaterThan(0);
|
|
});
|
|
});
|
|
|
|
describe('Hierarchical Properties', () => {
|
|
it('should have hierarchical depth', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
expect(r.metrics.hierarchyDepth).toBeGreaterThan(0);
|
|
});
|
|
});
|
|
|
|
it('should target 3 hierarchical levels', () => {
|
|
const hierarchy = report.metrics.hierarchicalProperties;
|
|
if (hierarchy && hierarchy.avgDepth) {
|
|
expect(hierarchy.avgDepth).toBeCloseTo(3, 1);
|
|
}
|
|
});
|
|
|
|
it('should track dendrogram balance', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
if (r.metrics.dendrogramBalance) {
|
|
expect(r.metrics.dendrogramBalance).toBeGreaterThan(0);
|
|
}
|
|
});
|
|
});
|
|
|
|
it('should record merging pattern', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
expect(Array.isArray(r.metrics.mergingPattern)).toBe(true);
|
|
});
|
|
});
|
|
});
|
|
|
|
describe('Semantic Alignment', () => {
|
|
it('should measure cross-modal alignment', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
if (r.metrics.crossModalAlignment) {
|
|
expect(r.metrics.crossModalAlignment).toBeGreaterThan(0.7);
|
|
}
|
|
});
|
|
});
|
|
|
|
it('should validate semantic categories', () => {
|
|
const categories = clusteringAnalysisScenario.config.semanticCategories;
|
|
expect(categories).toContain('text');
|
|
expect(categories).toContain('code');
|
|
});
|
|
});
|
|
|
|
describe('Agent Collaboration', () => {
|
|
it('should identify collaboration clusters', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
expect(r.metrics.collaborationClusters).toBeGreaterThanOrEqual(0);
|
|
});
|
|
});
|
|
|
|
it('should measure task specialization', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
if (r.metrics.taskSpecialization) {
|
|
expect(r.metrics.taskSpecialization).toBeGreaterThan(0.6);
|
|
}
|
|
});
|
|
});
|
|
|
|
it('should track communication efficiency', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
if (r.metrics.communicationEfficiency) {
|
|
expect(r.metrics.communicationEfficiency).toBeGreaterThan(0.7);
|
|
}
|
|
});
|
|
});
|
|
});
|
|
|
|
describe('Algorithm Comparison', () => {
|
|
it('should compare Louvain vs Label Propagation', () => {
|
|
const louvain = (report.detailedResults as any[]).find(r => r.algorithm === 'louvain');
|
|
const lp = (report.detailedResults as any[]).find(r => r.algorithm === 'label-propagation');
|
|
|
|
if (louvain && lp) {
|
|
expect(louvain.metrics.modularityScore).toBeGreaterThan(0);
|
|
expect(lp.metrics.modularityScore).toBeGreaterThan(0);
|
|
}
|
|
});
|
|
|
|
it('should test Leiden algorithm', () => {
|
|
const leiden = (report.detailedResults as any[]).find(r => r.algorithm === 'leiden');
|
|
if (leiden) {
|
|
expect(leiden.metrics.modularityScore).toBeGreaterThan(0.7);
|
|
}
|
|
});
|
|
|
|
it('should test spectral clustering', () => {
|
|
const spectral = (report.detailedResults as any[]).find(r => r.algorithm === 'spectral');
|
|
if (spectral) {
|
|
expect(spectral.metrics.numCommunities).toBeGreaterThan(0);
|
|
}
|
|
});
|
|
});
|
|
|
|
describe('Graph Density Impact', () => {
|
|
it('should test multiple densities', () => {
|
|
const densities = clusteringAnalysisScenario.config.graphDensities;
|
|
expect(densities.length).toBeGreaterThanOrEqual(3);
|
|
});
|
|
|
|
it('should handle sparse graphs', () => {
|
|
const sparse = (report.detailedResults as any[]).filter(r => r.graphDensity === 0.01);
|
|
sparse.forEach(r => {
|
|
expect(r.metrics.modularityScore).toBeGreaterThan(0);
|
|
});
|
|
});
|
|
|
|
it('should handle dense graphs', () => {
|
|
const dense = (report.detailedResults as any[]).filter(r => r.graphDensity === 0.1);
|
|
dense.forEach(r => {
|
|
expect(r.metrics.modularityScore).toBeGreaterThan(0);
|
|
});
|
|
});
|
|
});
|
|
|
|
describe('Scalability', () => {
|
|
it('should scale to 100k nodes', () => {
|
|
const sizes = clusteringAnalysisScenario.config.vectorCounts;
|
|
expect(sizes).toContain(100000);
|
|
});
|
|
|
|
it('should maintain quality at scale', () => {
|
|
const large = (report.detailedResults as any[]).filter(r => r.vectorCount === 100000);
|
|
large.forEach(r => {
|
|
expect(r.metrics.modularityScore).toBeGreaterThan(0.70);
|
|
});
|
|
});
|
|
|
|
it('should track detection time', () => {
|
|
const results = report.detailedResults as any[];
|
|
results.forEach(r => {
|
|
expect(r.detectionTimeMs).toBeGreaterThan(0);
|
|
});
|
|
});
|
|
});
|
|
|
|
describe('Report Generation', () => {
|
|
it('should generate analysis report', () => {
|
|
expect(report.analysis).toBeDefined();
|
|
expect(report.analysis).toContain('Clustering');
|
|
});
|
|
|
|
it('should provide recommendations', () => {
|
|
expect(report.recommendations).toBeDefined();
|
|
expect(report.recommendations.some(r => r.includes('Louvain'))).toBe(true);
|
|
});
|
|
|
|
it('should generate visualizations', () => {
|
|
expect(report.artifacts.dendrograms).toBeDefined();
|
|
expect(report.artifacts.communityVisualizations).toBeDefined();
|
|
expect(report.artifacts.modularityCharts).toBeDefined();
|
|
});
|
|
|
|
it('should complete within timeout', () => {
|
|
expect(report.executionTimeMs).toBeLessThan(90000);
|
|
});
|
|
});
|
|
});
|