tasq/node_modules/agentdb/simulation/tests/latent-space/clustering-analysis.test.ts

/**
 * Clustering Analysis Simulation Tests
 *
 * Tests community detection algorithms and semantic clustering quality
 * in RuVector's latent space.
 *
 * Target Metrics:
 * - Louvain selected as the best algorithm
 * - Modularity Q > 0.75 (target: 0.758)
 * - Semantic purity > 85% (target: 87.2%)
 * - Hierarchical levels: 3
 * - Community detection quality
 */

import { describe, it, expect, beforeAll } from 'vitest';
import { clusteringAnalysisScenario } from '../../scenarios/latent-space/clustering-analysis';
import type { SimulationReport } from '../../types';

describe('ClusteringAnalysis', () => {
  let report: SimulationReport;

  // Run the scenario once, using its own config, and keep the resulting
  // report for the report-based tests in this suite.
  beforeAll(async () => {
    const { config } = clusteringAnalysisScenario;
    report = await clusteringAnalysisScenario.run(config);
  }, 90000); // hook timeout in milliseconds (90s)

  // Checks which algorithm the report ranks best and which algorithms the
  // scenario config lists.
  describe('Optimal Algorithm', () => {
    // Looks up a configured algorithm entry by name. The config is read at
    // call time, i.e. from inside the running test.
    const findConfigured = (name: string) =>
      clusteringAnalysisScenario.config.algorithms.find(entry => entry.name === name);

    it('should select Louvain as best', () => {
      expect(report.summary.bestAlgorithm.algorithm).toBe('louvain');
    });

    it('should test Louvain algorithm', () => {
      expect(findConfigured('louvain')).toBeDefined();
    });

    it('should test Label Propagation', () => {
      expect(findConfigured('label-propagation')).toBeDefined();
    });

    it('should test multiple algorithms', () => {
      const { algorithms } = clusteringAnalysisScenario.config;
      expect(algorithms.length).toBeGreaterThanOrEqual(3);
    });
  });

  // Checks the summary's average modularity against the threshold and target,
  // and that every detailed result's modularity score lies in (0, 1].
  describe('Modularity Score', () => {
    it('should achieve Q >0.75', () => {
      const avgModularity = report.summary.avgModularity;
      expect(avgModularity).toBeGreaterThan(0.75);
    });

    it('should target Q=0.758', () => {
      // The second argument of toBeCloseTo is a number of decimal digits, not
      // an absolute tolerance: passing 0.05 there allowed a deviation of
      // 10^-0.05 / 2 (about 0.446). Assert the intended +/-0.05 window
      // explicitly instead.
      const target = 0.758;
      const tolerance = 0.05;
      const avgModularity = report.summary.avgModularity;
      expect(avgModularity).toBeGreaterThanOrEqual(target - tolerance);
      expect(avgModularity).toBeLessThanOrEqual(target + tolerance);
    });

    it('should have positive modularity', () => {
      for (const result of report.detailedResults as any[]) {
        expect(result.metrics.modularityScore).toBeGreaterThan(0);
      }
    });

    it('should not exceed 1.0', () => {
      for (const result of report.detailedResults as any[]) {
        expect(result.metrics.modularityScore).toBeLessThanOrEqual(1.0);
      }
    });
  });

  // Checks the summary's semantic purity against the threshold and target,
  // and the per-result overlap between graph clusters and embedding clusters.
  describe('Semantic Purity', () => {
    it('should achieve >85% semantic purity', () => {
      const purity = report.summary.semanticPurity;
      expect(purity).toBeGreaterThan(0.85);
    });

    it('should target 87.2% semantic purity', () => {
      // The second argument of toBeCloseTo is a number of decimal digits, not
      // an absolute tolerance: passing 0.03 there allowed a deviation of
      // 10^-0.03 / 2 (about 0.467). Assert the intended +/-0.03 window
      // explicitly instead.
      const target = 0.872;
      const tolerance = 0.03;
      const purity = report.summary.semanticPurity;
      expect(purity).toBeGreaterThanOrEqual(target - tolerance);
      expect(purity).toBeLessThanOrEqual(target + tolerance);
    });

    it('should align graph clusters with embeddings', () => {
      for (const result of report.detailedResults as any[]) {
        expect(result.metrics.embeddingClusterOverlap).toBeGreaterThan(0.7);
      }
    });
  });

  // Checks community counts and the shape of the community distribution.
  describe('Community Structure', () => {
    it('should detect multiple communities', () => {
      for (const { metrics } of report.detailedResults as any[]) {
        expect(metrics.numCommunities).toBeGreaterThan(1);
      }
    });

    it('should have balanced distribution', () => {
      // Only the type is verified here: the distribution must be an array.
      for (const { metrics } of report.detailedResults as any[]) {
        const isList = Array.isArray(metrics.communityDistribution);
        expect(isList).toBe(true);
      }
    });

    it('should track community sizes', () => {
      const { communityStructure } = report.metrics;
      expect(communityStructure.avgNumCommunities).toBeGreaterThan(0);
    });
  });

  // Checks hierarchy depth, dendrogram balance and the merging pattern.
  describe('Hierarchical Properties', () => {
    it('should have hierarchical depth', () => {
      for (const { metrics } of report.detailedResults as any[]) {
        expect(metrics.hierarchyDepth).toBeGreaterThan(0);
      }
    });

    it('should target 3 hierarchical levels', () => {
      const hierarchy = report.metrics.hierarchicalProperties;
      // Nothing is asserted when the aggregate or its avgDepth is missing/falsy.
      if (!hierarchy || !hierarchy.avgDepth) {
        return;
      }
      // numDigits = 1, so this passes when |avgDepth - 3| < 0.05.
      expect(hierarchy.avgDepth).toBeCloseTo(3, 1);
    });

    it('should track dendrogram balance', () => {
      for (const { metrics } of report.detailedResults as any[]) {
        // Results without a truthy dendrogramBalance are skipped.
        if (!metrics.dendrogramBalance) {
          continue;
        }
        expect(metrics.dendrogramBalance).toBeGreaterThan(0);
      }
    });

    it('should record merging pattern', () => {
      for (const { metrics } of report.detailedResults as any[]) {
        const isList = Array.isArray(metrics.mergingPattern);
        expect(isList).toBe(true);
      }
    });
  });

  // Checks cross-modal alignment scores and the configured semantic categories.
  describe('Semantic Alignment', () => {
    it('should measure cross-modal alignment', () => {
      for (const { metrics } of report.detailedResults as any[]) {
        // Results without a truthy crossModalAlignment are skipped.
        if (!metrics.crossModalAlignment) {
          continue;
        }
        expect(metrics.crossModalAlignment).toBeGreaterThan(0.7);
      }
    });

    it('should validate semantic categories', () => {
      const { semanticCategories } = clusteringAnalysisScenario.config;
      expect(semanticCategories).toContain('text');
      expect(semanticCategories).toContain('code');
    });
  });

  // Checks the collaboration-related metrics of each detailed result.
  describe('Agent Collaboration', () => {
    // Asserts `metrics[key] > floor` for every result whose metric is truthy;
    // results where the metric is missing or falsy are skipped.
    const expectAboveWhenPresent = (key: string, floor: number) => {
      for (const { metrics } of report.detailedResults as any[]) {
        const value = metrics[key];
        if (value) {
          expect(value).toBeGreaterThan(floor);
        }
      }
    };

    it('should identify collaboration clusters', () => {
      for (const { metrics } of report.detailedResults as any[]) {
        expect(metrics.collaborationClusters).toBeGreaterThanOrEqual(0);
      }
    });

    it('should measure task specialization', () => {
      expectAboveWhenPresent('taskSpecialization', 0.6);
    });

    it('should track communication efficiency', () => {
      expectAboveWhenPresent('communicationEfficiency', 0.7);
    });
  });

  // Checks per-algorithm results; every check is skipped when the report has
  // no result for the algorithm in question.
  describe('Algorithm Comparison', () => {
    // First detailed result whose `algorithm` field equals the given name,
    // or undefined when there is none.
    const resultFor = (algorithm: string) =>
      (report.detailedResults as any[]).find(entry => entry.algorithm === algorithm);

    it('should compare Louvain vs Label Propagation', () => {
      const louvain = resultFor('louvain');
      const lp = resultFor('label-propagation');
      if (!louvain || !lp) {
        return;
      }
      expect(louvain.metrics.modularityScore).toBeGreaterThan(0);
      expect(lp.metrics.modularityScore).toBeGreaterThan(0);
    });

    it('should test Leiden algorithm', () => {
      const leiden = resultFor('leiden');
      if (!leiden) {
        return;
      }
      expect(leiden.metrics.modularityScore).toBeGreaterThan(0.7);
    });

    it('should test spectral clustering', () => {
      const spectral = resultFor('spectral');
      if (!spectral) {
        return;
      }
      expect(spectral.metrics.numCommunities).toBeGreaterThan(0);
    });
  });

  // Checks the configured graph densities and the modularity of results
  // recorded at densities 0.01 and 0.1.
  describe('Graph Density Impact', () => {
    it('should test multiple densities', () => {
      const { graphDensities } = clusteringAnalysisScenario.config;
      expect(graphDensities.length).toBeGreaterThanOrEqual(3);
    });

    it('should handle sparse graphs', () => {
      for (const result of report.detailedResults as any[]) {
        // Only results recorded at density 0.01 are checked.
        if (result.graphDensity !== 0.01) {
          continue;
        }
        expect(result.metrics.modularityScore).toBeGreaterThan(0);
      }
    });

    it('should handle dense graphs', () => {
      for (const result of report.detailedResults as any[]) {
        // Only results recorded at density 0.1 are checked.
        if (result.graphDensity !== 0.1) {
          continue;
        }
        expect(result.metrics.modularityScore).toBeGreaterThan(0);
      }
    });
  });

  // Checks the 100k-vector configuration, modularity at that size, and that
  // every result records a detection time.
  describe('Scalability', () => {
    it('should scale to 100k nodes', () => {
      const { vectorCounts } = clusteringAnalysisScenario.config;
      expect(vectorCounts).toContain(100000);
    });

    it('should maintain quality at scale', () => {
      for (const result of report.detailedResults as any[]) {
        // Only results with vectorCount 100000 are checked.
        if (result.vectorCount !== 100000) {
          continue;
        }
        expect(result.metrics.modularityScore).toBeGreaterThan(0.70);
      }
    });

    it('should track detection time', () => {
      for (const { detectionTimeMs } of report.detailedResults as any[]) {
        expect(detectionTimeMs).toBeGreaterThan(0);
      }
    });
  });

  // Checks the report's analysis text, recommendations, artifacts and
  // recorded execution time.
  describe('Report Generation', () => {
    it('should generate analysis report', () => {
      const { analysis } = report;
      expect(analysis).toBeDefined();
      expect(analysis).toContain('Clustering');
    });

    it('should provide recommendations', () => {
      const { recommendations } = report;
      expect(recommendations).toBeDefined();
      // At least one recommendation must mention 'Louvain'.
      const mentionsLouvain = recommendations.some(text => text.includes('Louvain'));
      expect(mentionsLouvain).toBe(true);
    });

    it('should generate visualizations', () => {
      const { artifacts } = report;
      expect(artifacts.dendrograms).toBeDefined();
      expect(artifacts.communityVisualizations).toBeDefined();
      expect(artifacts.modularityCharts).toBeDefined();
    });

    it('should complete within timeout', () => {
      // The reported execution time must be under 90000 ms.
      const { executionTimeMs } = report;
      expect(executionTimeMs).toBeLessThan(90000);
    });
  });
});