tasq/node_modules/agentic-flow/dist/intelligence/embedding-benchmark.js

155 lines
6.7 KiB
JavaScript

/**
* Embedding Benchmark - Compare simple vs ONNX embeddings
*
* Run with: npx ts-node src/intelligence/embedding-benchmark.ts
*/
import { getEmbeddingService, EmbeddingService } from './EmbeddingService.js';
// Workload for the latency measurements: ten short, realistic
// developer-task phrases (cold start, warm loop, and batch all reuse these).
const TEST_TEXTS = [
    "Fix a bug in the authentication system",
    "Implement user login functionality",
    "Write unit tests for the API",
    "Refactor the database layer",
    "Optimize memory usage",
    "Add dark mode to the UI",
    "Deploy to production",
    "Review pull request",
    "Document the API endpoints",
    "Set up CI/CD pipeline",
];
// Sentence pairs with a human-judged similarity label ('high' | 'medium' |
// 'low'); used to score each backend's semantic accuracy.
const SEMANTIC_PAIRS = [
    { a: "I love dogs", b: "I adore puppies", expected: "high" },
    { a: "Fix authentication bug", b: "Repair login issue", expected: "high" },
    { a: "Write unit tests", b: "Create test cases", expected: "high" },
    { a: "Deploy to production", b: "The weather is nice", expected: "low" },
    { a: "Fix bug", b: "Add feature", expected: "medium" },
    { a: "Machine learning", b: "Artificial intelligence", expected: "high" },
    { a: "Pizza recipe", b: "Quantum physics", expected: "low" },
];
/**
 * Benchmarks the 'simple' and 'onnx' embedding backends against the same
 * workload, prints a side-by-side comparison table, and emits a
 * recommendation for which backend to use.
 *
 * Fix: the previous version permanently overwrote
 * process.env.AGENTIC_FLOW_EMBEDDINGS (leaving it set to 'onnx' even when
 * it was originally unset or 'simple'); the original value is now restored
 * in a finally block, even if a benchmark run throws.
 *
 * @returns {Promise<void>} resolves when all output has been printed
 */
async function runBenchmark() {
    console.log('='.repeat(60));
    console.log('Embedding Benchmark: Simple vs ONNX');
    console.log('='.repeat(60));
    // Preserve the caller's backend selection so we can restore it below.
    const savedBackend = process.env.AGENTIC_FLOW_EMBEDDINGS;
    let simpleResults;
    let onnxResults;
    try {
        // Benchmark Simple Embeddings
        console.log('\n--- Simple Embeddings ---');
        process.env.AGENTIC_FLOW_EMBEDDINGS = 'simple';
        EmbeddingService.reset();
        const simpleService = getEmbeddingService();
        simpleResults = await benchmarkService(simpleService, 'simple');
        // Benchmark ONNX Embeddings
        console.log('\n--- ONNX Embeddings ---');
        process.env.AGENTIC_FLOW_EMBEDDINGS = 'onnx';
        EmbeddingService.reset();
        const onnxService = getEmbeddingService();
        onnxResults = await benchmarkService(onnxService, 'onnx');
    }
    finally {
        // Restore the environment so the benchmark leaves no lasting state
        // on the process (previously it stayed pinned to 'onnx').
        if (savedBackend === undefined) {
            delete process.env.AGENTIC_FLOW_EMBEDDINGS;
        }
        else {
            process.env.AGENTIC_FLOW_EMBEDDINGS = savedBackend;
        }
    }
    // Summary table comparing both backends on latency, dimension, accuracy.
    console.log('\n' + '='.repeat(60));
    console.log('SUMMARY');
    console.log('='.repeat(60));
    console.log(`
| Metric | Simple | ONNX |
|-----------------------|-------------|-------------|
| Avg Latency (cold) | ${simpleResults.avgColdLatency.toFixed(2)}ms | ${onnxResults.avgColdLatency.toFixed(2)}ms |
| Avg Latency (warm) | ${simpleResults.avgWarmLatency.toFixed(2)}ms | ${onnxResults.avgWarmLatency.toFixed(2)}ms |
| Batch 10 texts | ${simpleResults.batchLatency.toFixed(2)}ms | ${onnxResults.batchLatency.toFixed(2)}ms |
| Dimension | ${simpleResults.dimension} | ${onnxResults.dimension} |
| Semantic Accuracy | ${simpleResults.semanticAccuracy}% | ${onnxResults.semanticAccuracy}% |
`);
    // Per-pair similarity scores so the reader can eyeball where each
    // backend succeeds or fails.
    console.log('\nSemantic Similarity Comparison:');
    console.log('-'.repeat(60));
    for (let i = 0; i < SEMANTIC_PAIRS.length; i++) {
        const pair = SEMANTIC_PAIRS[i];
        console.log(`"${pair.a}" vs "${pair.b}"`);
        console.log(` Expected: ${pair.expected}`);
        console.log(` Simple: ${simpleResults.similarities[i].toFixed(3)}`);
        console.log(` ONNX: ${onnxResults.similarities[i].toFixed(3)}`);
        console.log();
    }
    // Recommendation: only suggest ONNX when its accuracy beats simple by
    // more than 20 percentage points (enough to justify the model-load cost).
    console.log('='.repeat(60));
    console.log('RECOMMENDATION');
    console.log('='.repeat(60));
    if (onnxResults.semanticAccuracy > simpleResults.semanticAccuracy + 20) {
        console.log(`
ONNX embeddings provide significantly better semantic accuracy
(${onnxResults.semanticAccuracy}% vs ${simpleResults.semanticAccuracy}%).
For tasks requiring semantic understanding (routing, pattern matching),
use ONNX embeddings:
export AGENTIC_FLOW_EMBEDDINGS=onnx
Note: First embedding takes ~${(onnxResults.avgColdLatency / 1000).toFixed(1)}s (model loading).
Subsequent embeddings: ~${onnxResults.avgWarmLatency.toFixed(1)}ms.
`);
    }
    else {
        console.log(`
Simple embeddings are sufficient for your use case.
Semantic accuracy difference is minimal.
Keep using simple embeddings for maximum speed:
export AGENTIC_FLOW_EMBEDDINGS=simple
`);
    }
}
/**
 * Runs the benchmark workload against one embedding service implementation.
 *
 * Measures: cold-start latency (first embed; includes model loading for
 * ONNX), average warm latency over TEST_TEXTS, batch-embedding latency,
 * and a coarse semantic-accuracy score over SEMANTIC_PAIRS.
 *
 * Fix: the batch log line hard-coded "(10 texts)"; it now reports
 * TEST_TEXTS.length so the log stays correct if the workload changes
 * (every other log line already derives its count from the array).
 *
 * @param {object} service - embedding service exposing embed, embedBatch,
 *   similarity, clearCache, and getStats
 * @param {string} name - label used to prefix log output
 * @returns {Promise<{avgColdLatency: number, avgWarmLatency: number,
 *   batchLatency: number, dimension: number, semanticAccuracy: number,
 *   similarities: number[]}>}
 */
async function benchmarkService(service, name) {
    // Cold start (first embedding, includes model loading for ONNX)
    console.log(`\n[${name}] Cold start embedding...`);
    const coldStart = performance.now();
    await service.embed(TEST_TEXTS[0]);
    const coldLatency = performance.now() - coldStart;
    console.log(` Cold latency: ${coldLatency.toFixed(2)}ms`);
    // Warm embeddings: sequential embeds with the cache cleared so each
    // text is actually computed, not served from cache.
    console.log(`[${name}] Warm embeddings (${TEST_TEXTS.length} texts)...`);
    service.clearCache();
    const warmStart = performance.now();
    for (const text of TEST_TEXTS) {
        await service.embed(text);
    }
    const warmTotalLatency = performance.now() - warmStart;
    const avgWarmLatency = warmTotalLatency / TEST_TEXTS.length;
    console.log(` Total: ${warmTotalLatency.toFixed(2)}ms, Avg: ${avgWarmLatency.toFixed(2)}ms`);
    // Batch embedding of the same workload in one call.
    console.log(`[${name}] Batch embedding (${TEST_TEXTS.length} texts)...`);
    service.clearCache();
    const batchStart = performance.now();
    await service.embedBatch(TEST_TEXTS);
    const batchLatency = performance.now() - batchStart;
    const batchPerText = batchLatency / TEST_TEXTS.length;
    console.log(` Batch latency: ${batchLatency.toFixed(2)}ms (${batchPerText.toFixed(2)}ms per text)`);
    // Compare batch vs sequential (guard against a zero warm latency).
    const speedup = avgWarmLatency > 0 ? avgWarmLatency / batchPerText : 0;
    console.log(` Batch speedup: ${speedup.toFixed(1)}x vs sequential`);
    // Semantic similarity tests: score each pair against its expected label.
    console.log(`[${name}] Semantic similarity tests...`);
    const similarities = [];
    let correctCount = 0;
    for (const pair of SEMANTIC_PAIRS) {
        const sim = await service.similarity(pair.a, pair.b);
        similarities.push(sim);
        // For ONNX (semantic), use proper thresholds
        // For simple (hash-based), it will score incorrectly on unrelated pairs
        const isCorrect = (pair.expected === 'high' && sim > 0.5) ||
            (pair.expected === 'medium' && sim >= 0.2 && sim <= 0.6) ||
            (pair.expected === 'low' && sim < 0.3);
        if (isCorrect)
            correctCount++;
        console.log(` "${pair.a.substring(0, 20)}..." vs "${pair.b.substring(0, 20)}...": ${sim.toFixed(3)} (expected: ${pair.expected})`);
    }
    const semanticAccuracy = Math.round((correctCount / SEMANTIC_PAIRS.length) * 100);
    console.log(` Semantic accuracy: ${semanticAccuracy}%`);
    const stats = service.getStats();
    console.log(` Model: ${stats.modelName || 'N/A'}, SIMD: ${stats.simdAvailable ?? 'N/A'}`);
    return {
        avgColdLatency: coldLatency,
        avgWarmLatency,
        batchLatency,
        dimension: stats.dimension,
        semanticAccuracy,
        similarities,
    };
}
// Entry point: this compiled script runs the benchmark unconditionally on
// load. On failure, log the error AND report a non-zero exit code
// (fix: the old `.catch(console.error)` logged the failure but exited 0,
// which hid benchmark failures from CI and shell scripts).
runBenchmark().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
//# sourceMappingURL=embedding-benchmark.js.map