// tasq/node_modules/@claude-flow/embeddings/dist/embedding-service.js
//
// 945 lines
// 33 KiB
// JavaScript

/**
* V3 Embedding Service Implementation
*
* Production embedding service aligned with agentic-flow@alpha:
* - OpenAI provider (text-embedding-3-small/large)
* - Transformers.js provider (local ONNX models)
* - Mock provider (development/testing)
*
* Performance Targets:
* - Single embedding: <100ms (API), <50ms (local)
* - Batch embedding: <500ms for 10 items
* - Cache hit: <1ms
*/
import { EventEmitter } from 'events';
import { normalize } from './normalization.js';
import { PersistentEmbeddingCache } from './persistent-cache.js';
// ============================================================================
// LRU Cache Implementation
// ============================================================================
/**
 * Small least-recently-used cache.
 *
 * Recency is tracked through the insertion order of the backing Map: the
 * first key in iteration order is the least recently used one. Lookups are
 * counted as hits or misses so a hit rate can be reported.
 */
class LRUCache {
  maxSize;
  cache = new Map();
  hits = 0;
  misses = 0;

  /**
   * @param {number} maxSize - Entry count at which inserting a new key evicts the oldest one.
   */
  constructor(maxSize) {
    this.maxSize = maxSize;
  }

  /**
   * Look up a key, refreshing its recency on a hit.
   *
   * @param {*} key
   * @returns {*} The stored value, or undefined on a miss (a stored
   *   `undefined` is indistinguishable from a miss).
   */
  get(key) {
    const entry = this.cache.get(key);
    if (entry === undefined) {
      this.misses += 1;
      return undefined;
    }
    // Delete + re-insert moves the key to the end of the Map's order.
    this.cache.delete(key);
    this.cache.set(key, entry);
    this.hits += 1;
    return entry;
  }

  /**
   * Insert or overwrite a key, evicting the oldest entry when a new key
   * arrives at capacity.
   *
   * @param {*} key
   * @param {*} value
   */
  set(key, value) {
    // Map#delete reports whether the key existed; an existing key is simply
    // re-inserted at the end and never triggers an eviction.
    const wasPresent = this.cache.delete(key);
    if (!wasPresent && this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      if (oldestKey !== undefined) {
        this.cache.delete(oldestKey);
      }
    }
    this.cache.set(key, value);
  }

  /** Drop every entry and reset the hit/miss counters. */
  clear() {
    this.cache.clear();
    this.hits = 0;
    this.misses = 0;
  }

  /** @returns {number} Number of entries currently stored. */
  get size() {
    return this.cache.size;
  }

  /** @returns {number} hits / (hits + misses), or 0 before any lookup. */
  get hitRate() {
    const lookups = this.hits + this.misses;
    if (lookups > 0) {
      return this.hits / lookups;
    }
    return 0;
  }

  /**
   * @returns {{size: number, maxSize: number, hits: number, misses: number, hitRate: number}}
   */
  getStats() {
    const { maxSize, hits, misses, hitRate } = this;
    return {
      size: this.cache.size,
      maxSize,
      hits,
      misses,
      hitRate,
    };
  }
}
// ============================================================================
// Base Embedding Service
// ============================================================================
/**
 * Common state and helpers shared by the embedding providers in this file:
 * an in-memory LRU cache, an optional persistent cache, a normalization
 * setting, and event fan-out to both plain listeners and EventEmitter
 * subscribers.
 *
 * NOTE(review): within this file no provider calls applyNormalization,
 * checkPersistentCache or storePersistentCache — confirm whether callers
 * elsewhere are expected to use them.
 */
class BaseEmbeddingService extends EventEmitter {
  config;
  cache;
  persistentCache = null;
  embeddingListeners = new Set();
  normalizationType;

  /**
   * @param {object} config - Provider configuration. Reads `cacheSize`
   *   (default 1000), `normalization` (default 'none') and
   *   `persistentCache` ({ enabled, dbPath, maxSize, ttlMs }).
   */
  constructor(config) {
    super();
    this.config = config;
    this.cache = new LRUCache(config.cacheSize ?? 1000);
    this.normalizationType = config.normalization ?? 'none';

    // The persistent cache is opt-in via persistentCache.enabled.
    const persistent = config.persistentCache;
    if (persistent?.enabled) {
      this.persistentCache = new PersistentEmbeddingCache({
        dbPath: persistent.dbPath ?? '.cache/embeddings.db',
        maxSize: persistent.maxSize ?? 10000,
        ttlMs: persistent.ttlMs,
      });
    }
  }

  /**
   * Normalize an embedding with the configured normalization type.
   *
   * @param {*} embedding
   * @returns {*} The input untouched when the type is 'none', otherwise the
   *   result of `normalize(embedding, { type })`.
   */
  applyNormalization(embedding) {
    return this.normalizationType === 'none'
      ? embedding
      : normalize(embedding, { type: this.normalizationType });
  }

  /**
   * Look up a text in the persistent cache.
   *
   * @param {string} text
   * @returns {Promise<*>} null when no persistent cache is configured,
   *   otherwise whatever the persistent cache's `get` yields.
   */
  async checkPersistentCache(text) {
    if (this.persistentCache) {
      return this.persistentCache.get(text);
    }
    return null;
  }

  /**
   * Write an embedding to the persistent cache; a no-op when none is configured.
   *
   * @param {string} text
   * @param {*} embedding
   * @returns {Promise<void>}
   */
  async storePersistentCache(text, embedding) {
    if (this.persistentCache) {
      await this.persistentCache.set(text, embedding);
    }
  }

  /**
   * Deliver an event to every registered listener, then re-emit it on the
   * EventEmitter channel named after `event.type`.
   *
   * @param {{type: string}} event
   */
  emitEvent(event) {
    this.embeddingListeners.forEach((notify) => {
      // One failing listener must not prevent the others from running.
      try {
        notify(event);
      } catch (error) {
        console.error('Error in embedding event listener:', error);
      }
    });
    this.emit(event.type, event);
  }

  /** @param {Function} listener - Called with every event passed to emitEvent. */
  addEventListener(listener) {
    this.embeddingListeners.add(listener);
  }

  /** @param {Function} listener - Listener previously passed to addEventListener. */
  removeEventListener(listener) {
    this.embeddingListeners.delete(listener);
  }

  /** Empty the in-memory cache and announce how many entries were dropped. */
  clearCache() {
    const evictedCount = this.cache.size;
    this.cache.clear();
    this.emitEvent({ type: 'cache_eviction', size: evictedCount });
  }

  /** @returns {{size: number, maxSize: number, hitRate: number}} In-memory cache statistics. */
  getCacheStats() {
    const { size, maxSize, hitRate } = this.cache.getStats();
    return { size, maxSize, hitRate };
  }

  /**
   * Clear the in-memory cache (listeners still receive the eviction event)
   * and then drop all listeners.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    this.clearCache();
    this.embeddingListeners.clear();
  }
}
// ============================================================================
// OpenAI Embedding Service
// ============================================================================
/**
 * Embedding provider backed by an OpenAI-style HTTP embeddings endpoint.
 *
 * Results are memoized per input text in the inherited in-memory cache, and
 * progress is reported through the inherited emitEvent().
 */
export class OpenAIEmbeddingService extends BaseEmbeddingService {
  provider = 'openai';
  apiKey;
  model;
  baseURL;
  timeout;
  maxRetries;

  /**
   * @param {object} config - Reads `apiKey`, `model` (default
   *   'text-embedding-3-small'), `baseURL` (default OpenAI embeddings URL),
   *   `timeout` in ms (default 30000), `maxRetries` (default 3, counted as
   *   total attempts) and `dimensions`, plus the base-class options.
   */
  constructor(config) {
    super(config);
    this.apiKey = config.apiKey;
    this.model = config.model ?? 'text-embedding-3-small';
    this.baseURL = config.baseURL ?? 'https://api.openai.com/v1/embeddings';
    this.timeout = config.timeout ?? 30000;
    this.maxRetries = config.maxRetries ?? 3;
  }

  /**
   * Embed a single text.
   *
   * @param {string} text
   * @returns {Promise<object>} `{ embedding, latencyMs, cached: true }` on a
   *   cache hit, otherwise `{ embedding, latencyMs, usage }`.
   * @throws {Error} "OpenAI embedding failed: ..." with the underlying error
   *   attached as `cause`.
   */
  async embed(text) {
    const cached = this.cache.get(text);
    if (cached) {
      this.emitEvent({ type: 'cache_hit', text });
      return {
        embedding: cached,
        latencyMs: 0,
        cached: true,
      };
    }
    this.emitEvent({ type: 'embed_start', text });
    const startTime = performance.now();
    try {
      const response = await this.callOpenAI([text]);
      const embedding = new Float32Array(response.data[0].embedding);
      this.cache.set(text, embedding);
      const latencyMs = performance.now() - startTime;
      this.emitEvent({ type: 'embed_complete', text, latencyMs });
      return {
        embedding,
        latencyMs,
        usage: {
          promptTokens: response.usage?.prompt_tokens ?? 0,
          totalTokens: response.usage?.total_tokens ?? 0,
        },
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      this.emitEvent({ type: 'embed_error', text, error: message });
      // Keep the original error reachable for callers that need status/stack.
      throw new Error(`OpenAI embedding failed: ${message}`, { cause: error });
    }
  }

  /**
   * Embed several texts, sending only the cache misses to the API in one
   * request and returning embeddings in the order of `texts`.
   *
   * @param {string[]} texts
   * @returns {Promise<object>} `{ embeddings, totalLatencyMs, avgLatencyMs,
   *   usage, cacheStats: { hits, misses } }`. `avgLatencyMs` is 0 for an
   *   empty batch.
   */
  async embedBatch(texts) {
    this.emitEvent({ type: 'batch_start', count: texts.length });
    const startTime = performance.now();

    // Split the batch into cache hits and texts that still need a request,
    // remembering each one's original position.
    const cached = [];
    const uncached = [];
    texts.forEach((text, index) => {
      const cachedEmbedding = this.cache.get(text);
      if (cachedEmbedding) {
        cached.push({ index, embedding: cachedEmbedding });
        this.emitEvent({ type: 'cache_hit', text });
      } else {
        uncached.push({ index, text });
      }
    });

    let apiEmbeddings = [];
    let usage = { promptTokens: 0, totalTokens: 0 };
    if (uncached.length > 0) {
      const response = await this.callOpenAI(uncached.map((u) => u.text));
      apiEmbeddings = response.data.map((d) => new Float32Array(d.embedding));
      uncached.forEach((item, i) => {
        this.cache.set(item.text, apiEmbeddings[i]);
      });
      usage = {
        promptTokens: response.usage?.prompt_tokens ?? 0,
        totalTokens: response.usage?.total_tokens ?? 0,
      };
    }

    // Stitch hits and fresh results back into the caller's order.
    const embeddings = new Array(texts.length);
    for (const hit of cached) {
      embeddings[hit.index] = hit.embedding;
    }
    uncached.forEach((miss, i) => {
      embeddings[miss.index] = apiEmbeddings[i];
    });

    const totalLatencyMs = performance.now() - startTime;
    this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
    return {
      embeddings,
      totalLatencyMs,
      // Guard the division so an empty batch reports 0 instead of NaN/Infinity.
      avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
      usage,
      cacheStats: {
        hits: cached.length,
        misses: uncached.length,
      },
    };
  }

  /**
   * POST the texts to the embeddings endpoint with a per-attempt timeout and
   * exponential backoff (100ms, 200ms, 400ms, ...) between attempts.
   *
   * @param {string[]} texts
   * @returns {Promise<object>} Parsed JSON response body.
   * @throws {Error} The last attempt's error once all attempts are used up.
   */
  async callOpenAI(texts) {
    const { dimensions } = this.config;
    // maxRetries counts total attempts; always make at least one so that
    // maxRetries: 0 does not fail without ever contacting the API.
    const attempts = Math.max(1, this.maxRetries);
    for (let attempt = 0; attempt < attempts; attempt++) {
      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), this.timeout);
      try {
        const response = await fetch(this.baseURL, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${this.apiKey}`,
          },
          body: JSON.stringify({
            model: this.model,
            input: texts,
            dimensions,
          }),
          signal: controller.signal,
        });
        if (!response.ok) {
          const error = await response.text();
          throw new Error(`OpenAI API error: ${response.status} - ${error}`);
        }
        return await response.json();
      } catch (error) {
        if (attempt === attempts - 1) {
          throw error;
        }
      } finally {
        // Always release the timer: it previously leaked when fetch rejected,
        // and clearing it here also lets the timeout cover the body read.
        clearTimeout(timeoutId);
      }
      // Exponential backoff before the next attempt.
      await new Promise((resolve) => setTimeout(resolve, Math.pow(2, attempt) * 100));
    }
    throw new Error('Max retries exceeded');
  }
}
// ============================================================================
// Transformers.js Embedding Service
// ============================================================================
/**
 * Embedding provider that runs a local feature-extraction pipeline from
 * '@xenova/transformers', loaded lazily on first use.
 */
export class TransformersEmbeddingService extends BaseEmbeddingService {
  provider = 'transformers';
  pipeline = null;
  modelName;
  initialized = false;
  // In-flight initialization, shared by concurrent callers.
  initPromise = null;

  /**
   * @param {object} config - Reads `model` (default
   *   'Xenova/all-MiniLM-L6-v2') plus the base-class options.
   */
  constructor(config) {
    super(config);
    this.modelName = config.model ?? 'Xenova/all-MiniLM-L6-v2';
  }

  /**
   * Load the pipeline once. Concurrent callers share the same in-flight
   * load instead of each importing and constructing the model; a failed load
   * is forgotten so a later call can retry.
   *
   * @returns {Promise<void>}
   * @throws {Error} "Failed to initialize transformers.js: ..." with the
   *   underlying error attached as `cause`.
   */
  async initialize() {
    if (this.initialized) {
      return;
    }
    if (this.initPromise === null) {
      this.initPromise = this.loadPipeline().catch((error) => {
        this.initPromise = null;
        throw error;
      });
    }
    await this.initPromise;
  }

  /** Import transformers.js and build the feature-extraction pipeline. */
  async loadPipeline() {
    try {
      const { pipeline } = await import('@xenova/transformers');
      this.pipeline = await pipeline('feature-extraction', this.modelName);
      this.initialized = true;
    } catch (error) {
      throw new Error(`Failed to initialize transformers.js: ${error}`, { cause: error });
    }
  }

  /**
   * Embed a single text (mean-pooled, normalized by the pipeline).
   *
   * @param {string} text
   * @returns {Promise<object>} `{ embedding, latencyMs, cached: true }` on a
   *   cache hit, otherwise `{ embedding, latencyMs }`.
   * @throws {Error} "Transformers.js embedding failed: ..." with the
   *   underlying error attached as `cause`.
   */
  async embed(text) {
    await this.initialize();
    const cached = this.cache.get(text);
    if (cached) {
      this.emitEvent({ type: 'cache_hit', text });
      return {
        embedding: cached,
        latencyMs: 0,
        cached: true,
      };
    }
    this.emitEvent({ type: 'embed_start', text });
    const startTime = performance.now();
    try {
      const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
      const embedding = new Float32Array(output.data);
      this.cache.set(text, embedding);
      const latencyMs = performance.now() - startTime;
      this.emitEvent({ type: 'embed_complete', text, latencyMs });
      return {
        embedding,
        latencyMs,
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      this.emitEvent({ type: 'embed_error', text, error: message });
      throw new Error(`Transformers.js embedding failed: ${message}`, { cause: error });
    }
  }

  /**
   * Embed several texts one after another, reusing cached vectors.
   *
   * @param {string[]} texts
   * @returns {Promise<object>} `{ embeddings, totalLatencyMs, avgLatencyMs,
   *   cacheStats: { hits, misses } }`. `avgLatencyMs` is 0 for an empty batch.
   */
  async embedBatch(texts) {
    await this.initialize();
    this.emitEvent({ type: 'batch_start', count: texts.length });
    const startTime = performance.now();
    const embeddings = [];
    let cacheHits = 0;
    for (const text of texts) {
      const cached = this.cache.get(text);
      if (cached) {
        embeddings.push(cached);
        cacheHits++;
        this.emitEvent({ type: 'cache_hit', text });
        continue;
      }
      const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
      const embedding = new Float32Array(output.data);
      this.cache.set(text, embedding);
      embeddings.push(embedding);
    }
    const totalLatencyMs = performance.now() - startTime;
    this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
    return {
      embeddings,
      totalLatencyMs,
      // Guard the division so an empty batch reports 0 instead of NaN/Infinity.
      avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
      cacheStats: {
        hits: cacheHits,
        misses: texts.length - cacheHits,
      },
    };
  }
}
// ============================================================================
// Mock Embedding Service
// ============================================================================
/**
 * Deterministic stand-in provider for development and tests. Embeddings are
 * derived from a hash of the text, so equal texts always produce equal
 * vectors; they carry no semantic meaning.
 */
export class MockEmbeddingService extends BaseEmbeddingService {
  provider = 'mock';
  dimensions;
  simulatedLatency;

  /**
   * @param {object} [config] - Reads `dimensions` (default 384), `cacheSize`
   *   (default 1000), `simulatedLatency` in ms (default 0) and `enableCache`
   *   (default true). Other keys are not passed on to the base class.
   */
  constructor(config = {}) {
    const fullConfig = {
      provider: 'mock',
      dimensions: config.dimensions ?? 384,
      cacheSize: config.cacheSize ?? 1000,
      simulatedLatency: config.simulatedLatency ?? 0,
      enableCache: config.enableCache ?? true,
    };
    super(fullConfig);
    this.dimensions = fullConfig.dimensions;
    this.simulatedLatency = fullConfig.simulatedLatency;
  }

  /**
   * Embed a single text, optionally waiting `simulatedLatency` ms first.
   *
   * @param {string} text
   * @returns {Promise<object>} `{ embedding, latencyMs, cached: true }` on a
   *   cache hit, otherwise `{ embedding, latencyMs }`.
   */
  async embed(text) {
    const cached = this.cache.get(text);
    if (cached) {
      this.emitEvent({ type: 'cache_hit', text });
      return {
        embedding: cached,
        latencyMs: 0,
        cached: true,
      };
    }
    this.emitEvent({ type: 'embed_start', text });
    const startTime = performance.now();
    if (this.simulatedLatency > 0) {
      await new Promise((resolve) => setTimeout(resolve, this.simulatedLatency));
    }
    const embedding = this.hashEmbedding(text);
    this.cache.set(text, embedding);
    const latencyMs = performance.now() - startTime;
    this.emitEvent({ type: 'embed_complete', text, latencyMs });
    return {
      embedding,
      latencyMs,
    };
  }

  /**
   * Embed several texts. No latency is simulated on this path.
   *
   * @param {string[]} texts
   * @returns {Promise<object>} `{ embeddings, totalLatencyMs, avgLatencyMs,
   *   cacheStats: { hits, misses } }`. `avgLatencyMs` is 0 for an empty batch.
   */
  async embedBatch(texts) {
    this.emitEvent({ type: 'batch_start', count: texts.length });
    const startTime = performance.now();
    const embeddings = [];
    let cacheHits = 0;
    for (const text of texts) {
      const cached = this.cache.get(text);
      if (cached) {
        embeddings.push(cached);
        cacheHits++;
        // Report hits here too, matching embed() and the other providers.
        this.emitEvent({ type: 'cache_hit', text });
        continue;
      }
      const embedding = this.hashEmbedding(text);
      this.cache.set(text, embedding);
      embeddings.push(embedding);
    }
    const totalLatencyMs = performance.now() - startTime;
    this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
    return {
      embeddings,
      totalLatencyMs,
      // Guard the division so an empty batch reports 0 instead of NaN/Infinity.
      avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
      cacheStats: {
        hits: cacheHits,
        misses: texts.length - cacheHits,
      },
    };
  }

  /**
   * Generate a deterministic, unit-length pseudo-random vector from the text.
   *
   * @param {string} text
   * @returns {Float32Array} Vector of `this.dimensions` components.
   */
  hashEmbedding(text) {
    const embedding = new Float32Array(this.dimensions);
    // 31-multiplier string hash, truncated to a signed 32-bit integer
    // (`hash & hash` performs the truncation).
    let hash = 0;
    for (let i = 0; i < text.length; i++) {
      hash = (hash << 5) - hash + text.charCodeAt(i);
      hash = hash & hash;
    }
    // Fractional part of a scaled sine gives a value in [0, 1) per component.
    for (let i = 0; i < this.dimensions; i++) {
      const seed = hash + i * 2654435761;
      const x = Math.sin(seed) * 10000;
      embedding[i] = x - Math.floor(x);
    }
    // Scale to unit length; skip when the norm is 0 to avoid producing NaN.
    const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
    if (norm > 0) {
      for (let i = 0; i < this.dimensions; i++) {
        embedding[i] /= norm;
      }
    }
    return embedding;
  }
}
// ============================================================================
// Agentic-Flow Embedding Service
// ============================================================================
/**
 * Agentic-Flow embedding service using OptimizedEmbedder
 *
 * Features advertised for the underlying embedder (not verifiable from this
 * file):
 * - ONNX-based embeddings with SIMD acceleration
 * - 256-entry LRU cache with FNV-1a hash
 * - 8x loop unrolling for cosine similarity
 * - Pre-allocated buffers (no GC pressure)
 * - 3-4x faster batch processing
 *
 * The embedder module is located lazily on first use: the package export
 * 'agentic-flow/embeddings' is tried first, then a few on-disk locations.
 */
export class AgenticFlowEmbeddingService extends BaseEmbeddingService {
  provider = 'agentic-flow';
  embedder = null;
  initialized = false;
  // In-flight initialization, shared by concurrent callers.
  initPromise = null;
  modelId;
  dimensions;
  embedderCacheSize;
  modelDir;
  autoDownload;

  /**
   * @param {object} config - Reads `modelId` (default 'all-MiniLM-L6-v2'),
   *   `dimensions` (default 384), `embedderCacheSize` (default 256),
   *   `modelDir` and `autoDownload` (default false), plus the base-class
   *   options.
   */
  constructor(config) {
    super(config);
    this.modelId = config.modelId ?? 'all-MiniLM-L6-v2';
    this.dimensions = config.dimensions ?? 384;
    this.embedderCacheSize = config.embedderCacheSize ?? 256;
    this.modelDir = config.modelDir;
    this.autoDownload = config.autoDownload ?? false;
  }

  /**
   * Locate and initialize the embedder once. Concurrent callers share the
   * same in-flight attempt; a failed attempt is forgotten so a later call
   * can retry.
   *
   * @returns {Promise<void>}
   * @throws {Error} When no candidate module yields a working embedder.
   */
  async initialize() {
    if (this.initialized) {
      return;
    }
    if (this.initPromise === null) {
      this.initPromise = this.loadEmbedder().catch((error) => {
        this.initPromise = null;
        throw error;
      });
    }
    await this.initPromise;
  }

  /** Try each candidate module specifier until one produces an initialized embedder. */
  async loadEmbedder() {
    let lastError;
    const createEmbedder = async (specifier) => {
      try {
        const module = await import(/* webpackIgnore: true */ specifier);
        const getOptimizedEmbedder = module.getOptimizedEmbedder || module.default?.getOptimizedEmbedder;
        if (!getOptimizedEmbedder) {
          lastError = new Error(`Module loaded but getOptimizedEmbedder not found`);
          return false;
        }
        // Only include defined values to not override defaults
        const embedderConfig = {
          modelId: this.modelId,
          dimension: this.dimensions,
          cacheSize: this.embedderCacheSize,
          autoDownload: this.autoDownload,
        };
        if (this.modelDir !== undefined) {
          embedderConfig.modelDir = this.modelDir;
        }
        // Publish the embedder only after init() succeeds so a half-built
        // instance is never left on `this.embedder`.
        const embedder = getOptimizedEmbedder(embedderConfig);
        await embedder.init();
        this.embedder = embedder;
        this.initialized = true;
        return true;
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        return false;
      }
    };

    // Package export first (preferred), then on-disk fallbacks.
    const specifiers = ['agentic-flow/embeddings'];
    try {
      const path = await import('path');
      const { existsSync } = await import('fs');
      const { pathToFileURL } = await import('url');
      const cwd = process.cwd();
      const absolutePaths = [
        path.join(cwd, 'node_modules/agentic-flow/dist/embeddings/optimized-embedder.js'),
        path.join(cwd, '../node_modules/agentic-flow/dist/embeddings/optimized-embedder.js'),
        '/workspaces/claude-flow/node_modules/agentic-flow/dist/embeddings/optimized-embedder.js',
      ];
      for (const candidate of absolutePaths) {
        if (existsSync(candidate)) {
          // pathToFileURL yields a valid file: URL on every platform
          // (Windows drive letters, '#', '%', spaces), unlike 'file://' + path.
          specifiers.push(pathToFileURL(candidate).href);
        }
      }
    } catch {
      // fs/path/url modules not available: only the package export is tried.
    }

    for (const specifier of specifiers) {
      if (await createEmbedder(specifier)) {
        return;
      }
    }
    const errorDetail = lastError?.message ? ` Last error: ${lastError.message}` : '';
    throw new Error(`Failed to initialize agentic-flow embeddings.${errorDetail} ` +
      `Ensure agentic-flow is installed and ONNX model is downloaded: ` +
      `npx agentic-flow@alpha embeddings init`, { cause: lastError });
  }

  /**
   * Embed a single text.
   *
   * @param {string} text
   * @returns {Promise<object>} `{ embedding, latencyMs, cached: true }` on a
   *   cache hit, otherwise `{ embedding, latencyMs }`.
   * @throws {Error} "Agentic-flow embedding failed: ..." with the underlying
   *   error attached as `cause`.
   */
  async embed(text) {
    await this.initialize();
    // Check our LRU cache first
    const cached = this.cache.get(text);
    if (cached) {
      this.emitEvent({ type: 'cache_hit', text });
      return {
        embedding: cached,
        latencyMs: 0,
        cached: true,
      };
    }
    this.emitEvent({ type: 'embed_start', text });
    const startTime = performance.now();
    try {
      // NOTE(review): the returned vector is cached by reference; if the
      // embedder reuses output buffers, cached entries could be overwritten —
      // confirm against agentic-flow.
      const embedding = await this.embedder.embed(text);
      this.cache.set(text, embedding);
      const latencyMs = performance.now() - startTime;
      this.emitEvent({ type: 'embed_complete', text, latencyMs });
      return {
        embedding,
        latencyMs,
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : 'Unknown error';
      this.emitEvent({ type: 'embed_error', text, error: message });
      throw new Error(`Agentic-flow embedding failed: ${message}`, { cause: error });
    }
  }

  /**
   * Embed several texts, sending only the cache misses to the embedder's
   * batch call and returning embeddings in the order of `texts`.
   *
   * @param {string[]} texts
   * @returns {Promise<object>} `{ embeddings, totalLatencyMs, avgLatencyMs,
   *   cacheStats: { hits, misses } }`. `avgLatencyMs` is 0 for an empty batch.
   */
  async embedBatch(texts) {
    await this.initialize();
    this.emitEvent({ type: 'batch_start', count: texts.length });
    const startTime = performance.now();

    const cached = [];
    const uncached = [];
    texts.forEach((text, index) => {
      const cachedEmbedding = this.cache.get(text);
      if (cachedEmbedding) {
        cached.push({ index, embedding: cachedEmbedding });
        this.emitEvent({ type: 'cache_hit', text });
      } else {
        uncached.push({ index, text });
      }
    });

    let batchEmbeddings = [];
    if (uncached.length > 0) {
      batchEmbeddings = await this.embedder.embedBatch(uncached.map((u) => u.text));
      uncached.forEach((item, i) => {
        this.cache.set(item.text, batchEmbeddings[i]);
      });
    }

    // Stitch hits and fresh results back into the caller's order.
    const embeddings = new Array(texts.length);
    for (const hit of cached) {
      embeddings[hit.index] = hit.embedding;
    }
    uncached.forEach((miss, i) => {
      embeddings[miss.index] = batchEmbeddings[i];
    });

    const totalLatencyMs = performance.now() - startTime;
    this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
    return {
      embeddings,
      totalLatencyMs,
      // Guard the division so an empty batch reports 0 instead of NaN/Infinity.
      avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
      cacheStats: {
        hits: cached.length,
        misses: uncached.length,
      },
    };
  }

  /**
   * Get combined cache statistics from both our LRU cache and embedder's internal cache
   *
   * @returns {object} Base stats, with `size`/`maxSize` summed across both
   *   caches and the embedder's own stats under `embedderCache` when the
   *   embedder exposes getCacheStats(). `hitRate` is the LRU cache's only.
   */
  getCacheStats() {
    const baseStats = super.getCacheStats();
    if (this.embedder && this.embedder.getCacheStats) {
      const embedderStats = this.embedder.getCacheStats();
      return {
        size: baseStats.size + embedderStats.size,
        maxSize: baseStats.maxSize + embedderStats.maxSize,
        hitRate: baseStats.hitRate,
        embedderCache: embedderStats,
      };
    }
    return baseStats;
  }

  /**
   * Clear the embedder's cache (when it offers clearCache) and then run the
   * base-class shutdown.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    if (this.embedder && this.embedder.clearCache) {
      this.embedder.clearCache();
    }
    await super.shutdown();
  }
}
// ============================================================================
// Factory Functions
// ============================================================================
/**
 * Probe whether the 'agentic-flow/embeddings' module can be imported.
 *
 * @returns {Promise<boolean>} true when the dynamic import succeeds, false
 *   when it fails for any reason.
 */
async function isAgenticFlowAvailable() {
  return import('agentic-flow/embeddings').then(
    () => true,
    () => false,
  );
}
/**
 * Auto-install agentic-flow and initialize model
 *
 * Best-effort: when agentic-flow cannot be imported, runs
 * `npm install agentic-flow@alpha --save` followed by
 * `npx agentic-flow@alpha embeddings init`, then re-checks availability.
 * Every failure (including being unable to load child_process/util) is
 * logged as a warning and reported as `false` rather than thrown.
 *
 * @returns {Promise<boolean>} true when agentic-flow is importable afterwards.
 */
async function autoInstallAgenticFlow() {
  try {
    // Check if already available before touching child_process at all.
    if (await isAgenticFlowAvailable()) {
      return true;
    }
    // Loaded inside the try so that a runtime without these modules yields
    // `false` instead of a rejected promise.
    const { exec } = await import('child_process');
    const { promisify } = await import('util');
    const execAsync = promisify(exec);
    // exec() terminates the child once its captured output exceeds
    // maxBuffer (1 MiB by default); allow more room for verbose output.
    const maxBuffer = 16 * 1024 * 1024;
    console.log('[embeddings] Installing agentic-flow@alpha...');
    await execAsync('npm install agentic-flow@alpha --save', { timeout: 120000, maxBuffer });
    // Initialize the model
    console.log('[embeddings] Downloading embedding model...');
    await execAsync('npx agentic-flow@alpha embeddings init', { timeout: 300000, maxBuffer });
    // Verify installation
    return await isAgenticFlowAvailable();
  } catch (error) {
    console.warn('[embeddings] Auto-install failed:', error instanceof Error ? error.message : error);
    return false;
  }
}
/**
 * Synchronously build the embedding service named by `config.provider`.
 * Note: For 'auto' provider or smart fallback, use createEmbeddingServiceAsync
 *
 * @param {object} config - Passed unchanged to the provider's constructor.
 *   `provider` is one of 'openai', 'transformers', 'mock', 'agentic-flow'.
 * @returns {object} The provider instance. Any other provider value logs a
 *   warning and yields a 384-dimension mock service.
 */
export function createEmbeddingService(config) {
  // A Map (rather than an object literal) keeps inherited keys such as
  // 'constructor' from ever matching a provider name.
  const builders = new Map([
    ['openai', () => new OpenAIEmbeddingService(config)],
    ['transformers', () => new TransformersEmbeddingService(config)],
    ['mock', () => new MockEmbeddingService(config)],
    ['agentic-flow', () => new AgenticFlowEmbeddingService(config)],
  ]);
  const build = builders.get(config.provider);
  if (build !== undefined) {
    return build();
  }
  console.warn(`Unknown provider, using mock`);
  return new MockEmbeddingService({ provider: 'mock', dimensions: 384 });
}
/**
 * Create embedding service with automatic provider detection and fallback
 *
 * Features:
 * - 'auto' provider picks best available: agentic-flow > transformers > mock
 * - Automatic fallback if primary provider fails to initialize
 * - Pre-validates provider availability before returning
 *
 * Validation runs `service.embed('test')`, so the returned service already
 * holds one cached entry, and for the 'openai' provider a real API request
 * is made.
 *
 * All options other than `provider`, `fallback` and `autoInstall` are
 * forwarded to the provider's constructor (as the synchronous factory does),
 * so settings such as `model`, `baseURL`, `timeout`, `maxRetries`,
 * `modelDir` or `normalization` take effect. The same options are reused for
 * the fallback provider.
 *
 * @param {object} config
 * @param {string} config.provider - 'auto', 'agentic-flow', 'transformers',
 *   'openai' or 'mock'.
 * @param {string} [config.fallback] - Provider to try when the primary one
 *   fails validation.
 * @param {boolean} [config.autoInstall=true] - Whether 'auto' may try to
 *   install agentic-flow; also honored when a fallback resolves to 'auto'.
 * @returns {Promise<object>} A validated embedding service.
 * @throws {Error} For an unknown provider, a missing OpenAI `apiKey`, or a
 *   primary-provider failure when no fallback is configured.
 *
 * @example
 * // Auto-select best provider
 * const service = await createEmbeddingServiceAsync({ provider: 'auto' });
 *
 * // Try agentic-flow, fallback to transformers
 * const service = await createEmbeddingServiceAsync({
 *   provider: 'agentic-flow',
 *   fallback: 'transformers'
 * });
 */
export async function createEmbeddingServiceAsync(config) {
  const { provider, fallback, autoInstall = true, ...rest } = config;

  // Per-provider constructor options: caller options first, then the
  // provider tag and defaults the original code applied.
  const agenticFlowOptions = () => ({
    ...rest,
    provider: 'agentic-flow',
    modelId: rest.modelId ?? 'all-MiniLM-L6-v2',
    dimensions: rest.dimensions ?? 384,
  });
  const transformersOptions = () => ({
    ...rest,
    provider: 'transformers',
    model: rest.model ?? 'Xenova/all-MiniLM-L6-v2',
  });
  const mockOptions = () => ({
    ...rest,
    dimensions: rest.dimensions ?? 384,
  });

  // Auto provider selection
  if (provider === 'auto') {
    // Try agentic-flow first (fastest, ONNX-based)
    let agenticFlowAvailable = await isAgenticFlowAvailable();
    // Auto-install if not available and autoInstall is enabled
    if (!agenticFlowAvailable && autoInstall) {
      agenticFlowAvailable = await autoInstallAgenticFlow();
    }
    if (agenticFlowAvailable) {
      try {
        const service = new AgenticFlowEmbeddingService(agenticFlowOptions());
        // Validate it can initialize
        await service.embed('test');
        return service;
      } catch {
        // Deliberate: fall through to the next option
      }
    }
    // Try transformers (good quality, built-in)
    try {
      const service = new TransformersEmbeddingService(transformersOptions());
      // Validate it can initialize
      await service.embed('test');
      return service;
    } catch {
      // Deliberate: fall through to mock
    }
    // Fallback to mock (always works)
    console.warn('[embeddings] Using mock provider - install agentic-flow or @xenova/transformers for real embeddings');
    return new MockEmbeddingService(mockOptions());
  }

  // Specific provider with optional fallback
  const createPrimary = () => {
    switch (provider) {
      case 'agentic-flow':
        return new AgenticFlowEmbeddingService(agenticFlowOptions());
      case 'transformers':
        return new TransformersEmbeddingService(transformersOptions());
      case 'openai':
        if (!rest.apiKey) {
          throw new Error('OpenAI provider requires apiKey');
        }
        return new OpenAIEmbeddingService({ ...rest, provider: 'openai' });
      case 'mock':
        return new MockEmbeddingService(mockOptions());
      default:
        throw new Error(`Unknown provider: ${provider}`);
    }
  };
  const primary = createPrimary();
  // Try to validate primary provider
  try {
    await primary.embed('test');
    return primary;
  } catch (error) {
    if (!fallback) {
      throw error;
    }
    console.warn(`[embeddings] Primary provider '${provider}' failed, using fallback '${fallback}'`);
    // `fallback` itself is not carried over, so the chain is at most one
    // level deep; `autoInstall` is, so an explicit `false` still applies
    // when the fallback is 'auto'.
    const fallbackConfig = { ...rest, autoInstall, provider: fallback };
    return createEmbeddingServiceAsync(fallbackConfig);
  }
}
/**
 * One-shot helper: build a service, embed a single text, shut the service down.
 *
 * The service defaults to the mock provider with 384 dimensions; any key in
 * `config` overrides those defaults.
 *
 * @param {string} text - Text to embed.
 * @param {object} [config] - Options for createEmbeddingService.
 * @returns {Promise<*>} The `embedding` field of the service's embed() result.
 */
export async function getEmbedding(text, config) {
  const serviceConfig = { provider: 'mock', dimensions: 384, ...config };
  const service = createEmbeddingService(serviceConfig);
  try {
    const { embedding } = await service.embed(text);
    return embedding;
  } finally {
    // Runs whether or not embed() succeeded.
    await service.shutdown();
  }
}
// ============================================================================
// Similarity Functions
// ============================================================================
/**
 * Cosine of the angle between two equal-length vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when that denominator is
 *   not positive (e.g. a zero vector).
 * @throws {Error} When the lengths differ.
 */
export function cosineSimilarity(a, b) {
  const dims = a.length;
  if (dims !== b.length) {
    throw new Error('Embedding dimensions must match');
  }
  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let idx = 0;
  while (idx < dims) {
    const x = a[idx];
    const y = b[idx];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    idx += 1;
  }
  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  if (magnitude > 0) {
    return dot / magnitude;
  }
  return 0;
}
/**
 * Straight-line (L2) distance between two equal-length vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} Square root of the summed squared component differences.
 * @throws {Error} When the lengths differ.
 */
export function euclideanDistance(a, b) {
  const dims = a.length;
  if (dims !== b.length) {
    throw new Error('Embedding dimensions must match');
  }
  let squaredTotal = 0;
  let idx = 0;
  while (idx < dims) {
    const delta = a[idx] - b[idx];
    squaredTotal += delta * delta;
    idx += 1;
  }
  return Math.sqrt(squaredTotal);
}
/**
 * Inner product of two equal-length vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} Sum of the pairwise component products.
 * @throws {Error} When the lengths differ.
 */
export function dotProduct(a, b) {
  const dims = a.length;
  if (dims !== b.length) {
    throw new Error('Embedding dimensions must match');
  }
  let total = 0;
  let idx = 0;
  while (idx < dims) {
    total += a[idx] * b[idx];
    idx += 1;
  }
  return total;
}
/**
 * Score two embeddings with the chosen metric.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @param {string} [metric='cosine'] - 'cosine', 'euclidean' or 'dot'. Any
 *   other value is treated as 'cosine'.
 * @returns {{score: number, metric: string}} The score and the metric that
 *   was actually applied. For 'euclidean' the score is 1 / (1 + distance),
 *   so closer vectors score higher.
 */
export function computeSimilarity(a, b, metric = 'cosine') {
  if (metric === 'euclidean') {
    const distance = euclideanDistance(a, b);
    return { score: 1 / (1 + distance), metric };
  }
  if (metric === 'dot') {
    return { score: dotProduct(a, b), metric };
  }
  // 'cosine' and every unrecognized metric end up here.
  return { score: cosineSimilarity(a, b), metric: 'cosine' };
}
//# sourceMappingURL=embedding-service.js.map