945 lines
33 KiB
JavaScript
945 lines
33 KiB
JavaScript
/**
 * V3 Embedding Service Implementation
 *
 * Production embedding service aligned with agentic-flow@alpha:
 * - OpenAI provider (text-embedding-3-small/large)
 * - Transformers.js provider (local ONNX models)
 * - Mock provider (development/testing)
 *
 * Performance Targets:
 * - Single embedding: <100ms (API), <50ms (local)
 * - Batch embedding: <500ms for 10 items
 * - Cache hit: <1ms
 */
|
|
import { EventEmitter } from 'events';
|
|
import { normalize } from './normalization.js';
|
|
import { PersistentEmbeddingCache } from './persistent-cache.js';
|
|
// ============================================================================
|
|
// LRU Cache Implementation
|
|
// ============================================================================
|
|
/**
 * Least-recently-used cache built on Map insertion order.
 *
 * The first key in the Map is always the least recently used entry; reads
 * and writes re-insert the key so it moves to the end. Hit/miss counters are
 * maintained by `get` and reset by `clear`.
 */
class LRUCache {
    maxSize;
    cache = new Map();
    hits = 0;
    misses = 0;
    /**
     * @param {number} maxSize Maximum number of entries to retain. A value
     *   of 0 or less disables storage entirely.
     */
    constructor(maxSize) {
        this.maxSize = maxSize;
    }
    /**
     * Look up a key and mark it as most recently used.
     *
     * @param {*} key
     * @returns {*} The stored value, or `undefined` on a miss. A stored
     *   `undefined` is indistinguishable from a miss and counted as one.
     */
    get(key) {
        const value = this.cache.get(key);
        if (value === undefined) {
            this.misses++;
            return undefined;
        }
        // Re-insert so the key becomes the newest entry in iteration order.
        this.cache.delete(key);
        this.cache.set(key, value);
        this.hits++;
        return value;
    }
    /**
     * Insert or refresh an entry, evicting the least recently used entry
     * when the cache is full.
     *
     * @param {*} key
     * @param {*} value
     */
    set(key, value) {
        // A capacity of zero (or less) means "do not cache". Without this
        // guard the eviction branch finds nothing to delete and the entry is
        // stored anyway, leaving one entry in a zero-capacity cache.
        if (this.maxSize <= 0) {
            return;
        }
        if (this.cache.has(key)) {
            // Delete first so the re-insert below moves the key to the end.
            this.cache.delete(key);
        }
        else if (this.cache.size >= this.maxSize) {
            // Oldest entry is the first key in insertion order.
            const firstKey = this.cache.keys().next().value;
            if (firstKey !== undefined) {
                this.cache.delete(firstKey);
            }
        }
        this.cache.set(key, value);
    }
    /** Remove every entry and reset the hit/miss counters. */
    clear() {
        this.cache.clear();
        this.hits = 0;
        this.misses = 0;
    }
    /** @returns {number} Number of entries currently stored. */
    get size() {
        return this.cache.size;
    }
    /** @returns {number} hits / (hits + misses), or 0 before any lookup. */
    get hitRate() {
        const total = this.hits + this.misses;
        return total > 0 ? this.hits / total : 0;
    }
    /**
     * @returns {{size: number, maxSize: number, hits: number, misses: number, hitRate: number}}
     *   Snapshot of the current counters.
     */
    getStats() {
        return {
            size: this.cache.size,
            maxSize: this.maxSize,
            hits: this.hits,
            misses: this.misses,
            hitRate: this.hitRate,
        };
    }
}
|
|
// ============================================================================
|
|
// Base Embedding Service
|
|
// ============================================================================
|
|
/**
 * Common plumbing shared by the embedding providers in this file: an
 * in-memory LRU cache, an optional persistent cache, optional normalization,
 * and event dispatch to both callback listeners and EventEmitter subscribers.
 */
class BaseEmbeddingService extends EventEmitter {
    config;
    cache;
    persistentCache = null;
    embeddingListeners = new Set();
    normalizationType;
    /**
     * @param {object} config Provider configuration. Reads `cacheSize`
     *   (default 1000), `normalization` (default 'none') and
     *   `persistentCache` ({ enabled, dbPath, maxSize, ttlMs }).
     */
    constructor(config) {
        super();
        this.config = config;
        this.cache = new LRUCache(config.cacheSize ?? 1000);
        this.normalizationType = config.normalization ?? 'none';
        // The persistent cache is only constructed when explicitly enabled.
        const persistent = config.persistentCache;
        if (persistent?.enabled) {
            const options = {
                dbPath: persistent.dbPath ?? '.cache/embeddings.db',
                maxSize: persistent.maxSize ?? 10000,
                ttlMs: persistent.ttlMs,
            };
            this.persistentCache = new PersistentEmbeddingCache(options);
        }
    }
    /**
     * Normalize an embedding with the configured normalization type.
     *
     * @param {*} embedding
     * @returns {*} The input itself when normalization is 'none', otherwise
     *   whatever `normalize` returns.
     */
    applyNormalization(embedding) {
        const type = this.normalizationType;
        return type === 'none' ? embedding : normalize(embedding, { type });
    }
    /**
     * Look a text up in the persistent cache.
     *
     * @param {string} text
     * @returns {Promise<*>} `null` when no persistent cache is configured,
     *   otherwise the result of the cache's `get`.
     */
    async checkPersistentCache(text) {
        const store = this.persistentCache;
        return store ? store.get(text) : null;
    }
    /**
     * Write an embedding to the persistent cache; a no-op when none is
     * configured.
     *
     * @param {string} text
     * @param {*} embedding
     * @returns {Promise<void>}
     */
    async storePersistentCache(text, embedding) {
        const store = this.persistentCache;
        if (store) {
            await store.set(text, embedding);
        }
    }
    /**
     * Deliver an event to every registered callback, then re-emit it through
     * EventEmitter under `event.type`.
     *
     * @param {{type: string}} event
     */
    emitEvent(event) {
        this.embeddingListeners.forEach((notify) => {
            // One faulty listener must not stop delivery to the others.
            try {
                notify(event);
            }
            catch (error) {
                console.error('Error in embedding event listener:', error);
            }
        });
        this.emit(event.type, event);
    }
    /** @param {Function} listener Callback invoked with every event. */
    addEventListener(listener) {
        this.embeddingListeners.add(listener);
    }
    /** @param {Function} listener Callback previously registered. */
    removeEventListener(listener) {
        this.embeddingListeners.delete(listener);
    }
    /**
     * Empty the in-memory cache and emit `cache_eviction` carrying the
     * number of entries that were held before clearing.
     */
    clearCache() {
        const evicted = this.cache.size;
        this.cache.clear();
        this.emitEvent({ type: 'cache_eviction', size: evicted });
    }
    /** @returns {{size: number, maxSize: number, hitRate: number}} */
    getCacheStats() {
        const { size, maxSize, hitRate } = this.cache.getStats();
        return { size, maxSize, hitRate };
    }
    /**
     * Clear the in-memory cache (which emits `cache_eviction`) and then drop
     * all callback listeners.
     *
     * @returns {Promise<void>}
     */
    async shutdown() {
        this.clearCache();
        this.embeddingListeners.clear();
    }
}
|
|
// ============================================================================
|
|
// OpenAI Embedding Service
|
|
// ============================================================================
|
|
/**
 * Embedding provider that calls an OpenAI-style embeddings HTTP endpoint.
 *
 * Results are kept in the in-memory LRU cache inherited from
 * BaseEmbeddingService, keyed by the raw input text.
 */
export class OpenAIEmbeddingService extends BaseEmbeddingService {
    provider = 'openai';
    apiKey;
    model;
    baseURL;
    timeout;
    maxRetries;
    /**
     * @param {object} config Reads `apiKey`, `model` (default
     *   'text-embedding-3-small'), `baseURL` (default the OpenAI embeddings
     *   endpoint), `timeout` in ms (default 30000) and `maxRetries`
     *   (default 3), plus everything the base class reads.
     */
    constructor(config) {
        super(config);
        this.apiKey = config.apiKey;
        this.model = config.model ?? 'text-embedding-3-small';
        this.baseURL = config.baseURL ?? 'https://api.openai.com/v1/embeddings';
        this.timeout = config.timeout ?? 30000;
        this.maxRetries = config.maxRetries ?? 3;
    }
    /**
     * Embed a single text, serving it from the cache when possible.
     *
     * @param {string} text
     * @returns {Promise<object>} `{ embedding, latencyMs, cached: true }` on a
     *   cache hit, otherwise `{ embedding, latencyMs, usage }`.
     * @throws {Error} "OpenAI embedding failed: ..." with the underlying
     *   error attached as `cause`.
     */
    async embed(text) {
        const cached = this.cache.get(text);
        if (cached) {
            this.emitEvent({ type: 'cache_hit', text });
            return {
                embedding: cached,
                latencyMs: 0,
                cached: true,
            };
        }
        this.emitEvent({ type: 'embed_start', text });
        const startTime = performance.now();
        try {
            const response = await this.callOpenAI([text]);
            const vector = response?.data?.[0]?.embedding;
            if (!vector) {
                throw new Error('response contained no embedding');
            }
            const embedding = new Float32Array(vector);
            this.cache.set(text, embedding);
            const latencyMs = performance.now() - startTime;
            this.emitEvent({ type: 'embed_complete', text, latencyMs });
            return {
                embedding,
                latencyMs,
                usage: {
                    promptTokens: response.usage?.prompt_tokens ?? 0,
                    totalTokens: response.usage?.total_tokens ?? 0,
                },
            };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            this.emitEvent({ type: 'embed_error', text, error: message });
            // Keep the original error reachable for callers that need the stack.
            throw new Error(`OpenAI embedding failed: ${message}`, { cause: error });
        }
    }
    /**
     * Embed several texts, sending only the cache misses to the API in a
     * single request.
     *
     * @param {string[]} texts
     * @returns {Promise<object>} `{ embeddings, totalLatencyMs, avgLatencyMs,
     *   usage, cacheStats: { hits, misses } }`; `embeddings` is in the same
     *   order as `texts`. `avgLatencyMs` is 0 for an empty input.
     * @throws {Error} When the request fails or the response lacks an
     *   embedding for one of the inputs.
     */
    async embedBatch(texts) {
        this.emitEvent({ type: 'batch_start', count: texts.length });
        const startTime = performance.now();
        const embeddings = new Array(texts.length);
        const uncached = [];
        let hits = 0;
        texts.forEach((text, index) => {
            const cachedEmbedding = this.cache.get(text);
            if (cachedEmbedding) {
                embeddings[index] = cachedEmbedding;
                hits++;
                this.emitEvent({ type: 'cache_hit', text });
            }
            else {
                uncached.push({ index, text });
            }
        });
        let usage = { promptTokens: 0, totalTokens: 0 };
        if (uncached.length > 0) {
            const response = await this.callOpenAI(uncached.map(u => u.text));
            const rows = Array.isArray(response?.data) ? response.data : [];
            // Place each row by its own `index` field when present instead of
            // trusting array position; fall back to position otherwise.
            const apiEmbeddings = new Array(uncached.length);
            rows.forEach((row, position) => {
                const slot = Number.isInteger(row?.index) ? row.index : position;
                if (slot >= 0 && slot < uncached.length && row?.embedding) {
                    apiEmbeddings[slot] = new Float32Array(row.embedding);
                }
            });
            // Validate before caching so a short response never stores
            // `undefined` under a text key.
            const missing = uncached.findIndex((_, i) => apiEmbeddings[i] === undefined);
            if (missing !== -1) {
                throw new Error(`OpenAI response is missing the embedding for input ${missing} ` +
                    `(${rows.length} returned for ${uncached.length} inputs)`);
            }
            uncached.forEach((item, i) => {
                this.cache.set(item.text, apiEmbeddings[i]);
                embeddings[item.index] = apiEmbeddings[i];
            });
            usage = {
                promptTokens: response.usage?.prompt_tokens ?? 0,
                totalTokens: response.usage?.total_tokens ?? 0,
            };
        }
        const totalLatencyMs = performance.now() - startTime;
        this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
        return {
            embeddings,
            totalLatencyMs,
            // Avoid 0 / 0 = NaN for an empty batch.
            avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
            usage,
            cacheStats: {
                hits,
                misses: uncached.length,
            },
        };
    }
    /**
     * POST the texts to the embeddings endpoint, retrying with exponential
     * backoff (100ms, 200ms, 400ms, ...) up to `maxRetries` attempts.
     *
     * The abort timer now covers the whole attempt, including reading the
     * response body, and is always cleared.
     *
     * @param {string[]} texts
     * @returns {Promise<object>} Parsed JSON response body.
     * @throws {Error} The error of the final attempt, or
     *   "Max retries exceeded" when `maxRetries` allows no attempt at all.
     */
    async callOpenAI(texts) {
        const { dimensions } = this.config;
        for (let attempt = 0; attempt < this.maxRetries; attempt++) {
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), this.timeout);
            try {
                const response = await fetch(this.baseURL, {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        Authorization: `Bearer ${this.apiKey}`,
                    },
                    body: JSON.stringify({
                        model: this.model,
                        input: texts,
                        // Dropped by JSON.stringify when undefined.
                        dimensions,
                    }),
                    signal: controller.signal,
                });
                if (!response.ok) {
                    const error = await response.text();
                    throw new Error(`OpenAI API error: ${response.status} - ${error}`);
                }
                return await response.json();
            }
            catch (error) {
                if (attempt === this.maxRetries - 1) {
                    throw error;
                }
            }
            finally {
                // Runs on success, failure and abort, so a rejected fetch no
                // longer leaves a pending timer behind.
                clearTimeout(timeoutId);
            }
            // Exponential backoff before the next attempt.
            await new Promise(resolve => setTimeout(resolve, Math.pow(2, attempt) * 100));
        }
        throw new Error('Max retries exceeded');
    }
}
|
|
// ============================================================================
|
|
// Transformers.js Embedding Service
|
|
// ============================================================================
|
|
/**
 * Embedding provider that runs a local feature-extraction pipeline from
 * `@xenova/transformers`, loaded lazily on first use.
 */
export class TransformersEmbeddingService extends BaseEmbeddingService {
    provider = 'transformers';
    pipeline = null;
    modelName;
    initialized = false;
    // In-flight initialization, shared by concurrent callers.
    initPromise = null;
    /**
     * @param {object} config Reads `model` (default
     *   'Xenova/all-MiniLM-L6-v2') plus everything the base class reads.
     */
    constructor(config) {
        super(config);
        this.modelName = config.model ?? 'Xenova/all-MiniLM-L6-v2';
    }
    /**
     * Load the pipeline once. Concurrent calls share a single load instead
     * of each importing and building their own pipeline; a failed load is
     * not remembered, so a later call retries.
     *
     * @returns {Promise<void>}
     * @throws {Error} "Failed to initialize transformers.js: ..." with the
     *   underlying error attached as `cause`.
     */
    async initialize() {
        if (this.initialized) {
            return;
        }
        if (!this.initPromise) {
            this.initPromise = (async () => {
                try {
                    const { pipeline } = await import('@xenova/transformers');
                    this.pipeline = await pipeline('feature-extraction', this.modelName);
                    this.initialized = true;
                }
                catch (error) {
                    throw new Error(`Failed to initialize transformers.js: ${error}`, { cause: error });
                }
                finally {
                    this.initPromise = null;
                }
            })();
        }
        await this.initPromise;
    }
    /**
     * Embed a single text, serving it from the cache when possible.
     *
     * @param {string} text
     * @returns {Promise<object>} `{ embedding, latencyMs }`, plus
     *   `cached: true` on a cache hit.
     * @throws {Error} "Transformers.js embedding failed: ..." with the
     *   underlying error attached as `cause`.
     */
    async embed(text) {
        await this.initialize();
        const cached = this.cache.get(text);
        if (cached) {
            this.emitEvent({ type: 'cache_hit', text });
            return {
                embedding: cached,
                latencyMs: 0,
                cached: true,
            };
        }
        this.emitEvent({ type: 'embed_start', text });
        const startTime = performance.now();
        try {
            const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
            const embedding = new Float32Array(output.data);
            this.cache.set(text, embedding);
            const latencyMs = performance.now() - startTime;
            this.emitEvent({ type: 'embed_complete', text, latencyMs });
            return {
                embedding,
                latencyMs,
            };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            this.emitEvent({ type: 'embed_error', text, error: message });
            throw new Error(`Transformers.js embedding failed: ${message}`, { cause: error });
        }
    }
    /**
     * Embed several texts one after another, reusing cached vectors.
     *
     * @param {string[]} texts
     * @returns {Promise<object>} `{ embeddings, totalLatencyMs, avgLatencyMs,
     *   cacheStats: { hits, misses } }`; `avgLatencyMs` is 0 for an empty
     *   input.
     */
    async embedBatch(texts) {
        await this.initialize();
        this.emitEvent({ type: 'batch_start', count: texts.length });
        const startTime = performance.now();
        const embeddings = [];
        let cacheHits = 0;
        for (const text of texts) {
            const cached = this.cache.get(text);
            if (cached) {
                embeddings.push(cached);
                cacheHits++;
                this.emitEvent({ type: 'cache_hit', text });
            }
            else {
                const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
                const embedding = new Float32Array(output.data);
                this.cache.set(text, embedding);
                embeddings.push(embedding);
            }
        }
        const totalLatencyMs = performance.now() - startTime;
        this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
        return {
            embeddings,
            totalLatencyMs,
            // Avoid 0 / 0 = NaN for an empty batch.
            avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
            cacheStats: {
                hits: cacheHits,
                misses: texts.length - cacheHits,
            },
        };
    }
}
|
|
// ============================================================================
|
|
// Mock Embedding Service
|
|
// ============================================================================
|
|
/**
 * Deterministic, dependency-free provider for development and tests.
 * The same text always maps to the same unit-length vector.
 */
export class MockEmbeddingService extends BaseEmbeddingService {
    provider = 'mock';
    dimensions;
    simulatedLatency;
    /**
     * @param {object} [config] Reads `dimensions` (default 384),
     *   `cacheSize` (default 1000), `simulatedLatency` in ms (default 0) and
     *   `enableCache` (default true). Other keys are not forwarded to the
     *   base class.
     */
    constructor(config = {}) {
        const fullConfig = {
            provider: 'mock',
            dimensions: config.dimensions ?? 384,
            cacheSize: config.cacheSize ?? 1000,
            simulatedLatency: config.simulatedLatency ?? 0,
            // NOTE(review): stored but not consulted anywhere in this class.
            enableCache: config.enableCache ?? true,
        };
        super(fullConfig);
        this.dimensions = fullConfig.dimensions;
        this.simulatedLatency = fullConfig.simulatedLatency;
    }
    /**
     * Embed a single text, optionally waiting `simulatedLatency` ms first.
     *
     * @param {string} text
     * @returns {Promise<object>} `{ embedding, latencyMs }`, plus
     *   `cached: true` on a cache hit.
     */
    async embed(text) {
        const cached = this.cache.get(text);
        if (cached) {
            this.emitEvent({ type: 'cache_hit', text });
            return {
                embedding: cached,
                latencyMs: 0,
                cached: true,
            };
        }
        this.emitEvent({ type: 'embed_start', text });
        const startTime = performance.now();
        if (this.simulatedLatency > 0) {
            await new Promise(resolve => setTimeout(resolve, this.simulatedLatency));
        }
        const embedding = this.hashEmbedding(text);
        this.cache.set(text, embedding);
        const latencyMs = performance.now() - startTime;
        this.emitEvent({ type: 'embed_complete', text, latencyMs });
        return {
            embedding,
            latencyMs,
        };
    }
    /**
     * Embed several texts. No simulated latency is applied here.
     *
     * @param {string[]} texts
     * @returns {Promise<object>} `{ embeddings, totalLatencyMs, avgLatencyMs,
     *   cacheStats: { hits, misses } }`; `avgLatencyMs` is 0 for an empty
     *   input.
     */
    async embedBatch(texts) {
        this.emitEvent({ type: 'batch_start', count: texts.length });
        const startTime = performance.now();
        const embeddings = [];
        let cacheHits = 0;
        for (const text of texts) {
            const cached = this.cache.get(text);
            if (cached) {
                embeddings.push(cached);
                cacheHits++;
                // Same event the other providers emit for batch cache hits,
                // so listeners behave identically against the mock.
                this.emitEvent({ type: 'cache_hit', text });
            }
            else {
                const embedding = this.hashEmbedding(text);
                this.cache.set(text, embedding);
                embeddings.push(embedding);
            }
        }
        const totalLatencyMs = performance.now() - startTime;
        this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
        return {
            embeddings,
            totalLatencyMs,
            // Avoid 0 / 0 = NaN for an empty batch.
            avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
            cacheStats: {
                hits: cacheHits,
                misses: texts.length - cacheHits,
            },
        };
    }
    /**
     * Generate a deterministic hash-based embedding.
     *
     * @param {string} text
     * @returns {Float32Array} Vector of `this.dimensions` components scaled
     *   to unit length.
     */
    hashEmbedding(text) {
        const embedding = new Float32Array(this.dimensions);
        // 31-multiplier string hash; `hash & hash` truncates to 32 bits.
        let hash = 0;
        for (let i = 0; i < text.length; i++) {
            hash = (hash << 5) - hash + text.charCodeAt(i);
            hash = hash & hash;
        }
        // Fractional part of a scaled sine gives a pseudo-random value in [0, 1).
        for (let i = 0; i < this.dimensions; i++) {
            const seed = hash + i * 2654435761;
            const x = Math.sin(seed) * 10000;
            embedding[i] = x - Math.floor(x);
        }
        const norm = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
        // Skip scaling for a zero vector rather than dividing by zero.
        if (norm > 0) {
            for (let i = 0; i < this.dimensions; i++) {
                embedding[i] /= norm;
            }
        }
        return embedding;
    }
}
|
|
// ============================================================================
|
|
// Agentic-Flow Embedding Service
|
|
// ============================================================================
|
|
/**
 * Agentic-Flow embedding service using OptimizedEmbedder
 *
 * Features (as described for the agentic-flow embedder; not verifiable from
 * this file):
 * - ONNX-based embeddings with SIMD acceleration
 * - 256-entry LRU cache with FNV-1a hash
 * - 8x loop unrolling for cosine similarity
 * - Pre-allocated buffers (no GC pressure)
 * - 3-4x faster batch processing
 *
 * NOTE(review): if the embedder really hands out pre-allocated buffers, the
 * vectors cached here may alias memory it reuses — confirm it returns
 * independent arrays.
 */
export class AgenticFlowEmbeddingService extends BaseEmbeddingService {
    provider = 'agentic-flow';
    embedder = null;
    initialized = false;
    // In-flight initialization, shared by concurrent callers.
    initPromise = null;
    modelId;
    dimensions;
    embedderCacheSize;
    modelDir;
    autoDownload;
    /**
     * @param {object} config Reads `modelId` (default 'all-MiniLM-L6-v2'),
     *   `dimensions` (default 384), `embedderCacheSize` (default 256),
     *   `modelDir` and `autoDownload` (default false), plus everything the
     *   base class reads.
     */
    constructor(config) {
        super(config);
        this.modelId = config.modelId ?? 'all-MiniLM-L6-v2';
        this.dimensions = config.dimensions ?? 384;
        this.embedderCacheSize = config.embedderCacheSize ?? 256;
        this.modelDir = config.modelDir;
        this.autoDownload = config.autoDownload ?? false;
    }
    /**
     * Load and initialize the embedder once. Concurrent calls share a single
     * load; a failed load is not remembered, so a later call retries.
     *
     * @returns {Promise<void>}
     * @throws {Error} When no candidate module could be loaded and
     *   initialized.
     */
    async initialize() {
        if (this.initialized) {
            return;
        }
        if (!this.initPromise) {
            this.initPromise = this.loadEmbedder().finally(() => {
                this.initPromise = null;
            });
        }
        await this.initPromise;
    }
    /**
     * Try each candidate module location in turn until one yields a working
     * embedder. Helper for `initialize`.
     *
     * @returns {Promise<void>}
     * @throws {Error} "Failed to initialize agentic-flow embeddings. ..."
     *   including the message of the last failure.
     */
    async loadEmbedder() {
        let lastError;
        const createEmbedder = async (specifier) => {
            try {
                const module = await import(/* webpackIgnore: true */ specifier);
                const getOptimizedEmbedder = module.getOptimizedEmbedder || module.default?.getOptimizedEmbedder;
                if (!getOptimizedEmbedder) {
                    lastError = new Error(`Module loaded but getOptimizedEmbedder not found`);
                    return false;
                }
                // Only include defined values to not override defaults
                const embedderConfig = {
                    modelId: this.modelId,
                    dimension: this.dimensions,
                    cacheSize: this.embedderCacheSize,
                    autoDownload: this.autoDownload,
                };
                if (this.modelDir !== undefined) {
                    embedderConfig.modelDir = this.modelDir;
                }
                const embedder = getOptimizedEmbedder(embedderConfig);
                await embedder.init();
                // Publish only after init() succeeded so a failed attempt
                // never leaves a half-initialized embedder on the instance.
                this.embedder = embedder;
                this.initialized = true;
                return true;
            }
            catch (error) {
                lastError = error instanceof Error ? error : new Error(String(error));
                return false;
            }
        };
        // Package export first (preferred), then on-disk fallbacks.
        const possiblePaths = ['agentic-flow/embeddings'];
        try {
            const path = await import('path');
            const { existsSync } = await import('fs');
            const { pathToFileURL } = await import('url');
            const cwd = process.cwd();
            const absolutePaths = [
                path.join(cwd, 'node_modules/agentic-flow/dist/embeddings/optimized-embedder.js'),
                path.join(cwd, '../node_modules/agentic-flow/dist/embeddings/optimized-embedder.js'),
                '/workspaces/claude-flow/node_modules/agentic-flow/dist/embeddings/optimized-embedder.js',
            ];
            for (const p of absolutePaths) {
                if (existsSync(p)) {
                    // pathToFileURL handles Windows drive paths and escapes
                    // special characters, unlike prefixing "file://" by hand.
                    possiblePaths.push(pathToFileURL(p).href);
                }
            }
        }
        catch {
            // fs/path/url module not available
        }
        for (const modulePath of possiblePaths) {
            if (await createEmbedder(modulePath)) {
                return;
            }
        }
        const errorDetail = lastError?.message ? ` Last error: ${lastError.message}` : '';
        throw new Error(`Failed to initialize agentic-flow embeddings.${errorDetail} ` +
            `Ensure agentic-flow is installed and ONNX model is downloaded: ` +
            `npx agentic-flow@alpha embeddings init`);
    }
    /**
     * Embed a single text, checking this service's LRU cache first.
     *
     * @param {string} text
     * @returns {Promise<object>} `{ embedding, latencyMs }`, plus
     *   `cached: true` on a cache hit.
     * @throws {Error} "Agentic-flow embedding failed: ..." with the
     *   underlying error attached as `cause`.
     */
    async embed(text) {
        await this.initialize();
        const cached = this.cache.get(text);
        if (cached) {
            this.emitEvent({ type: 'cache_hit', text });
            return {
                embedding: cached,
                latencyMs: 0,
                cached: true,
            };
        }
        this.emitEvent({ type: 'embed_start', text });
        const startTime = performance.now();
        try {
            const embedding = await this.embedder.embed(text);
            this.cache.set(text, embedding);
            const latencyMs = performance.now() - startTime;
            this.emitEvent({ type: 'embed_complete', text, latencyMs });
            return {
                embedding,
                latencyMs,
            };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : 'Unknown error';
            this.emitEvent({ type: 'embed_error', text, error: message });
            throw new Error(`Agentic-flow embedding failed: ${message}`, { cause: error });
        }
    }
    /**
     * Embed several texts, passing only the cache misses to the embedder's
     * `embedBatch`.
     *
     * @param {string[]} texts
     * @returns {Promise<object>} `{ embeddings, totalLatencyMs, avgLatencyMs,
     *   cacheStats: { hits, misses } }`; `embeddings` is in the same order as
     *   `texts`. `avgLatencyMs` is 0 for an empty input.
     */
    async embedBatch(texts) {
        await this.initialize();
        this.emitEvent({ type: 'batch_start', count: texts.length });
        const startTime = performance.now();
        const cached = [];
        const uncached = [];
        texts.forEach((text, index) => {
            const cachedEmbedding = this.cache.get(text);
            if (cachedEmbedding) {
                cached.push({ index, embedding: cachedEmbedding });
                this.emitEvent({ type: 'cache_hit', text });
            }
            else {
                uncached.push({ index, text });
            }
        });
        let batchEmbeddings = [];
        if (uncached.length > 0) {
            const uncachedTexts = uncached.map(u => u.text);
            batchEmbeddings = await this.embedder.embedBatch(uncachedTexts);
            uncached.forEach((item, i) => {
                this.cache.set(item.text, batchEmbeddings[i]);
            });
        }
        // Reconstruct result array in original order
        const embeddings = new Array(texts.length);
        cached.forEach(c => {
            embeddings[c.index] = c.embedding;
        });
        uncached.forEach((u, i) => {
            embeddings[u.index] = batchEmbeddings[i];
        });
        const totalLatencyMs = performance.now() - startTime;
        this.emitEvent({ type: 'batch_complete', count: texts.length, latencyMs: totalLatencyMs });
        return {
            embeddings,
            totalLatencyMs,
            // Avoid 0 / 0 = NaN for an empty batch.
            avgLatencyMs: texts.length > 0 ? totalLatencyMs / texts.length : 0,
            cacheStats: {
                hits: cached.length,
                misses: uncached.length,
            },
        };
    }
    /**
     * Get combined cache statistics from both our LRU cache and the
     * embedder's internal cache.
     *
     * @returns {object} Base stats, with `size` / `maxSize` summed across
     *   both caches and the embedder's own stats under `embedderCache` when
     *   the embedder exposes `getCacheStats`. `hitRate` is this service's
     *   LRU hit rate only.
     */
    getCacheStats() {
        const baseStats = super.getCacheStats();
        if (this.embedder && this.embedder.getCacheStats) {
            const embedderStats = this.embedder.getCacheStats();
            return {
                // Treat missing fields as 0 so the sums never become NaN.
                size: baseStats.size + (embedderStats?.size ?? 0),
                maxSize: baseStats.maxSize + (embedderStats?.maxSize ?? 0),
                hitRate: baseStats.hitRate,
                embedderCache: embedderStats,
            };
        }
        return baseStats;
    }
    /**
     * Clear the embedder's cache when it offers `clearCache`, then run the
     * base shutdown.
     *
     * @returns {Promise<void>}
     */
    async shutdown() {
        if (this.embedder && this.embedder.clearCache) {
            this.embedder.clearCache();
        }
        await super.shutdown();
    }
}
|
|
// ============================================================================
|
|
// Factory Functions
|
|
// ============================================================================
|
|
/**
 * Probe whether the `agentic-flow/embeddings` module can be imported.
 *
 * @returns {Promise<boolean>} `true` when the dynamic import succeeds,
 *   `false` when it rejects for any reason.
 */
async function isAgenticFlowAvailable() {
    return import('agentic-flow/embeddings').then(() => true, () => false);
}
|
|
/**
 * Install agentic-flow via npm and download its embedding model, unless the
 * module is already importable.
 *
 * @returns {Promise<boolean>} `true` when the module is importable at the
 *   end, `false` when it is not or when any step fails (the failure is
 *   logged as a warning).
 */
async function autoInstallAgenticFlow() {
    const childProcess = await import('child_process');
    const util = await import('util');
    const run = util.promisify(childProcess.exec);
    try {
        // Nothing to do when the module already resolves.
        const alreadyInstalled = await isAgenticFlowAvailable();
        if (alreadyInstalled) {
            return true;
        }
        console.log('[embeddings] Installing agentic-flow@alpha...');
        await run('npm install agentic-flow@alpha --save', { timeout: 120000 });
        console.log('[embeddings] Downloading embedding model...');
        await run('npx agentic-flow@alpha embeddings init', { timeout: 300000 });
        // Re-probe to confirm the install actually made the module available.
        const verified = await isAgenticFlowAvailable();
        return verified;
    }
    catch (failure) {
        const detail = failure instanceof Error ? failure.message : failure;
        console.warn('[embeddings] Auto-install failed:', detail);
        return false;
    }
}
|
|
/**
 * Create embedding service based on configuration (sync version)
 * Note: For 'auto' provider or smart fallback, use createEmbeddingServiceAsync
 *
 * @param {object} config Must carry `provider`; the whole object is handed
 *   to the matching service constructor.
 * @returns {object} A service instance. Any unrecognised provider yields a
 *   MockEmbeddingService (after a warning naming the provider) that keeps
 *   the requested `dimensions` (default 384) and `cacheSize`.
 */
export function createEmbeddingService(config) {
    switch (config.provider) {
        case 'openai':
            return new OpenAIEmbeddingService(config);
        case 'transformers':
            return new TransformersEmbeddingService(config);
        case 'mock':
            return new MockEmbeddingService(config);
        case 'agentic-flow':
            return new AgenticFlowEmbeddingService(config);
        default:
            // Name the offending provider so the misconfiguration is traceable.
            console.warn(`Unknown provider '${config.provider}', using mock`);
            // Preserve the caller's vector size so downstream code that
            // expects `dimensions` components keeps working on the mock.
            return new MockEmbeddingService({
                provider: 'mock',
                dimensions: config.dimensions ?? 384,
                cacheSize: config.cacheSize,
            });
    }
}
|
|
/**
 * Create embedding service with automatic provider detection and fallback
 *
 * Features:
 * - 'auto' provider picks best available: agentic-flow > transformers > mock
 * - Automatic fallback if primary provider fails to initialize
 * - Pre-validates provider availability before returning
 *
 * Every option other than `provider`, `fallback` and `autoInstall` is
 * forwarded to the chosen service constructor, so provider-specific settings
 * (for example OpenAI `model` / `baseURL` / `timeout`) take effect.
 *
 * @example
 * // Auto-select best provider
 * const service = await createEmbeddingServiceAsync({ provider: 'auto' });
 *
 * // Try agentic-flow, fallback to transformers
 * const service = await createEmbeddingServiceAsync({
 *   provider: 'agentic-flow',
 *   fallback: 'transformers'
 * });
 *
 * @param {object} config `provider` ('auto', 'agentic-flow', 'transformers',
 *   'openai' or 'mock'), optional `fallback` provider, optional
 *   `autoInstall` (default true, used by 'auto'), plus provider options.
 * @returns {Promise<object>} A service whose `embed('test')` call succeeded,
 *   or a mock service as the last resort of 'auto'.
 * @throws {Error} When the primary provider cannot be created or validated
 *   and no `fallback` is configured.
 */
export async function createEmbeddingServiceAsync(config) {
    const { provider, fallback, autoInstall = true, ...rest } = config;
    // Builders spread `rest` first so caller options are kept, then pin the
    // provider name and the documented defaults.
    const buildAgenticFlow = () => new AgenticFlowEmbeddingService({
        ...rest,
        provider: 'agentic-flow',
        modelId: rest.modelId ?? 'all-MiniLM-L6-v2',
        dimensions: rest.dimensions ?? 384,
    });
    const buildTransformers = () => new TransformersEmbeddingService({
        ...rest,
        provider: 'transformers',
        model: rest.model ?? 'Xenova/all-MiniLM-L6-v2',
    });
    const buildOpenAI = () => {
        if (!rest.apiKey) {
            throw new Error('OpenAI provider requires apiKey');
        }
        return new OpenAIEmbeddingService({ ...rest, provider: 'openai' });
    };
    const buildMock = () => new MockEmbeddingService({
        ...rest,
        dimensions: rest.dimensions ?? 384,
    });
    // Auto provider selection
    if (provider === 'auto') {
        // Try agentic-flow first (fastest, ONNX-based)
        let agenticFlowAvailable = await isAgenticFlowAvailable();
        if (!agenticFlowAvailable && autoInstall) {
            agenticFlowAvailable = await autoInstallAgenticFlow();
        }
        if (agenticFlowAvailable) {
            try {
                const service = buildAgenticFlow();
                // Validate it can initialize
                await service.embed('test');
                return service;
            }
            catch {
                // Best effort: fall through to the next provider.
            }
        }
        // Try transformers (good quality, built-in)
        try {
            const service = buildTransformers();
            await service.embed('test');
            return service;
        }
        catch {
            // Best effort: fall through to mock.
        }
        // Fallback to mock (always works)
        console.warn('[embeddings] Using mock provider - install agentic-flow or @xenova/transformers for real embeddings');
        return buildMock();
    }
    // Specific provider with optional fallback
    const createPrimary = () => {
        switch (provider) {
            case 'agentic-flow':
                return buildAgenticFlow();
            case 'transformers':
                return buildTransformers();
            case 'openai':
                return buildOpenAI();
            case 'mock':
                return buildMock();
            default:
                throw new Error(`Unknown provider: ${provider}`);
        }
    };
    try {
        // Construction happens inside the try so that a creation failure
        // (e.g. missing apiKey) is eligible for the fallback as well.
        const primary = createPrimary();
        await primary.embed('test');
        return primary;
    }
    catch (error) {
        if (!fallback) {
            throw error;
        }
        console.warn(`[embeddings] Primary provider '${provider}' failed, using fallback '${fallback}'`);
        // Carry `autoInstall` along; otherwise a fallback of 'auto' would
        // silently revert to the default of installing packages.
        const fallbackConfig = { ...rest, provider: fallback, autoInstall };
        return createEmbeddingServiceAsync(fallbackConfig);
    }
}
|
|
/**
 * One-shot helper: build a service, embed a single text, and always shut
 * the service down afterwards.
 *
 * @param {string} text Text to embed.
 * @param {object} [config] Overrides merged on top of the defaults
 *   `{ provider: 'mock', dimensions: 384 }`.
 * @returns {Promise<*>} The `embedding` field of the service's result.
 */
export async function getEmbedding(text, config) {
    const defaults = { provider: 'mock', dimensions: 384 };
    const service = createEmbeddingService(Object.assign({}, defaults, config));
    try {
        const { embedding } = await service.embed(text);
        return embedding;
    }
    finally {
        // Runs whether or not embed() succeeded.
        await service.shutdown();
    }
}
|
|
// ============================================================================
|
|
// Similarity Functions
|
|
// ============================================================================
|
|
/**
 * Cosine of the angle between two equally sized vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when that denominator is
 *   not positive (e.g. either vector is all zeros).
 * @throws {Error} When the lengths differ.
 */
export function cosineSimilarity(a, b) {
    const length = a.length;
    if (length !== b.length) {
        throw new Error('Embedding dimensions must match');
    }
    let dot = 0;
    let normA = 0;
    let normB = 0;
    let i = 0;
    while (i < length) {
        const x = a[i];
        const y = b[i];
        dot += x * y;
        normA += x * x;
        normB += y * y;
        i += 1;
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    if (denom > 0) {
        return dot / denom;
    }
    return 0;
}
|
|
/**
 * Straight-line (L2) distance between two equally sized vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} Square root of the summed squared differences.
 * @throws {Error} When the lengths differ.
 */
export function euclideanDistance(a, b) {
    const length = a.length;
    if (length !== b.length) {
        throw new Error('Embedding dimensions must match');
    }
    let squared = 0;
    let i = 0;
    while (i < length) {
        const delta = a[i] - b[i];
        squared += delta * delta;
        i += 1;
    }
    return Math.sqrt(squared);
}
|
|
/**
 * Inner product of two equally sized vectors.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} Sum of the pairwise products.
 * @throws {Error} When the lengths differ.
 */
export function dotProduct(a, b) {
    if (a.length !== b.length) {
        throw new Error('Embedding dimensions must match');
    }
    let total = 0;
    for (let i = 0, n = a.length; i < n; i += 1) {
        total += a[i] * b[i];
    }
    return total;
}
|
|
/**
 * Score two embeddings with the chosen metric.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @param {string} [metric='cosine'] 'cosine', 'euclidean' or 'dot'. Any
 *   other value is scored and reported as 'cosine'.
 * @returns {{score: number, metric: string}} For 'euclidean' the score is
 *   1 / (1 + distance), so closer vectors score higher.
 */
export function computeSimilarity(a, b, metric = 'cosine') {
    if (metric === 'euclidean') {
        // Convert distance to similarity (closer = higher score)
        return { score: 1 / (1 + euclideanDistance(a, b)), metric };
    }
    if (metric === 'dot') {
        return { score: dotProduct(a, b), metric };
    }
    // 'cosine' itself and every unrecognised metric land here.
    return { score: cosineSimilarity(a, b), metric: 'cosine' };
}
|
|
//# sourceMappingURL=embedding-service.js.map
|