// EmbeddingService.d.ts - TypeScript declarations (380 lines, 11 KiB)
/**
 * EmbeddingService - Unified embedding interface for agentic-flow
 *
 * Uses ruvector@0.1.61+ for ONNX embeddings with:
 * - SIMD128 acceleration (6x faster)
 * - Parallel worker threads (7 workers)
 * - all-MiniLM-L6-v2 model (384 dimensions)
 * - Persistent SQLite cache (0.1ms vs 400ms)
 *
 * Configure via:
 * - AGENTIC_FLOW_EMBEDDINGS=simple|onnx|auto (default: auto)
 * - AGENTIC_FLOW_EMBEDDING_MODEL=all-MiniLM-L6-v2 (default)
 * - AGENTIC_FLOW_EMBEDDING_CACHE=true|false (default: true)
 * - AGENTIC_FLOW_PERSISTENT_CACHE=true|false (default: true)
 */
/**
 * Identifier of an embedding backend.
 *
 * The same union is used both for the configured backend (which may be
 * `'auto'`) and for the effective backend reported after detection.
 */
export type EmbeddingBackend = 'simple' | 'onnx' | 'auto';
/**
 * Statistics snapshot, as returned by `EmbeddingService.getStats()`.
 */
export interface EmbeddingStats {
    /** Configured backend (may be `'auto'`). */
    backend: EmbeddingBackend;
    /** Backend in effect after detection. */
    effectiveBackend: EmbeddingBackend;
    /** Embedding dimension. */
    dimension: number;
    /** Count of embeddings — presumably those generated so far; confirm against implementation. */
    totalEmbeddings: number;
    /** Accumulated latency, in milliseconds (per the field name). */
    totalLatencyMs: number;
    /** Average latency, in milliseconds (per the field name). */
    avgLatencyMs: number;
    /** Number of cache hits. */
    cacheHits: number;
    /** Whether the model is loaded. */
    modelLoaded: boolean;
    /** Model name, when reported. */
    modelName?: string;
    /** SIMD availability, when reported. */
    simdAvailable?: boolean;
    /** Number of parallel workers, when reported. */
    parallelWorkers?: number;
    /** Persistent cache statistics, when reported. */
    persistentCache?: {
        enabled: boolean;
        entries: number;
        hits: number;
        misses: number;
        hitRate: number;
        /** Database size — in KB per the field name. */
        dbSizeKB: number;
    };
}
/**
 * A similarity score paired with a timing value.
 *
 * NOTE(review): no other declaration in this file references this type.
 */
export interface SimilarityResult {
    /** Similarity score. */
    similarity: number;
    /** Elapsed time, in milliseconds (per the field name). */
    timeMs: number;
}
/**
 * One hit of a semantic search; `semanticSearch` resolves to an array of these.
 */
export interface SearchResult {
    /** The matched text. */
    text: string;
    /** Index of the match — presumably its position in the corpus; confirm against implementation. */
    index: number;
    /** Similarity score of the match. */
    similarity: number;
}
/**
 * One group of near-duplicate texts; `findDuplicates` resolves to an array of these.
 */
export interface DuplicateGroup {
    /** Indices of the grouped texts — presumably positions in the input list; confirm against implementation. */
    indices: number[];
    /** The grouped texts. */
    texts: string[];
    /** Similarity value reported for the group. */
    similarity: number;
}
/**
 * Declared surface of the embedding service.
 *
 * The constructor is private; instances are obtained through the static
 * `getInstance()` accessor.
 */
export declare class EmbeddingService {
    private static instance;
    private backend;
    private effectiveBackend;
    private dimension;
    private modelName;
    private modelLoaded;
    private loadingPromise;
    private totalEmbeddings;
    private totalLatencyMs;
    private cacheHits;
    private cache;
    private cacheEnabled;
    private persistentCache;
    private persistentCacheEnabled;
    private corpus;
    private constructor();
    /**
     * Accessor for an `EmbeddingService` instance (the constructor is private).
     */
    static getInstance(): EmbeddingService;
    /**
     * Resolve the effective backend based on ONNX detection
     */
    private resolveBackend;
    /**
     * Get configured backend (may be 'auto')
     */
    getBackend(): EmbeddingBackend;
    /**
     * Get effective backend after detection
     */
    getEffectiveBackend(): EmbeddingBackend;
    /**
     * Get embedding dimension
     */
    getDimension(): number;
    /**
     * Check if ONNX model is loaded
     */
    isModelLoaded(): boolean;
    /**
     * Generate embedding for text
     * Auto-detects ONNX and uses it if available (default behavior)
     */
    embed(text: string): Promise<Float32Array>;
    /**
     * Generate embeddings for multiple texts (batch processing with parallel workers)
     * Batch processing provides significant speedup with parallel ONNX workers
     */
    embedBatch(texts: string[]): Promise<Float32Array[]>;
    /**
     * Compute similarity between two texts
     */
    similarity(text1: string, text2: string): Promise<number>;
    /**
     * Compute NxN similarity matrix for a list of texts
     * Uses parallel workers for ONNX backend
     */
    similarityMatrix(texts: string[]): Promise<number[][]>;
    /**
     * Build a corpus for semantic search
     */
    buildCorpus(texts: string[]): Promise<void>;
    /**
     * Semantic search against the corpus
     * Returns top-k most similar texts
     */
    semanticSearch(query: string, topK?: number): Promise<SearchResult[]>;
    /**
     * Find near-duplicate texts in a list
     * Groups texts with similarity above threshold
     */
    findDuplicates(texts: string[], threshold?: number): Promise<DuplicateGroup[]>;
    /**
     * K-means clustering of texts
     * Returns cluster assignments and centroids
     */
    clusterTexts(texts: string[], k?: number, maxIterations?: number): Promise<{
        clusters: number[];
        centroids: Float32Array[];
    }>;
    /**
     * Stream embeddings for large batches (memory efficient)
     * Yields embeddings one at a time
     */
    streamEmbed(texts: string[], batchSize?: number): AsyncGenerator<{
        index: number;
        text: string;
        embedding: Float32Array;
    }>;
    /**
     * Simple hash-based embedding (fast, not semantic)
     */
    simpleEmbed(text: string, dim?: number): Float32Array;
    /**
     * Compute cosine similarity between two embeddings
     */
    cosineSimilarity(a: Float32Array, b: Float32Array): number;
    /**
     * Get statistics
     */
    getStats(): EmbeddingStats;
    /**
     * Clear in-memory cache
     */
    clearCache(): void;
    /**
     * Clear persistent cache (SQLite)
     */
    clearPersistentCache(): void;
    /**
     * Clear all caches (memory + persistent)
     */
    clearAllCaches(): void;
    /**
     * Get persistent cache stats
     *
     * The declared return type allows `null`; the condition under which
     * `null` is returned is not visible from this declaration.
     */
    getPersistentCacheStats(): {
        entries: number;
        hits: number;
        misses: number;
        hitRate: number;
    } | null;
    /**
     * Clear corpus
     */
    clearCorpus(): void;
    /**
     * Shutdown (cleanup workers)
     */
    shutdown(): Promise<void>;
    /**
     * Reset instance (for testing)
     */
    static reset(): Promise<void>;
    /**
     * Pretrain cache with texts from files
     * Embeds content and stores in persistent cache for fast retrieval
     *
     * @param sources - File paths or glob patterns, or array of texts
     * @param options - Pretrain options
     * @returns Stats about pretraining
     */
    pretrain(sources: string | string[], options?: {
        batchSize?: number;
        onProgress?: (processed: number, total: number) => void;
        chunkSize?: number;
        overlapSize?: number;
        skipCached?: boolean;
    }): Promise<{
        processed: number;
        cached: number;
        skipped: number;
        timeMs: number;
    }>;
    /**
     * Pretrain with common programming patterns
     * Pre-caches embeddings for frequently used code patterns
     */
    pretrainCodePatterns(): Promise<{
        cached: number;
        timeMs: number;
    }>;
    /**
     * Pretrain from repository structure
     * Analyzes file names and paths to pre-cache common patterns
     */
    pretrainFromRepo(repoPath?: string): Promise<{
        files: number;
        chunks: number;
        timeMs: number;
    }>;
    /**
     * Incremental pretrain - only process changed files since last run
     * Uses git diff to detect modified files
     */
    pretrainIncremental(options?: {
        since?: string;
        repoPath?: string;
    }): Promise<{
        changedFiles: number;
        newChunks: number;
        skipped: number;
        timeMs: number;
    }>;
    /**
     * Smart chunking - split code by semantic boundaries
     * (functions, classes, etc.) instead of fixed size
     */
    semanticChunk(content: string, fileType: string): string[];
    /**
     * Pretrain with semantic chunking
     * Uses code structure to create meaningful chunks
     */
    pretrainSemantic(sources: string[], options?: {
        batchSize?: number;
        onProgress?: (processed: number, total: number) => void;
    }): Promise<{
        files: number;
        chunks: number;
        timeMs: number;
    }>;
    /**
     * Priority pretrain - cache most frequently used patterns first
     * Tracks access patterns and prioritizes high-frequency queries
     *
     * NOTE(review): this comment sits on the private `accessCounts` member
     * in the emitted declarations; it appears to describe the
     * `recordAccess` / `getTopPatterns` / `pretrainPriority` group below.
     */
    private accessCounts;
    recordAccess(text: string): void;
    getTopPatterns(n?: number): string[];
    pretrainPriority(n?: number): Promise<{
        cached: number;
        timeMs: number;
    }>;
    /**
     * Warmup cache on session start
     * Combines code patterns + recent repo changes
     */
    warmup(repoPath?: string): Promise<{
        patterns: number;
        recentChanges: number;
        timeMs: number;
    }>;
    /**
     * Intelligent pretrain using ruvector worker pool
     * Analyzes repo structure, code patterns, and prepares cache
     * Uses parallel workers for maximum throughput
     */
    pretrainIntelligent(options?: {
        repoPath?: string;
        parallel?: boolean;
        onProgress?: (stage: string, progress: number) => void;
    }): Promise<{
        stages: {
            codePatterns: {
                count: number;
                timeMs: number;
            };
            astAnalysis: {
                files: number;
                functions: number;
                timeMs: number;
            };
            gitHistory: {
                commits: number;
                hotFiles: number;
                timeMs: number;
            };
            dependencies: {
                modules: number;
                imports: number;
                timeMs: number;
            };
            semanticChunks: {
                chunks: number;
                timeMs: number;
            };
        };
        totalCached: number;
        totalTimeMs: number;
    }>;
    /**
     * Background pretrain - runs in worker if available
     * Non-blocking, returns immediately with a promise
     *
     * The return value is a plain object (not a promise) carrying the
     * `promise` to await and a `cancel` callback.
     */
    pretrainBackground(options?: {
        repoPath?: string;
    }): {
        promise: Promise<void>;
        cancel: () => void;
    };
    /**
     * AI-enhanced pretrain using ruvector attention mechanisms
     * Uses HyperbolicAttention for code structure, MoE for routing
     */
    pretrainWithAI(options?: {
        repoPath?: string;
        attentionType?: 'hyperbolic' | 'moe' | 'graph' | 'auto';
        onProgress?: (stage: string, detail: string) => void;
    }): Promise<{
        patterns: {
            type: string;
            count: number;
        }[];
        attention: {
            type: string;
            timeMs: number;
        };
        predictions: {
            prefetch: number;
            confidence: number;
        };
        totalCached: number;
        totalTimeMs: number;
    }>;
    /**
     * Context-aware prefetch using attention
     * Predicts what embeddings will be needed based on current context
     */
    prefetchForContext(context: {
        currentFile?: string;
        recentFiles?: string[];
        taskType?: 'edit' | 'review' | 'debug' | 'test' | 'refactor';
        userQuery?: string;
    }): Promise<{
        prefetched: number;
        confidence: number;
        timeMs: number;
    }>;
}
/**
 * Module-level accessor returning an `EmbeddingService`.
 *
 * NOTE(review): presumably returns the same instance as
 * `EmbeddingService.getInstance()` — confirm against the implementation.
 */
export declare function getEmbeddingService(): EmbeddingService;
/**
 * Module-level function resolving to an embedding for `text`.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.embed` — confirm against the implementation.
 */
export declare function embed(text: string): Promise<Float32Array>;
/**
 * Module-level function resolving to one embedding array per call for `texts`.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.embedBatch` — confirm against the implementation.
 */
export declare function embedBatch(texts: string[]): Promise<Float32Array[]>;
/**
 * Module-level function resolving to `{ cached, timeMs }`.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.pretrainCodePatterns` — confirm against the implementation.
 */
export declare function pretrainCodePatterns(): Promise<{
    cached: number;
    timeMs: number;
}>;
/**
 * Module-level function resolving to `{ files, chunks, timeMs }`.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.pretrainFromRepo` — confirm against the implementation.
 *
 * @param repoPath - Optional repository path.
 */
export declare function pretrainFromRepo(repoPath?: string): Promise<{
    files: number;
    chunks: number;
    timeMs: number;
}>;
/**
 * Module-level function resolving to a similarity number for two texts.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.similarity` — confirm against the implementation.
 */
export declare function textSimilarity(text1: string, text2: string): Promise<number>;
/**
 * Module-level function returning a `Float32Array` for `text` synchronously.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.simpleEmbed` — confirm against the implementation.
 *
 * @param text - Input text.
 * @param dim - Optional dimension argument.
 */
export declare function simpleEmbed(text: string, dim?: number): Float32Array;
/**
 * Module-level function resolving to a two-dimensional number array for `texts`.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.similarityMatrix` — confirm against the implementation.
 */
export declare function similarityMatrix(texts: string[]): Promise<number[][]>;
/**
 * Module-level function resolving to an array of `SearchResult`.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.semanticSearch` — confirm against the implementation.
 *
 * @param query - Query text.
 * @param topK - Optional result-count argument.
 */
export declare function semanticSearch(query: string, topK?: number): Promise<SearchResult[]>;
/**
 * Module-level function resolving to an array of `DuplicateGroup`.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.findDuplicates` — confirm against the implementation.
 *
 * @param texts - Texts to compare.
 * @param threshold - Optional similarity threshold argument.
 */
export declare function findDuplicates(texts: string[], threshold?: number): Promise<DuplicateGroup[]>;
/**
 * Module-level function resolving to `{ clusters, centroids }`.
 *
 * NOTE(review): presumably a convenience wrapper over
 * `EmbeddingService.clusterTexts`; unlike that method, this signature
 * exposes no `maxIterations` parameter.
 *
 * @param texts - Texts to cluster.
 * @param k - Optional cluster-count argument.
 */
export declare function clusterTexts(texts: string[], k?: number): Promise<{
    clusters: number[];
    centroids: Float32Array[];
}>;
//# sourceMappingURL=EmbeddingService.d.ts.map