/**
 * Embedding Normalization Utilities
 *
 * Features:
 * - L2 (Euclidean) normalization
 * - L1 (Manhattan) normalization
 * - Min-max normalization
 * - Z-score standardization
 * - Batch normalization
 */

/**
 * L2 (Euclidean) normalize an embedding to unit length.
 * Most common preprocessing step for cosine similarity.
 *
 * @param {ArrayLike<number>} embedding - Input embedding vector (not modified).
 * @param {number} [epsilon=1e-12] - Norms at or below this value are treated
 *   as zero; the result is then all zeros instead of dividing by ~0.
 * @returns {Float32Array} New vector with ||v|| = 1 (or all zeros).
 */
export function l2Normalize(embedding, epsilon = 1e-12) {
  const result = new Float32Array(embedding.length);
  const norm = l2Norm(embedding);
  const scale = norm > epsilon ? 1 / norm : 0;
  for (let i = 0; i < embedding.length; i++) {
    result[i] = embedding[i] * scale;
  }
  return result;
}

/**
 * L2 normalize an embedding in place (modifies the array that is passed in).
 *
 * @param {Float32Array|number[]} embedding - Vector to normalize; overwritten.
 * @param {number} [epsilon=1e-12] - Norms at or below this value are treated
 *   as zero; every element is then set to 0.
 * @returns {Float32Array|number[]} The same `embedding` reference.
 */
export function l2NormalizeInPlace(embedding, epsilon = 1e-12) {
  const norm = l2Norm(embedding);
  const scale = norm > epsilon ? 1 / norm : 0;
  for (let i = 0; i < embedding.length; i++) {
    embedding[i] *= scale;
  }
  return embedding;
}

/**
 * L1 (Manhattan) normalize an embedding so the sum of absolute values is 1.
 *
 * @param {ArrayLike<number>} embedding - Input embedding vector (not modified).
 * @param {number} [epsilon=1e-12] - L1 norms at or below this value are
 *   treated as zero; the result is then all zeros.
 * @returns {Float32Array} New normalized vector.
 */
export function l1Normalize(embedding, epsilon = 1e-12) {
  const result = new Float32Array(embedding.length);

  // L1 norm: sum of absolute values.
  let sumAbs = 0;
  for (let i = 0; i < embedding.length; i++) {
    sumAbs += Math.abs(embedding[i]);
  }

  const scale = sumAbs > epsilon ? 1 / sumAbs : 0;
  for (let i = 0; i < embedding.length; i++) {
    result[i] = embedding[i] * scale;
  }
  return result;
}

/**
 * Min-max normalize an embedding to the [0, 1] range.
 *
 * @param {ArrayLike<number>} embedding - Input embedding vector (not modified).
 * @param {number} [epsilon=1e-12] - If (max - min) is at or below this value
 *   the vector is considered constant and the result is all zeros.
 * @returns {Float32Array} New vector with min mapped to 0 and max mapped to 1.
 */
export function minMaxNormalize(embedding, epsilon = 1e-12) {
  const result = new Float32Array(embedding.length);

  let min = Infinity;
  let max = -Infinity;
  for (let i = 0; i < embedding.length; i++) {
    if (embedding[i] < min) min = embedding[i];
    if (embedding[i] > max) max = embedding[i];
  }

  // For empty input the range is -Infinity, which also selects scale = 0.
  const range = max - min;
  const scale = range > epsilon ? 1 / range : 0;
  for (let i = 0; i < embedding.length; i++) {
    result[i] = (embedding[i] - min) * scale;
  }
  return result;
}

/**
 * Z-score standardize an embedding (mean = 0, std = 1).
 * Uses the population standard deviation (divides by n, not n - 1).
 *
 * @param {ArrayLike<number>} embedding - Input embedding vector (not modified).
 * @param {number} [epsilon=1e-12] - Standard deviations at or below this
 *   value are treated as zero; the result is then all zeros.
 * @returns {Float32Array} New standardized vector.
 */
export function zScoreNormalize(embedding, epsilon = 1e-12) {
  const n = embedding.length;
  const result = new Float32Array(n);

  let sum = 0;
  for (let i = 0; i < n; i++) {
    sum += embedding[i];
  }
  const mean = sum / n;

  let sumSquaredDiff = 0;
  for (let i = 0; i < n; i++) {
    const diff = embedding[i] - mean;
    sumSquaredDiff += diff * diff;
  }
  const std = Math.sqrt(sumSquaredDiff / n);
  const scale = std > epsilon ? 1 / std : 0;

  for (let i = 0; i < n; i++) {
    result[i] = (embedding[i] - mean) * scale;
  }
  return result;
}

/**
 * Normalize an embedding using the specified method.
 *
 * @param {ArrayLike<number>} embedding - Input embedding vector.
 * @param {object} [options]
 * @param {'l2'|'l1'|'minmax'|'zscore'|'none'} [options.type='l2'] -
 *   Normalization method. Unrecognized values fall back to L2.
 * @param {number} [options.epsilon=1e-12] - Forwarded to the chosen method.
 * @param {boolean} [options.inPlace=false] - Only honored when `type` is
 *   'l2' and `embedding` is a Float32Array; every other combination returns
 *   a newly allocated array.
 * @returns {Float32Array} The normalized vector. With `type: 'none'` a
 *   Float32Array input is returned as-is (same reference, no copy); any
 *   other input is converted to a new Float32Array.
 */
export function normalize(embedding, options = {}) {
  const { type = 'l2', epsilon = 1e-12, inPlace = false } = options;

  if (type === 'none') {
    return embedding instanceof Float32Array ? embedding : new Float32Array(embedding);
  }

  if (inPlace && embedding instanceof Float32Array && type === 'l2') {
    return l2NormalizeInPlace(embedding, epsilon);
  }

  switch (type) {
    case 'l2':
      return l2Normalize(embedding, epsilon);
    case 'l1':
      return l1Normalize(embedding, epsilon);
    case 'minmax':
      return minMaxNormalize(embedding, epsilon);
    case 'zscore':
      return zScoreNormalize(embedding, epsilon);
    default:
      // Unknown type: fall back to the default method.
      return l2Normalize(embedding, epsilon);
  }
}

/**
 * Batch normalize multiple embeddings with the same options.
 *
 * @param {Array<ArrayLike<number>>} embeddings - Embeddings to normalize.
 * @param {object} [options] - Same options as {@link normalize}.
 * @returns {Float32Array[]} One normalized vector per input, in order.
 */
export function normalizeBatch(embeddings, options = {}) {
  return embeddings.map((emb) => normalize(emb, options));
}

/**
 * Calculate the L2 (Euclidean) norm of an embedding.
 *
 * @param {ArrayLike<number>} embedding - Input embedding vector.
 * @returns {number} sqrt(sum of squares); 0 for an empty vector.
 */
export function l2Norm(embedding) {
  let sumSquares = 0;
  for (let i = 0; i < embedding.length; i++) {
    sumSquares += embedding[i] * embedding[i];
  }
  return Math.sqrt(sumSquares);
}

/**
 * Check whether an embedding is already L2-normalized (norm ≈ 1).
 *
 * @param {ArrayLike<number>} embedding - Input embedding vector.
 * @param {number} [tolerance=1e-6] - Maximum allowed |norm - 1| (exclusive).
 * @returns {boolean} True when the L2 norm is within `tolerance` of 1.
 */
export function isNormalized(embedding, tolerance = 1e-6) {
  const norm = l2Norm(embedding);
  return Math.abs(norm - 1) < tolerance;
}

/**
 * Center embeddings by subtracting the per-dimension mean across the batch.
 * Useful for improving similarity metrics.
 *
 * The dimensionality is taken from the first embedding; every embedding is
 * read at indices 0..dim-1.
 *
 * @param {Array<ArrayLike<number>>} embeddings - Batch of embeddings
 *   (not modified).
 * @returns {Float32Array[]} New centered embeddings; `[]` for an empty batch.
 */
export function centerEmbeddings(embeddings) {
  if (embeddings.length === 0) return [];

  const dim = embeddings[0].length;
  const n = embeddings.length;

  // Accumulate in double precision: a Float32Array accumulator rounds after
  // every addition, which visibly skews the mean for large batches or
  // large-magnitude components.
  const mean = new Float64Array(dim);
  for (const emb of embeddings) {
    for (let i = 0; i < dim; i++) {
      mean[i] += emb[i];
    }
  }
  for (let i = 0; i < dim; i++) {
    mean[i] /= n;
  }

  return embeddings.map((emb) => {
    const centered = new Float32Array(dim);
    for (let i = 0; i < dim; i++) {
      centered[i] = emb[i] - mean[i];
    }
    return centered;
  });
}
//# sourceMappingURL=normalization.js.map