/**
 * Attention Module Fallbacks
 *
 * Pure-JavaScript fallbacks for @ruvector/attention, which is currently
 * broken. These implementations are slower than the native module but
 * preserve the same functionality.
 */
// Used by isNativeAttentionAvailable() to load the CommonJS native module from ESM
import { createRequire } from 'node:module';

/**
 * Scaled Dot-Product Attention
 *
 * The core attention mechanism, reduced here to a single query vector
 * attending to a single key/value pair.
 */
export function scaledDotProductAttention(query, key, value, mask) {
    const dk = query.length;

    // Compute attention score: Q · K / sqrt(dk)
    let score = 0;
    for (let i = 0; i < dk; i++) {
        score += query[i] * key[i];
    }
    score /= Math.sqrt(dk);

    // Apply mask if provided (mask[0] === 0 masks out the single key)
    if (mask && mask[0] === 0) {
        score = -Infinity;
    }

    // Softmax, simplified for a single K,V pair: the unnormalized exp(score)
    // is used directly as the weight (0 when masked)
    const weight = Math.exp(score);

    // Weighted value
    const output = value.map(v => v * weight);

    return { output, weights: [weight] };
}

/**
 * Multi-Head Attention (JavaScript fallback)
 *
 * Replaces the broken @ruvector/attention.multiHeadAttention.
 */
export class MultiHeadAttention {
    numHeads;
    hiddenDim;
    headDim;
    queryWeights;
    keyWeights;
    valueWeights;
    outputWeights;

    constructor(config) {
        this.numHeads = config.numHeads || 8;
        this.hiddenDim = config.hiddenDim;
        this.headDim = Math.floor(this.hiddenDim / this.numHeads);

        // Initialize weights with small random values
        this.queryWeights = this.initializeWeights();
        this.keyWeights = this.initializeWeights();
        this.valueWeights = this.initializeWeights();
        this.outputWeights = this.initializeOutputWeights();
    }

    // Per-head projection matrices of shape [headDim, hiddenDim]
    initializeWeights() {
        const weights = [];
        for (let h = 0; h < this.numHeads; h++) {
            const headWeights = [];
            for (let i = 0; i < this.headDim; i++) {
                const row = [];
                for (let j = 0; j < this.hiddenDim; j++) {
                    row.push((Math.random() - 0.5) * 0.1);
                }
                headWeights.push(row);
            }
            weights.push(headWeights);
        }
        return weights;
    }

    // Output projection matrix of shape [hiddenDim, hiddenDim]
    initializeOutputWeights() {
        const weights = [];
        for (let i = 0; i < this.hiddenDim; i++) {
            const row = [];
            for (let j = 0; j < this.hiddenDim; j++) {
                row.push((Math.random() - 0.5) * 0.1);
            }
            weights.push(row);
        }
        return weights;
    }

    forward(query, key, value, mask) {
        const headOutputs = [];
        const allWeights = [];

        // Process each head independently
        for (let h = 0; h < this.numHeads; h++) {
            // Project inputs down to the head dimension
            const q = this.project(query, this.queryWeights[h]);
            const k = this.project(key, this.keyWeights[h]);
            const v = this.project(value, this.valueWeights[h]);

            // Attention for this head
            const { output, weights } = scaledDotProductAttention(q, k, v, mask);
            headOutputs.push(output);
            allWeights.push(weights);
        }

        // Concatenate head outputs back to hiddenDim
        const concatenated = headOutputs.flat();

        // Final output projection
        const output = this.project(concatenated, this.outputWeights);

        return { output, attentionWeights: allWeights };
    }

    // Matrix-vector product: weights of shape [rows, input.length] times input
    project(input, weights) {
        const output = [];
        for (let i = 0; i < weights.length; i++) {
            let sum = 0;
            for (let j = 0; j < input.length; j++) {
                sum += input[j] * weights[i][j];
            }
            output.push(sum);
        }
        return output;
    }
}
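
// Usage sketch (illustrative, with assumed values; not part of the module API):
// each input to forward() is a single hiddenDim-length vector, so a config of
// { numHeads: 2, hiddenDim: 8 } projects to two 4-dimensional heads.
//
//   const mha = new MultiHeadAttention({ numHeads: 2, hiddenDim: 8 });
//   const vec = Array.from({ length: 8 }, () => Math.random());
//   const { output, attentionWeights } = mha.forward(vec, vec, vec);
//   // output.length === 8; attentionWeights holds one weight array per head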
/**
 * Flash Attention (optimized fallback)
 *
 * Replaces the broken @ruvector/attention.flashAttention.
 * Uses tiling/chunking over the query sequence for better memory efficiency.
 */
export class FlashAttention {
    hiddenDim;
    blockSize;

    constructor(config) {
        this.hiddenDim = config.hiddenDim;
        this.blockSize = Math.min(64, this.hiddenDim); // Tile size
    }

    forward(query, key, value, numHeads = 8) {
        const seqLen = query.length;
        const headDim = this.hiddenDim / numHeads; // Used only to scale scores
        const output = [];
        const attentionScores = [];

        // Process queries in blocks for memory efficiency
        for (let i = 0; i < seqLen; i += this.blockSize) {
            const blockEnd = Math.min(i + this.blockSize, seqLen);
            for (let qi = i; qi < blockEnd; qi++) {
                const scores = [];
                let maxScore = -Infinity;

                // Compute attention scores for this query against every key
                for (let ki = 0; ki < seqLen; ki++) {
                    let score = 0;
                    for (let d = 0; d < query[qi].length; d++) {
                        score += query[qi][d] * key[ki][d];
                    }
                    score /= Math.sqrt(headDim);
                    scores.push(score);
                    maxScore = Math.max(maxScore, score);
                }

                // Numerically stable softmax
                const expScores = scores.map(s => Math.exp(s - maxScore));
                const sumExp = expScores.reduce((a, b) => a + b, 0);
                const weights = expScores.map(e => e / sumExp);

                // Weighted sum of values
                const outputRow = new Array(value[0].length).fill(0);
                for (let vi = 0; vi < seqLen; vi++) {
                    for (let d = 0; d < value[vi].length; d++) {
                        outputRow[d] += weights[vi] * value[vi][d];
                    }
                }

                output.push(outputRow);
                attentionScores.push(weights);
            }
        }

        return { output, attentionScores };
    }
}

/**
 * Linear Attention (fallback)
 *
 * O(n) complexity approximation of attention: applies an ELU-based feature
 * map and computes Q (K^T V) instead of (Q K^T) V.
 */
export class LinearAttention {
    hiddenDim;
    featureMap;

    constructor(config) {
        this.hiddenDim = config.hiddenDim;
        // ELU feature map keeps mapped values near-positive for stable normalization
        this.featureMap = (x) => (x > 0 ? x : Math.exp(x) - 1);
    }

    forward(query, key, value) {
        const seqLen = query.length;
        const dim = value[0].length;

        // Apply the feature map elementwise
        const queryMapped = query.map(q => q.map(this.featureMap));
        const keyMapped = key.map(k => k.map(this.featureMap));

        // Compute K^T V (shape: [hiddenDim, dim])
        const ktv = Array.from({ length: this.hiddenDim }, () => Array(dim).fill(0));
        for (let i = 0; i < seqLen; i++) {
            for (let d1 = 0; d1 < this.hiddenDim; d1++) {
                for (let d2 = 0; d2 < dim; d2++) {
                    ktv[d1][d2] += keyMapped[i][d1] * value[i][d2];
                }
            }
        }

        // Compute Q (K^T V)
        const output = [];
        for (let i = 0; i < seqLen; i++) {
            const row = [];
            for (let d2 = 0; d2 < dim; d2++) {
                let sum = 0;
                for (let d1 = 0; d1 < this.hiddenDim; d1++) {
                    sum += queryMapped[i][d1] * ktv[d1][d2];
                }
                row.push(sum);
            }
            // Normalize by the mapped-query mass (epsilon avoids division by zero)
            const normSum = queryMapped[i].reduce((a, b) => a + b, 0);
            output.push(row.map(v => v / (normSum + 1e-8)));
        }

        return { output };
    }
}

/**
 * Hyperbolic Attention (simplified fallback)
 *
 * Approximation using hyperbolic geometry: the attention weight decays with
 * the Poincaré-ball distance between query and key.
 */
export class HyperbolicAttention {
    hiddenDim;
    curvature;

    constructor(config) {
        this.hiddenDim = config.hiddenDim;
        this.curvature = -1.0; // Poincaré ball curvature
    }

    forward(query, key, value) {
        // Hyperbolic distance (simplified)
        const distance = this.hyperbolicDistance(query, key);

        // Attention weight based on hyperbolic distance
        const weight = Math.exp(-distance);

        // Weighted value
        const output = value.map(v => v * weight);

        return { output, distance };
    }

    // Poincaré-ball distance: acosh(1 + 2||a - b||^2 / ((1 - ||a||^2)(1 - ||b||^2)))
    // Assumes inputs lie inside the unit ball (norms < 1)
    hyperbolicDistance(a, b) {
        let normDiffSq = 0;
        for (let i = 0; i < a.length; i++) {
            const diff = a[i] - b[i];
            normDiffSq += diff * diff;
        }
        const normASq = a.reduce((sum, v) => sum + v * v, 0);
        const normBSq = b.reduce((sum, v) => sum + v * v, 0);
        const denominator = (1 - normASq) * (1 - normBSq);
        return Math.acosh(1 + (2 * normDiffSq) / denominator);
    }
}
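
// Usage sketch (illustrative shapes): unlike the single-vector helpers above,
// FlashAttention and LinearAttention operate on sequences, one row per token.
//
//   const flash = new FlashAttention({ hiddenDim: 8 });
//   const seq = Array.from({ length: 4 },
//       () => Array.from({ length: 8 }, () => Math.random()));
//   const { output, attentionScores } = flash.forward(seq, seq, seq, 2);
//   // output: 4 rows of length 8; each attentionScores[i] sums to ~1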
/**
 * MoE (Mixture of Experts) Attention (fallback)
 *
 * Routes each query to a weighted combination of expert attention modules.
 */
export class MoEAttention {
    experts;
    numExperts;
    gatingWeights;

    constructor(config) {
        this.numExperts = config.numExperts || 4;
        this.experts = Array.from({ length: this.numExperts },
            () => new MultiHeadAttention(config));

        // Initialize gating network weights (shape: [numExperts, hiddenDim])
        this.gatingWeights = Array.from({ length: this.numExperts },
            () => Array.from({ length: config.hiddenDim },
                () => (Math.random() - 0.5) * 0.1));
    }

    forward(query, key, value, topK = 2) {
        // Compute a gating score per expert (dot product with the query)
        const gatingScores = this.gatingWeights.map(weights => {
            let score = 0;
            for (let i = 0; i < query.length; i++) {
                score += query[i] * weights[i];
            }
            return score;
        });

        // Softmax over all experts
        const expScores = gatingScores.map(s => Math.exp(s));
        const sumExp = expScores.reduce((a, b) => a + b, 0);
        const expertWeights = expScores.map(e => e / sumExp);

        // Select the top-K experts (weights are not renormalized over the top-K)
        const expertIndices = expertWeights
            .map((weight, idx) => ({ weight, idx }))
            .sort((a, b) => b.weight - a.weight)
            .slice(0, topK);

        // Weighted combination of the selected experts' outputs
        const output = new Array(query.length).fill(0);
        for (const { weight, idx } of expertIndices) {
            const expertOutput = this.experts[idx].forward(query, key, value).output;
            for (let i = 0; i < output.length; i++) {
                output[i] += weight * expertOutput[i];
            }
        }

        return { output, expertWeights };
    }
}

/**
 * Check if the native attention module is available
 */
export function isNativeAttentionAvailable() {
    try {
        // createRequire lets this ES module load the CommonJS native package;
        // a bare require() would throw in ESM and always report unavailable
        const require = createRequire(import.meta.url);
        const attention = require('@ruvector/attention');
        // Try a simple operation to confirm the module actually works
        attention.flashAttention(
            new Float32Array([1, 0]),
            new Float32Array([1, 0]),
            new Float32Array([1, 0]),
            1
        );
        return true;
    }
    catch {
        return false;
    }
}

/**
 * Factory function to create the appropriate attention module
 */
export function createAttention(type, config) {
    switch (type) {
        case 'multi-head':
            return new MultiHeadAttention(config);
        case 'flash':
            return new FlashAttention(config);
        case 'linear':
            return new LinearAttention(config);
        case 'hyperbolic':
            return new HyperbolicAttention(config);
        case 'moe':
            return new MoEAttention(config);
        default:
            return new MultiHeadAttention(config);
    }
}
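
// Usage sketch (illustrative config values): route through the factory, e.g.
// a mixture-of-experts fallback over 4 experts.
//
//   const attn = createAttention('moe', { hiddenDim: 64, numExperts: 4, numHeads: 8 });
//   const q = Array.from({ length: 64 }, () => Math.random());
//   const { output, expertWeights } = attn.forward(q, q, q);
//   // output.length === 64; expertWeights sums to ~1 across the 4 experts

//# sourceMappingURL=attention-fallbacks.js.map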