tasq/node_modules/@claude-flow/neural/dist/modes/edge.js

310 lines
11 KiB
JavaScript

/**
* Edge Mode Implementation
*
* Optimized for resource-constrained environments with:
* - <5MB memory footprint
* - Minimal latency (<1ms)
* - Micro-LoRA (rank-1)
* - Aggressive pruning
* - Async updates
*/
import { BaseModeImplementation } from './base.js';
/**
 * Edge mode for resource-constrained devices.
 *
 * Keeps int8-compressed copies of pattern embeddings and LoRA weights, and
 * defers learning work to small batches that are drained on a timer.
 */
export class EdgeMode extends BaseModeImplementation {
    /** Delay (ms) between two drains of the pending-update queue. */
    static UPDATE_INTERVAL_MS = 100;
    /** Maximum number of queued updates executed per drain. */
    static UPDATE_BATCH_SIZE = 5;

    mode = 'edge';
    // Minimal pattern storage (compressed), keyed by patternId
    compressedPatterns = new Map();
    // Quantized LoRA weights (int8), keyed by `A_<module>` / `B_<module>`
    quantizedWeights = new Map();
    // Scale of the most recent quantizeWeights() call. Kept for backward
    // compatibility and used only as a fallback when a tensor has no entry in
    // quantizationScales.
    quantizationScale = 1.0;
    // Per-tensor quantization scale, keyed by the quantized Int8Array itself.
    // Every tensor has its own scale, so a single shared field cannot be used
    // to dequantize A and B (or cached tensors) correctly.
    quantizationScales = new WeakMap();
    // Pending async updates (FIFO queue of async thunks)
    pendingUpdates = [];
    // Handle of the scheduled drain, or null when none is scheduled
    updateTimer = null;
    // Stats
    totalOps = 0;
    totalTime = 0;

    /**
     * Initialize the base mode, then reset all edge-specific state.
     */
    async initialize() {
        await super.initialize();
        // The queue is emptied below, so a previously scheduled drain is moot.
        this.cancelUpdateTimer();
        this.compressedPatterns.clear();
        this.quantizedWeights.clear();
        this.pendingUpdates = [];
    }

    /**
     * Cancel any scheduled drain, drop all cached state, then clean up the
     * base mode.
     */
    async cleanup() {
        // Also resets the handle to null; leaving a stale handle behind would
        // stop queueAsyncUpdate() from ever scheduling a drain again.
        this.cancelUpdateTimer();
        this.compressedPatterns.clear();
        this.quantizedWeights.clear();
        this.pendingUpdates = [];
        await super.cleanup();
    }

    /**
     * Find the k best-matching patterns using compressed embeddings.
     *
     * @param {ArrayLike<number>} embedding - Query embedding.
     * @param {number} k - Maximum number of matches to return.
     * @param {Iterable<object>} patterns - Candidates; each is read for
     *   `patternId`, `embedding`, `successRate` and `usageCount`.
     * @returns {Promise<Array<{pattern: object, similarity: number, confidence: number, latencyMs: number}>>}
     *   Matches sorted by descending similarity. `latencyMs` is the time this
     *   call spent searching.
     */
    async findPatterns(embedding, k, patterns) {
        const startTime = performance.now();
        // Compress input embedding for comparison
        const compressedInput = this.compressEmbedding(embedding);
        // Similarity on compressed representations (faster but less accurate)
        const matches = [];
        for (const pattern of patterns) {
            let compressed = this.compressedPatterns.get(pattern.patternId);
            if (!compressed) {
                compressed = this.createCompressedPattern(pattern);
                this.compressedPatterns.set(pattern.patternId, compressed);
            }
            const score = this.compressedSimilarity(compressedInput, compressed.embedding);
            matches.push({ pattern, score });
        }
        // Full sort by descending score, then keep the first k
        matches.sort((a, b) => b.score - a.score);
        const topK = matches.slice(0, k);
        const elapsedMs = performance.now() - startTime;
        this.totalOps++;
        this.totalTime += elapsedMs;
        // NOTE(review): confidence uses the caller's pattern.successRate, not
        // the compressed copy that performLightweightUpdate() adjusts, so
        // learned rates do not influence this value - confirm this is intended.
        return topK.map(({ pattern, score }) => ({
            pattern,
            similarity: score,
            confidence: score * pattern.successRate,
            latencyMs: elapsedMs,
        }));
    }

    /**
     * Lightweight learning: queue an asynchronous success-rate update for the
     * trajectories that meet the quality threshold.
     *
     * @param {Array<object>} trajectories - Each is read for `qualityScore`
     *   and, later, `steps[].stateAfter`.
     * @param {object} config - Read for `qualityThreshold` and `learningRate`.
     * @param {*} ewcState - Unused by edge mode.
     * @returns {Promise<number>} `max(0, avgQuality - 0.5)` over the accepted
     *   trajectories, or 0 when none are accepted.
     */
    async learn(trajectories, config, ewcState) {
        const startTime = performance.now();
        if (trajectories.length === 0) {
            return 0;
        }
        // Edge mode: only learn from high-quality trajectories
        const threshold = config.qualityThreshold;
        const good = trajectories.filter((t) => t.qualityScore >= threshold);
        if (good.length === 0) {
            return 0;
        }
        const avgQuality = good.reduce((sum, t) => sum + t.qualityScore, 0) / good.length;
        // Queue async update for later processing
        this.queueAsyncUpdate(async () => {
            await this.performLightweightUpdate(good, config.learningRate);
        });
        this.totalOps++;
        this.totalTime += performance.now() - startTime;
        return Math.max(0, avgQuality - 0.5);
    }

    /**
     * Apply quantized LoRA for the `q_proj` and `v_proj` modules.
     *
     * @param {Float32Array} input - Input vector.
     * @param {{A: {get: Function}, B: {get: Function}}|null|undefined} weights -
     *   Per-module A/B weight lookups; when falsy the input is returned as is.
     * @returns {Promise<Float32Array>} A new array with each module's
     *   adaptation blended in at 5%, or `input` itself when `weights` is falsy.
     */
    async applyLoRA(input, weights) {
        if (!weights) {
            return input;
        }
        const output = new Float32Array(input.length);
        output.set(input);
        const alpha = 0.05; // Very small blending for edge
        for (const module of ['q_proj', 'v_proj']) {
            const A = weights.A.get(module);
            const B = weights.B.get(module);
            if (A && B) {
                // Quantize weights if not already done
                const qA = this.getOrQuantize(`A_${module}`, A);
                const qB = this.getOrQuantize(`B_${module}`, B);
                // The adaptation is always computed from the original input
                const adapted = this.applyQuantizedLoRA(input, qA, qB);
                for (let i = 0; i < output.length; i++) {
                    output[i] = output[i] * (1 - alpha) + adapted[i] * alpha;
                }
            }
        }
        return output;
    }

    /**
     * Snapshot of runtime statistics.
     *
     * @returns {{avgLatencyMs: number, compressedPatterns: number, quantizedWeights: number, pendingUpdates: number, memoryEstimateMb: number}}
     */
    getStats() {
        return {
            avgLatencyMs: this.totalOps > 0 ? this.totalTime / this.totalOps : 0,
            compressedPatterns: this.compressedPatterns.size,
            quantizedWeights: this.quantizedWeights.size,
            pendingUpdates: this.pendingUpdates.length,
            memoryEstimateMb: this.estimateMemoryUsage(),
        };
    }

    // ========================================================================
    // Compression utilities
    // ========================================================================

    /**
     * Symmetric int8 quantization: scale so the largest magnitude maps to 127.
     *
     * The maximum is found with a linear scan rather than
     * `Math.max(...values)`, because spreading a large array into call
     * arguments can exceed the engine's argument limit and throw RangeError.
     *
     * @param {ArrayLike<number>} values - Values to quantize.
     * @returns {{data: Int8Array, scale: number}} Quantized values and the
     *   multiplier that was applied (1 when no value has positive magnitude).
     */
    quantizeInt8(values) {
        const count = values.length;
        let maxAbs = 0;
        for (let i = 0; i < count; i++) {
            const magnitude = Math.abs(values[i]);
            if (magnitude > maxAbs) {
                maxAbs = magnitude;
            }
        }
        const scale = maxAbs > 0 ? 127 / maxAbs : 1;
        const data = new Int8Array(count);
        for (let i = 0; i < count; i++) {
            data[i] = Math.round(values[i] * scale);
        }
        return { data, scale };
    }

    /**
     * Compress embedding to 8-bit representation.
     *
     * @param {ArrayLike<number>} embedding
     * @returns {Int8Array} Quantized embedding; the scale is discarded because
     *   cosine similarity does not depend on it.
     */
    compressEmbedding(embedding) {
        return this.quantizeInt8(embedding).data;
    }

    /**
     * Create compressed pattern representation.
     *
     * @param {object} pattern - Read for `patternId`, `embedding`,
     *   `successRate` and `usageCount`.
     * @returns {{id: *, embedding: Int8Array, successRate: number, usageCount: number}}
     *   `successRate` is stored as `round(rate * 255)`; `usageCount` is capped
     *   at 255.
     */
    createCompressedPattern(pattern) {
        return {
            id: pattern.patternId,
            embedding: this.compressEmbedding(pattern.embedding),
            successRate: Math.round(pattern.successRate * 255),
            usageCount: Math.min(pattern.usageCount, 255),
        };
    }

    /**
     * Cosine similarity on compressed embeddings.
     *
     * @param {ArrayLike<number>} a
     * @param {ArrayLike<number>} b
     * @returns {number} Cosine similarity, or 0 when the lengths differ or
     *   either vector has zero norm.
     */
    compressedSimilarity(a, b) {
        if (a.length !== b.length) {
            return 0;
        }
        let dot = 0;
        let normA = 0;
        let normB = 0;
        // Single pass accumulating the dot product and both squared norms
        const len = a.length;
        for (let i = 0; i < len; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denom = Math.sqrt(normA * normB);
        return denom > 0 ? dot / denom : 0;
    }

    /**
     * Get or create quantized weights.
     *
     * NOTE(review): the cache is keyed by name only, so a different or
     * updated `weights` array passed under the same key keeps returning the
     * first quantization until initialize()/cleanup() clears the cache.
     *
     * @param {string} key - Cache key.
     * @param {ArrayLike<number>} weights - Float weights to quantize on a miss.
     * @returns {Int8Array}
     */
    getOrQuantize(key, weights) {
        let quantized = this.quantizedWeights.get(key);
        if (!quantized) {
            quantized = this.quantizeWeights(weights);
            this.quantizedWeights.set(key, quantized);
        }
        return quantized;
    }

    /**
     * Quantize float weights to int8 and remember the tensor's own scale.
     *
     * @param {ArrayLike<number>} weights
     * @returns {Int8Array}
     */
    quantizeWeights(weights) {
        const { data, scale } = this.quantizeInt8(weights);
        // Legacy field: still reflects the most recent quantization
        this.quantizationScale = scale;
        this.quantizationScales.set(data, scale);
        return data;
    }

    /**
     * Apply rank-1 LoRA with quantized weights: `output = B * (A . input)`.
     *
     * Each tensor is dequantized with the scale recorded when it was
     * quantized, falling back to `quantizationScale` for arrays that were not
     * produced by quantizeWeights().
     *
     * @param {ArrayLike<number>} input
     * @param {Int8Array} qA - Quantized A weights.
     * @param {Int8Array} qB - Quantized B weights.
     * @returns {Float32Array} Adaptation with the same length as `input`.
     */
    applyQuantizedLoRA(input, qA, qB) {
        const dim = input.length;
        const output = new Float32Array(dim);
        const dequantA = 1 / (this.quantizationScales.get(qA) ?? this.quantizationScale);
        const dequantB = 1 / (this.quantizationScales.get(qB) ?? this.quantizationScale);
        // A * input -> intermediate (scalar for rank-1). The loops are bounded
        // by the weight length so short weights cannot introduce NaN.
        const lenA = Math.min(dim, qA.length);
        let projection = 0;
        for (let d = 0; d < lenA; d++) {
            projection += qA[d] * input[d];
        }
        projection *= dequantA;
        // B * intermediate -> output (entries beyond qB stay 0)
        const lenB = Math.min(dim, qB.length);
        const gain = dequantB * projection;
        for (let d = 0; d < lenB; d++) {
            output[d] = qB[d] * gain;
        }
        return output;
    }

    // ========================================================================
    // Async updates
    // ========================================================================

    /**
     * Cancel the scheduled drain, if any, and reset the handle.
     */
    cancelUpdateTimer() {
        if (this.updateTimer !== null) {
            clearTimeout(this.updateTimer);
            this.updateTimer = null;
        }
    }

    /**
     * Schedule a drain of the pending queue unless one is already scheduled.
     */
    scheduleUpdateProcessing() {
        if (this.updateTimer !== null) {
            return;
        }
        this.updateTimer = setTimeout(() => {
            // processAsyncUpdates() handles per-update errors itself
            void this.processAsyncUpdates();
        }, EdgeMode.UPDATE_INTERVAL_MS);
    }

    /**
     * Queue an async update.
     *
     * @param {() => Promise<void>} update - Thunk executed by a later drain.
     */
    queueAsyncUpdate(update) {
        this.pendingUpdates.push(update);
        this.scheduleUpdateProcessing();
    }

    /**
     * Process pending async updates, at most UPDATE_BATCH_SIZE per call.
     *
     * The batch is spliced off the live queue, so updates queued while the
     * batch is running are kept, in FIFO order, behind the remaining ones.
     * A failing update is logged and does not stop the batch.
     */
    async processAsyncUpdates() {
        this.cancelUpdateTimer();
        const batch = this.pendingUpdates.splice(0, EdgeMode.UPDATE_BATCH_SIZE);
        for (const update of batch) {
            try {
                await update();
            }
            catch (error) {
                console.error('Edge mode async update failed:', error);
            }
        }
        // Anything left over or queued meanwhile gets another drain
        // (no-op when queueAsyncUpdate() already scheduled one).
        if (this.pendingUpdates.length > 0) {
            this.scheduleUpdateProcessing();
        }
    }

    /**
     * Perform lightweight parameter update: move the matched compressed
     * pattern's success rate toward each trajectory's quality score with an
     * exponential moving average.
     *
     * @param {Array<object>} trajectories - Each is read for `steps` (whose
     *   last element provides `stateAfter`) and `qualityScore`.
     * @param {number} learningRate - EMA weight given to the new score.
     */
    async performLightweightUpdate(trajectories, learningRate) {
        const alpha = learningRate;
        for (const trajectory of trajectories) {
            if (trajectory.steps.length === 0) {
                continue;
            }
            // Match on the final state of the trajectory
            const lastStep = trajectory.steps[trajectory.steps.length - 1];
            const pattern = this.findSimilarCompressedPattern(lastStep.stateAfter);
            if (pattern) {
                // Success rate is stored as 0..255; clamp so out-of-range
                // scores or learning rates cannot leave the 8-bit range.
                const newRate = alpha * trajectory.qualityScore + (1 - alpha) * (pattern.successRate / 255);
                pattern.successRate = Math.max(0, Math.min(255, Math.round(newRate * 255)));
                pattern.usageCount = Math.min(255, pattern.usageCount + 1);
            }
        }
    }

    /**
     * Find most similar compressed pattern.
     *
     * NOTE(review): there is no minimum similarity, so any cached pattern
     * scoring above -1 can be returned, including a score of 0 produced by a
     * length mismatch. Confirm whether a threshold is wanted.
     *
     * @param {ArrayLike<number>} embedding
     * @returns {object|null} Best-scoring cached pattern, or null when no
     *   cached pattern scores above -1.
     */
    findSimilarCompressedPattern(embedding) {
        const compressed = this.compressEmbedding(embedding);
        let best = null;
        let bestSim = -1;
        for (const pattern of this.compressedPatterns.values()) {
            const sim = this.compressedSimilarity(compressed, pattern.embedding);
            if (sim > bestSim) {
                bestSim = sim;
                best = pattern;
            }
        }
        return best;
    }

    /**
     * Estimate memory usage in MB.
     *
     * @returns {number} Embedding bytes (+8 per pattern), quantized weight
     *   bytes, and a flat 100 bytes per pending update, divided by 1024 * 1024.
     */
    estimateMemoryUsage() {
        let bytes = 0;
        // Compressed patterns
        for (const pattern of this.compressedPatterns.values()) {
            bytes += pattern.embedding.byteLength + 8; // embedding + overhead
        }
        // Quantized weights
        for (const weights of this.quantizedWeights.values()) {
            bytes += weights.byteLength;
        }
        // Pending updates (minimal)
        bytes += this.pendingUpdates.length * 100;
        return bytes / (1024 * 1024);
    }
}
//# sourceMappingURL=edge.js.map