310 lines
11 KiB
JavaScript
310 lines
11 KiB
JavaScript
/**
 * Edge Mode Implementation
 *
 * Optimized for resource-constrained environments with:
 * - <5MB memory footprint
 * - Minimal latency (<1ms)
 * - Micro-LoRA (rank-1)
 * - Aggressive pruning
 * - Async updates
 */
|
|
import { BaseModeImplementation } from './base.js';
|
|
/** Delay, in milliseconds, between batches of deferred updates. */
const EDGE_UPDATE_INTERVAL_MS = 100;

/** Maximum number of deferred updates executed per batch. */
const EDGE_MAX_UPDATES_PER_BATCH = 5;

/**
 * Quantize a numeric sequence to int8 using symmetric max-abs scaling.
 *
 * The largest magnitude maps to +/-127. An empty or all-zero input uses a
 * scale of 1. The maximum is found with a plain loop rather than
 * `Math.max(...values)`, which allocates a temporary array and can exceed the
 * engine's argument-count limit for large inputs.
 *
 * @param {ArrayLike<number>} values - Values to quantize.
 * @returns {{ data: Int8Array, scale: number }} Quantized values and the
 *   multiplier that was applied (divide by it to dequantize).
 */
function quantizeToInt8(values) {
    const length = values.length;
    let maxAbs = 0;
    for (let i = 0; i < length; i++) {
        const magnitude = Math.abs(values[i]);
        if (magnitude > maxAbs) {
            maxAbs = magnitude;
        }
    }
    const scale = maxAbs > 0 ? 127 / maxAbs : 1;
    const data = new Int8Array(length);
    for (let i = 0; i < length; i++) {
        data[i] = Math.round(values[i] * scale);
    }
    return { data, scale };
}

/**
 * Edge mode for resource-constrained devices.
 *
 * Keeps int8-compressed copies of pattern embeddings and LoRA weights, and
 * defers learning updates to timer-driven batches.
 */
export class EdgeMode extends BaseModeImplementation {
    mode = 'edge';

    // Compressed pattern records, keyed by patternId.
    compressedPatterns = new Map();

    // Quantized LoRA weights (Int8Array), keyed by `${matrix}_${module}`.
    quantizedWeights = new Map();

    // Scale of the most recently quantized tensor. Kept for backward
    // compatibility; per-tensor scales live in `quantizationMeta`.
    quantizationScale = 1.0;

    // Per-tensor metadata: quantized Int8Array -> { scale, source }.
    quantizationMeta = new WeakMap();

    // Deferred update callbacks waiting to be processed.
    pendingUpdates = [];

    // Handle of the scheduled batch timer, or null when none is pending.
    updateTimer = null;

    // Stats
    totalOps = 0;
    totalTime = 0;

    /**
     * Initialize the base implementation and reset all edge-mode caches,
     * queued updates and any scheduled batch timer.
     */
    async initialize() {
        await super.initialize();
        this.cancelUpdateTimer();
        this.compressedPatterns.clear();
        this.quantizedWeights.clear();
        this.pendingUpdates = [];
    }

    /**
     * Cancel scheduled work, drop caches and queued updates, then clean up
     * the base implementation.
     */
    async cleanup() {
        // The handle must be reset as well as cleared; otherwise
        // queueAsyncUpdate() would see a stale handle and never reschedule.
        this.cancelUpdateTimer();
        this.compressedPatterns.clear();
        this.quantizedWeights.clear();
        this.pendingUpdates = [];
        await super.cleanup();
    }

    /**
     * Find the k most similar patterns using compressed embeddings.
     *
     * Compressed records are cached by `patternId` on first sight and reused
     * afterwards, so a pattern whose embedding changes under the same id keeps
     * its cached compressed embedding.
     *
     * @param {ArrayLike<number>} embedding - Query embedding.
     * @param {number} k - Maximum number of matches to return.
     * @param {Iterable<object>} patterns - Candidates; each is read for
     *   `patternId`, `embedding`, `successRate` and `usageCount`.
     * @returns {Promise<Array<{pattern: object, similarity: number, confidence: number, latencyMs: number}>>}
     *   Matches ordered by descending similarity.
     */
    async findPatterns(embedding, k, patterns) {
        const startTime = performance.now();

        // Compress the query once; every comparison runs on int8 data.
        const compressedInput = this.compressEmbedding(embedding);

        const matches = [];
        for (const pattern of patterns) {
            let compressed = this.compressedPatterns.get(pattern.patternId);
            if (!compressed) {
                compressed = this.createCompressedPattern(pattern);
                this.compressedPatterns.set(pattern.patternId, compressed);
            }
            // Compressed similarity is faster but less accurate.
            const score = this.compressedSimilarity(compressedInput, compressed.embedding);
            matches.push({ pattern, score });
        }

        // Full descending sort, then keep the first k entries.
        matches.sort((a, b) => b.score - a.score);
        const topK = matches.slice(0, k);

        this.totalOps++;
        this.totalTime += performance.now() - startTime;

        return topK.map(m => ({
            pattern: m.pattern,
            similarity: m.score,
            confidence: m.score * m.pattern.successRate,
            // Per-match latency is not measured; the aggregate average is
            // reported by getStats().
            latencyMs: 0,
        }));
    }

    /**
     * Lightweight learning with deferred updates.
     *
     * Only trajectories with `qualityScore >= config.qualityThreshold` are
     * used. The parameter update itself is queued and runs later in a batch.
     *
     * @param {Array<object>} trajectories - Read for `qualityScore` and `steps`.
     * @param {object} config - Read for `qualityThreshold` and `learningRate`.
     * @param {*} ewcState - Not used by this implementation.
     * @returns {Promise<number>} `max(0, averageQuality - 0.5)` over the
     *   accepted trajectories, or 0 when none are accepted.
     */
    async learn(trajectories, config, ewcState) {
        const startTime = performance.now();
        if (trajectories.length === 0) {
            return 0;
        }

        // Edge mode: only learn from high-quality trajectories.
        const threshold = config.qualityThreshold;
        const good = trajectories.filter(t => t.qualityScore >= threshold);
        if (good.length === 0) {
            return 0;
        }
        const avgQuality = good.reduce((s, t) => s + t.qualityScore, 0) / good.length;

        // Defer the actual update so this call stays cheap.
        this.queueAsyncUpdate(async () => {
            await this.performLightweightUpdate(good, config.learningRate);
        });

        this.totalOps++;
        this.totalTime += performance.now() - startTime;
        return Math.max(0, avgQuality - 0.5);
    }

    /**
     * Apply quantized LoRA adapters for `q_proj` and `v_proj` to an input.
     *
     * @param {ArrayLike<number>} input - Input vector.
     * @param {{A: Map<string, ArrayLike<number>>, B: Map<string, ArrayLike<number>>}|null|undefined} weights
     *   LoRA matrices keyed by module name.
     * @returns {Promise<ArrayLike<number>>} `input` itself when `weights` is
     *   falsy; otherwise a new Float32Array with each adapter blended in.
     */
    async applyLoRA(input, weights) {
        if (!weights) {
            return input;
        }
        const output = new Float32Array(input.length);
        output.set(input);

        for (const module of ['q_proj', 'v_proj']) {
            const A = weights.A.get(module);
            const B = weights.B.get(module);
            if (A && B) {
                // Quantize on first use; cached afterwards.
                const qA = this.getOrQuantize(`A_${module}`, A);
                const qB = this.getOrQuantize(`B_${module}`, B);
                const adapted = this.applyQuantizedLoRA(input, qA, qB);
                const alpha = 0.05; // Very small blending for edge
                for (let i = 0; i < output.length; i++) {
                    output[i] = output[i] * (1 - alpha) + adapted[i] * alpha;
                }
            }
        }
        return output;
    }

    /**
     * Report runtime statistics.
     *
     * @returns {{avgLatencyMs: number, compressedPatterns: number, quantizedWeights: number, pendingUpdates: number, memoryEstimateMb: number}}
     */
    getStats() {
        return {
            avgLatencyMs: this.totalOps > 0 ? this.totalTime / this.totalOps : 0,
            compressedPatterns: this.compressedPatterns.size,
            quantizedWeights: this.quantizedWeights.size,
            pendingUpdates: this.pendingUpdates.length,
            memoryEstimateMb: this.estimateMemoryUsage(),
        };
    }

    // ========================================================================
    // Compression utilities
    // ========================================================================

    /**
     * Compress an embedding to an 8-bit representation.
     *
     * @param {ArrayLike<number>} embedding - Values to compress.
     * @returns {Int8Array} Values scaled so the largest magnitude is 127.
     */
    compressEmbedding(embedding) {
        return quantizeToInt8(embedding).data;
    }

    /**
     * Create the compressed record stored for a pattern.
     *
     * @param {object} pattern - Read for `patternId`, `embedding`,
     *   `successRate` and `usageCount`.
     * @returns {{id: *, embedding: Int8Array, successRate: number, usageCount: number}}
     *   `successRate` is scaled by 255 and rounded; `usageCount` is capped at 255.
     */
    createCompressedPattern(pattern) {
        return {
            id: pattern.patternId,
            embedding: this.compressEmbedding(pattern.embedding),
            successRate: Math.round(pattern.successRate * 255),
            usageCount: Math.min(pattern.usageCount, 255),
        };
    }

    /**
     * Cosine similarity between two compressed embeddings.
     *
     * @param {ArrayLike<number>} a - First vector.
     * @param {ArrayLike<number>} b - Second vector.
     * @returns {number} Cosine similarity, or 0 when the lengths differ or
     *   either vector has zero norm.
     */
    compressedSimilarity(a, b) {
        if (a.length !== b.length) {
            return 0;
        }
        let dot = 0;
        let normA = 0;
        let normB = 0;
        const len = a.length;
        // Single pass accumulating the dot product and both squared norms.
        for (let i = 0; i < len; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denom = Math.sqrt(normA * normB);
        return denom > 0 ? dot / denom : 0;
    }

    /**
     * Get the cached quantized weights for a key, quantizing when needed.
     *
     * The cache entry is refreshed when a different source array is passed
     * under the same key. Arrays mutated in place are not detected.
     *
     * @param {string} key - Cache key.
     * @param {ArrayLike<number>} weights - Float weights to quantize.
     * @returns {Int8Array} Quantized weights.
     */
    getOrQuantize(key, weights) {
        const cached = this.quantizedWeights.get(key);
        if (cached) {
            const meta = this.quantizationMeta.get(cached);
            // Entries without metadata were inserted directly; trust them.
            if (!meta || meta.source === weights) {
                return cached;
            }
        }
        const quantized = this.quantizeWeights(weights);
        this.quantizedWeights.set(key, quantized);
        return quantized;
    }

    /**
     * Quantize float weights to int8 and remember the scale that was used.
     *
     * @param {ArrayLike<number>} weights - Float weights.
     * @returns {Int8Array} Quantized weights.
     */
    quantizeWeights(weights) {
        const { data, scale } = quantizeToInt8(weights);
        // Each tensor has its own scale, so record it against the tensor
        // instead of relying on one shared field.
        this.quantizationMeta.set(data, { scale, source: weights });
        this.quantizationScale = scale;
        return data;
    }

    /**
     * Look up the dequantization factor for a quantized tensor.
     *
     * @param {Int8Array} quantized - Tensor produced by quantizeWeights().
     * @returns {number} Reciprocal of the tensor's scale, or of
     *   `quantizationScale` when the tensor has no recorded metadata.
     */
    dequantScaleFor(quantized) {
        const meta = this.quantizationMeta.get(quantized);
        return 1 / (meta ? meta.scale : this.quantizationScale);
    }

    /**
     * Apply a rank-1 LoRA adapter using quantized weights.
     *
     * Computes `B * (A . input)`, dequantizing A and B with their own scales.
     *
     * @param {ArrayLike<number>} input - Input vector.
     * @param {Int8Array} qA - Quantized A weights.
     * @param {Int8Array} qB - Quantized B weights.
     * @returns {Float32Array} Adapter output with the same length as `input`.
     */
    applyQuantizedLoRA(input, qA, qB) {
        const dim = input.length;
        const output = new Float32Array(dim);
        const dequantA = this.dequantScaleFor(qA);
        const dequantB = this.dequantScaleFor(qB);

        // A * input -> intermediate (a scalar, since edge mode is rank-1)
        let intermediate = 0;
        for (let d = 0; d < dim; d++) {
            intermediate += (qA[d] * dequantA) * input[d];
        }

        // B * intermediate -> output
        for (let d = 0; d < dim; d++) {
            output[d] = (qB[d] * dequantB) * intermediate;
        }
        return output;
    }

    // ========================================================================
    // Async updates
    // ========================================================================

    /**
     * Cancel the scheduled batch timer, if any, and reset its handle.
     */
    cancelUpdateTimer() {
        if (this.updateTimer) {
            clearTimeout(this.updateTimer);
        }
        this.updateTimer = null;
    }

    /**
     * Schedule batch processing unless a timer is already pending.
     */
    scheduleUpdateProcessing() {
        if (this.updateTimer) {
            return;
        }
        this.updateTimer = setTimeout(() => {
            // Failures are caught per update inside processAsyncUpdates().
            void this.processAsyncUpdates();
        }, EDGE_UPDATE_INTERVAL_MS);
    }

    /**
     * Queue a deferred update and make sure processing is scheduled.
     *
     * @param {() => Promise<void>} update - Callback to run later.
     */
    queueAsyncUpdate(update) {
        this.pendingUpdates.push(update);
        this.scheduleUpdateProcessing();
    }

    /**
     * Run one batch of pending updates and reschedule if any remain.
     *
     * The batch is spliced off the live queue, so updates queued while the
     * batch is awaited stay in the queue instead of being overwritten.
     */
    async processAsyncUpdates() {
        this.updateTimer = null;
        const batch = this.pendingUpdates.splice(0, EDGE_MAX_UPDATES_PER_BATCH);
        for (const update of batch) {
            try {
                await update();
            }
            catch (error) {
                // Best effort: one failing update must not block the rest.
                console.error('Edge mode async update failed:', error);
            }
        }
        if (this.pendingUpdates.length > 0) {
            this.scheduleUpdateProcessing();
        }
    }

    /**
     * Perform a lightweight update of cached pattern statistics.
     *
     * For each trajectory with at least one step, the cached pattern most
     * similar to the last step's `stateAfter` has its success rate moved
     * toward the trajectory's quality score and its usage count incremented
     * (capped at 255).
     *
     * @param {Array<object>} trajectories - Read for `steps` and `qualityScore`.
     * @param {number} learningRate - Weight given to the new quality score.
     */
    async performLightweightUpdate(trajectories, learningRate) {
        // Simple exponential moving average update.
        const alpha = learningRate;
        for (const trajectory of trajectories) {
            if (trajectory.steps.length === 0) {
                continue;
            }
            const lastStep = trajectory.steps[trajectory.steps.length - 1];
            const pattern = this.findSimilarCompressedPattern(lastStep.stateAfter);
            if (pattern) {
                // successRate is stored scaled by 255.
                const newRate = alpha * trajectory.qualityScore + (1 - alpha) * (pattern.successRate / 255);
                pattern.successRate = Math.round(newRate * 255);
                pattern.usageCount = Math.min(255, pattern.usageCount + 1);
            }
        }
    }

    /**
     * Find the cached compressed pattern most similar to an embedding.
     *
     * @param {ArrayLike<number>} embedding - Embedding to compare against.
     * @returns {object|null} Best-scoring record, or null when the cache is
     *   empty or no record scores above -1.
     */
    findSimilarCompressedPattern(embedding) {
        const compressed = this.compressEmbedding(embedding);
        let best = null;
        let bestSim = -1;
        for (const pattern of this.compressedPatterns.values()) {
            const sim = this.compressedSimilarity(compressed, pattern.embedding);
            if (sim > bestSim) {
                bestSim = sim;
                best = pattern;
            }
        }
        return best;
    }

    /**
     * Estimate memory usage in MB.
     *
     * @returns {number} Embedding bytes plus 8 per pattern, quantized weight
     *   bytes, and a flat 100 bytes per pending update, divided by 1024 * 1024.
     */
    estimateMemoryUsage() {
        let bytes = 0;

        // Compressed patterns
        for (const pattern of this.compressedPatterns.values()) {
            bytes += pattern.embedding.byteLength + 8; // embedding + overhead
        }

        // Quantized weights
        for (const weights of this.quantizedWeights.values()) {
            bytes += weights.byteLength;
        }

        // Pending updates (flat per-entry estimate)
        bytes += this.pendingUpdates.length * 100;
        return bytes / (1024 * 1024);
    }
}
|
|
//# sourceMappingURL=edge.js.map
|