/**
 * SQLite-backed Persistent Cache for Embeddings (sql.js)
 *
 * Features:
 * - Cross-platform support (pure JavaScript/WASM, no native compilation)
 * - Disk persistence across sessions
 * - LRU eviction with configurable max size
 * - Automatic schema creation
 * - TTL support for cache entries
 * - Lazy initialization (no startup cost if not used)
 */
import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from 'fs';
import { dirname } from 'path';

/**
 * SQLite-backed persistent embedding cache using sql.js (pure JS/WASM).
 *
 * The database lives in memory and is exported to `dbPath` on a timer and on
 * `flush()` / `clear()` / `close()`. If sql.js cannot be loaded, or the
 * database cannot be opened, the cache disables itself: `get()` always
 * returns `null` and `set()` is a no-op.
 */
export class PersistentEmbeddingCache {
    db = null;
    SQL = null;
    initialized = false;
    // In-flight initialization, shared by concurrent callers.
    initPromise = null;
    dirty = false;
    hits = 0;
    misses = 0;
    autoSaveTimer = null;
    dbPath;
    maxSize;
    ttlMs;
    autoSaveInterval;

    /**
     * @param {object} config
     * @param {string} config.dbPath - File the database is loaded from and saved to.
     * @param {number} [config.maxSize=10000] - Row count above which eviction runs.
     * @param {number} [config.ttlMs] - Entry lifetime in milliseconds (default 7 days).
     * @param {number} [config.autoSaveInterval=30000] - Auto-save period in milliseconds.
     */
    constructor(config) {
        this.dbPath = config.dbPath;
        this.maxSize = config.maxSize ?? 10000;
        this.ttlMs = config.ttlMs ?? 7 * 24 * 60 * 60 * 1000; // 7 days
        this.autoSaveInterval = config.autoSaveInterval ?? 30000; // 30 seconds
    }

    /**
     * Lazily initialize the database connection.
     *
     * Concurrent callers share one in-flight initialization, so sql.js is
     * loaded and the database file is opened at most once per open cycle.
     * Never rejects: on failure the cache is left disabled (`db === null`) and
     * `initialized` is still set so the failing work is not retried.
     *
     * @returns {Promise<void>}
     */
    ensureInitialized() {
        if (this.initialized) {
            return Promise.resolve();
        }
        this.initPromise ??= this.doInitialize().finally(() => {
            this.initialized = true;
            this.initPromise = null;
        });
        return this.initPromise;
    }

    /**
     * Perform the actual initialization. Internal; call `ensureInitialized()`.
     *
     * @returns {Promise<void>}
     */
    async doInitialize() {
        let SQL;
        try {
            // Dynamically import sql.js and load its WASM module.
            const initSqlJs = (await import('sql.js')).default;
            SQL = await initSqlJs();
        } catch (error) {
            // If sql.js is not available, fall back gracefully.
            console.warn('[persistent-cache] sql.js not available, cache disabled:', error instanceof Error ? error.message : error);
            return;
        }

        let db = null;
        try {
            // Ensure the target directory exists.
            const dir = dirname(this.dbPath);
            if (!existsSync(dir)) {
                mkdirSync(dir, { recursive: true });
            }

            // Load the existing database or create a new one.
            db = existsSync(this.dbPath)
                ? new SQL.Database(readFileSync(this.dbPath))
                : new SQL.Database();

            // Create schema.
            db.run(`
                CREATE TABLE IF NOT EXISTS embeddings (
                    key TEXT PRIMARY KEY,
                    embedding BLOB NOT NULL,
                    dimensions INTEGER NOT NULL,
                    created_at INTEGER NOT NULL,
                    accessed_at INTEGER NOT NULL,
                    access_count INTEGER DEFAULT 1
                )
            `);
            db.run('CREATE INDEX IF NOT EXISTS idx_accessed_at ON embeddings(accessed_at)');
            db.run('CREATE INDEX IF NOT EXISTS idx_created_at ON embeddings(created_at)');

            // Publish the handle only once the schema is known to be usable.
            this.SQL = SQL;
            this.db = db;

            // Clean expired entries on startup, persist the schema, start auto-save.
            this.cleanExpired();
            this.saveToFile();
            this.startAutoSave();
        } catch (error) {
            console.warn('[persistent-cache] Initialization failed, cache disabled:', error instanceof Error ? error.message : error);
            // Do not leave a half-initialized handle behind: later get()/set()
            // calls would otherwise throw on every access.
            this.stopAutoSave();
            this.db = null;
            this.SQL = null;
            try {
                db?.close();
            } catch {
                // The handle is already unusable; nothing more to release.
            }
        }
    }

    /**
     * Start the auto-save timer (no-op if it is already running).
     */
    startAutoSave() {
        if (this.autoSaveTimer) {
            return;
        }
        this.autoSaveTimer = setInterval(() => {
            if (this.dirty && this.db) {
                this.saveToFile();
            }
        }, this.autoSaveInterval);
    }

    /**
     * Stop the auto-save timer.
     */
    stopAutoSave() {
        if (this.autoSaveTimer) {
            clearInterval(this.autoSaveTimer);
            this.autoSaveTimer = null;
        }
    }

    /**
     * Export the in-memory database and write it to `dbPath`.
     * Errors are logged, not thrown; `dirty` is cleared only on success.
     */
    saveToFile() {
        if (!this.db) {
            return;
        }
        try {
            const data = this.db.export();
            const buffer = Buffer.from(data);
            writeFileSync(this.dbPath, buffer);
            this.dirty = false;
        } catch (error) {
            console.error('[persistent-cache] Save error:', error);
        }
    }

    /**
     * Generate the cache key for a text.
     *
     * FNV-1a style hash over UTF-16 code units, combined with the text length.
     * The multiplication is done in double precision and can exceed 2^53, so
     * the result is not bit-exact 32-bit FNV-1a. It is deterministic, and it is
     * intentionally left as-is because changing it would orphan every key
     * already persisted on disk.
     *
     * @param {string} text
     * @returns {string} Key of the form `emb_<hex>_<length>`.
     */
    hashKey(text) {
        let hash = 0x811c9dc5;
        for (let i = 0; i < text.length; i++) {
            hash ^= text.charCodeAt(i);
            hash = (hash * 0x01000193) >>> 0;
        }
        return `emb_${hash.toString(16)}_${text.length}`;
    }

    /**
     * Serialize an embedding to the bytes stored in the BLOB column.
     *
     * A `Float32Array` is viewed in place (no copy). Any other array-like of
     * numbers (plain array, `Float64Array`, ...) is first converted to
     * `Float32Array` so the stored layout is always 4 bytes per element.
     *
     * @param {Float32Array|ArrayLike<number>} embedding
     * @returns {Uint8Array}
     */
    serializeEmbedding(embedding) {
        const floats = embedding instanceof Float32Array ? embedding : Float32Array.from(embedding);
        return new Uint8Array(floats.buffer, floats.byteOffset, floats.byteLength);
    }

    /**
     * Deserialize stored bytes back into a `Float32Array`.
     *
     * The bytes are copied into a fresh buffer so the result is correctly
     * aligned and independent of the source array.
     *
     * @param {Uint8Array} data - Raw bytes; length must be a multiple of 4.
     * @param {number} dimensions - Stored dimension count; currently unused.
     * @returns {Float32Array}
     */
    deserializeEmbedding(data, dimensions) {
        const buffer = new ArrayBuffer(data.length);
        new Uint8Array(buffer).set(data);
        return new Float32Array(buffer);
    }

    /**
     * Get an embedding from the cache.
     *
     * Counts a hit or a miss, deletes the entry if its TTL has elapsed, and
     * updates the access time and count on a hit.
     *
     * @param {string} text
     * @returns {Promise<Float32Array|null>} The cached embedding, or `null` on
     *   miss, expiry, error, or when the cache is disabled.
     */
    async get(text) {
        await this.ensureInitialized();
        if (!this.db) {
            this.misses++;
            return null;
        }
        const key = this.hashKey(text);
        const now = Date.now();
        try {
            const stmt = this.db.prepare(`
                SELECT embedding, dimensions, created_at
                FROM embeddings
                WHERE key = ?
            `);
            let row;
            try {
                stmt.bind([key]);
                if (!stmt.step()) {
                    this.misses++;
                    return null;
                }
                row = stmt.getAsObject();
            } finally {
                // Always release the statement, even if bind/step throws.
                stmt.free();
            }

            // Check TTL
            if (now - row.created_at > this.ttlMs) {
                this.db.run('DELETE FROM embeddings WHERE key = ?', [key]);
                this.dirty = true;
                this.misses++;
                return null;
            }

            // Update access time and count
            this.db.run(`
                UPDATE embeddings
                SET accessed_at = ?, access_count = access_count + 1
                WHERE key = ?
            `, [now, key]);
            this.dirty = true;
            this.hits++;
            return this.deserializeEmbedding(row.embedding, row.dimensions);
        } catch (error) {
            console.error('[persistent-cache] Get error:', error);
            this.misses++;
            return null;
        }
    }

    /**
     * Store an embedding in the cache, then evict if the cache is over size.
     * Errors are logged, not thrown.
     *
     * @param {string} text
     * @param {Float32Array|ArrayLike<number>} embedding
     * @returns {Promise<void>}
     */
    async set(text, embedding) {
        await this.ensureInitialized();
        if (!this.db) {
            return;
        }
        const key = this.hashKey(text);
        const now = Date.now();
        try {
            const data = this.serializeEmbedding(embedding);
            // Upsert using INSERT OR REPLACE; the subquery carries the previous
            // access_count forward when the key already exists.
            this.db.run(`
                INSERT OR REPLACE INTO embeddings
                    (key, embedding, dimensions, created_at, accessed_at, access_count)
                VALUES (?, ?, ?, ?, ?,
                    COALESCE((SELECT access_count + 1 FROM embeddings WHERE key = ?), 1)
                )
            `, [key, data, embedding.length, now, now, key]);
            this.dirty = true;
            // Check size and evict if needed
            await this.evictIfNeeded();
        } catch (error) {
            console.error('[persistent-cache] Set error:', error);
        }
    }

    /**
     * Evict the least recently accessed entries if the cache exceeds `maxSize`.
     * Deletes the overflow plus an extra 10% of `maxSize`.
     *
     * @returns {Promise<void>}
     */
    async evictIfNeeded() {
        if (!this.db) {
            return;
        }
        const result = this.db.exec('SELECT COUNT(*) as count FROM embeddings');
        const count = result[0]?.values[0]?.[0] ?? 0;
        if (count > this.maxSize) {
            const toDelete = count - this.maxSize + Math.floor(this.maxSize * 0.1); // Delete 10% extra
            this.db.run(`
                DELETE FROM embeddings
                WHERE key IN (
                    SELECT key FROM embeddings
                    ORDER BY accessed_at ASC
                    LIMIT ?
                )
            `, [toDelete]);
            this.dirty = true;
        }
    }

    /**
     * Delete every entry whose `created_at` is older than the TTL.
     */
    cleanExpired() {
        if (!this.db) {
            return;
        }
        const cutoff = Date.now() - this.ttlMs;
        this.db.run('DELETE FROM embeddings WHERE created_at < ?', [cutoff]);
        this.dirty = true;
    }

    /**
     * Get cache statistics.
     *
     * @returns {Promise<{size: number, maxSize: number, hitRate: number,
     *   hits: number, misses: number, dbSizeBytes?: number}>}
     *   `dbSizeBytes` is present only when the database file exists and can be
     *   inspected.
     */
    async getStats() {
        await this.ensureInitialized();
        const total = this.hits + this.misses;
        const stats = {
            size: 0,
            maxSize: this.maxSize,
            hitRate: total > 0 ? this.hits / total : 0,
            hits: this.hits,
            misses: this.misses,
        };
        if (this.db) {
            const result = this.db.exec('SELECT COUNT(*) as count FROM embeddings');
            stats.size = result[0]?.values[0]?.[0] ?? 0;
            // Report the on-disk size without reading the whole file into memory.
            if (existsSync(this.dbPath)) {
                try {
                    stats.dbSizeBytes = statSync(this.dbPath).size;
                } catch {
                    // Best effort: the file size is optional in the stats.
                }
            }
        }
        return stats;
    }

    /**
     * Clear all cached entries, reset hit/miss counters and save to disk.
     *
     * @returns {Promise<void>}
     */
    async clear() {
        await this.ensureInitialized();
        if (!this.db) {
            return;
        }
        this.db.run('DELETE FROM embeddings');
        this.dirty = true;
        this.hits = 0;
        this.misses = 0;
        this.saveToFile();
    }

    /**
     * Force a save to disk if there are unsaved changes.
     *
     * @returns {Promise<void>}
     */
    async flush() {
        await this.ensureInitialized();
        if (this.db && this.dirty) {
            this.saveToFile();
        }
    }

    /**
     * Stop auto-save, persist pending changes and close the database.
     * After closing an open database the cache can be lazily re-initialized.
     *
     * @returns {Promise<void>}
     */
    async close() {
        this.stopAutoSave();
        if (this.db) {
            // Save before closing
            if (this.dirty) {
                this.saveToFile();
            }
            this.db.close();
            this.db = null;
            this.SQL = null;
            this.initialized = false;
        }
    }
}

/**
 * Check if the persistent cache is available (sql.js installed and loadable).
 *
 * @returns {Promise<boolean>}
 */
export async function isPersistentCacheAvailable() {
    try {
        const initSqlJs = (await import('sql.js')).default;
        await initSqlJs();
        return true;
    } catch {
        return false;
    }
}
//# sourceMappingURL=persistent-cache.js.map