tasq/node_modules/@claude-flow/embeddings/dist/persistent-cache.js

337 lines
9.9 KiB
JavaScript

/**
* SQLite-backed Persistent Cache for Embeddings (sql.js)
*
* Features:
* - Cross-platform support (pure JavaScript/WASM, no native compilation)
* - Disk persistence across sessions
* - LRU eviction with configurable max size
* - Automatic schema creation
* - TTL support for cache entries
* - Lazy initialization (no startup cost if not used)
*/
import { existsSync, mkdirSync, readFileSync, statSync, writeFileSync } from 'fs';
import { dirname } from 'path';
/**
 * SQLite-backed persistent embedding cache using sql.js (pure JS/WASM).
 *
 * The database lives in memory and is written to `dbPath` by `saveToFile()`:
 * once after initialization, on a timer while there are unsaved changes, and
 * from `clear()`, `flush()` and `close()`.
 *
 * If sql.js cannot be loaded or the database cannot be opened, the cache
 * disables itself: `get()` always misses and `set()` is a no-op.
 *
 * The auto-save timer is a plain `setInterval` that is only cleared by
 * `stopAutoSave()` / `close()`, so callers should `close()` the cache when
 * they are done with it.
 */
export class PersistentEmbeddingCache {
  db = null;
  SQL = null;
  initialized = false;
  // In-flight initialization shared by concurrent callers; null when idle.
  initPromise = null;
  dirty = false;
  hits = 0;
  misses = 0;
  autoSaveTimer = null;
  dbPath;
  maxSize;
  ttlMs;
  autoSaveInterval;

  /**
   * @param {object} config
   * @param {string} config.dbPath - File the database is loaded from and saved to.
   * @param {number} [config.maxSize=10000] - Entry count above which eviction runs.
   * @param {number} [config.ttlMs] - Entry lifetime in ms, measured from `created_at` (default 7 days).
   * @param {number} [config.autoSaveInterval=30000] - Auto-save period in ms.
   */
  constructor(config) {
    this.dbPath = config.dbPath;
    this.maxSize = config.maxSize ?? 10000;
    this.ttlMs = config.ttlMs ?? 7 * 24 * 60 * 60 * 1000; // 7 days
    this.autoSaveInterval = config.autoSaveInterval ?? 30000; // 30 seconds
  }

  /**
   * Lazily initialize the database connection.
   *
   * Concurrent callers share one initialization run, so sql.js is loaded and
   * the database file is read at most once per open/close cycle.
   *
   * @returns {Promise<void>} Resolves once initialization has succeeded or the
   *   cache has been disabled; it does not reject on initialization failure.
   */
  async ensureInitialized() {
    if (this.initialized) {
      return;
    }
    this.initPromise ??= this.initialize().finally(() => {
      this.initPromise = null;
    });
    await this.initPromise;
  }

  /**
   * Perform the actual initialization: load sql.js, open or create the
   * database, create the schema, purge expired rows, persist, start auto-save.
   * Helper for `ensureInitialized()`; call that instead.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    try {
      // Dynamically import sql.js so the dependency is only loaded on first use.
      const initSqlJs = (await import('sql.js')).default;
      // Initialize sql.js (loads WASM).
      this.SQL = await initSqlJs();

      // Ensure the target directory exists.
      const dir = dirname(this.dbPath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }

      // Load the existing database or create a new one.
      this.db = existsSync(this.dbPath)
        ? new this.SQL.Database(readFileSync(this.dbPath))
        : new this.SQL.Database();

      // Create schema.
      this.db.run(`
        CREATE TABLE IF NOT EXISTS embeddings (
          key TEXT PRIMARY KEY,
          embedding BLOB NOT NULL,
          dimensions INTEGER NOT NULL,
          created_at INTEGER NOT NULL,
          accessed_at INTEGER NOT NULL,
          access_count INTEGER DEFAULT 1
        )
      `);
      this.db.run('CREATE INDEX IF NOT EXISTS idx_accessed_at ON embeddings(accessed_at)');
      this.db.run('CREATE INDEX IF NOT EXISTS idx_created_at ON embeddings(created_at)');

      // Clean expired entries on startup.
      this.cleanExpired();
      // Save after initialization to persist the schema.
      this.saveToFile();
      // Start auto-save timer.
      this.startAutoSave();
    } catch (error) {
      // If sql.js is not available (or the database cannot be opened), fall back gracefully.
      console.warn('[persistent-cache] sql.js not available, cache disabled:', error instanceof Error ? error.message : error);
      // Drop any half-initialized handle so later calls see a disabled cache
      // instead of operating on a database whose setup failed.
      this.stopAutoSave();
      if (this.db) {
        try {
          this.db.close();
        } catch {
          // Best effort: the handle is being discarded either way.
        }
      }
      this.db = null;
      this.SQL = null;
    } finally {
      // Mark as initialized even on failure to prevent retrying on every call.
      this.initialized = true;
    }
  }

  /**
   * Start the auto-save timer (no-op if it is already running).
   * Each tick saves only when there are unsaved changes.
   */
  startAutoSave() {
    if (this.autoSaveTimer) {
      return;
    }
    this.autoSaveTimer = setInterval(() => {
      if (this.dirty && this.db) {
        this.saveToFile();
      }
    }, this.autoSaveInterval);
  }

  /**
   * Stop the auto-save timer (no-op if it is not running).
   */
  stopAutoSave() {
    if (this.autoSaveTimer) {
      clearInterval(this.autoSaveTimer);
      this.autoSaveTimer = null;
    }
  }

  /**
   * Export the in-memory database and write it to `dbPath`.
   * Failures are logged, not thrown; `dirty` is cleared only on success.
   */
  saveToFile() {
    if (!this.db) {
      return;
    }
    try {
      const data = this.db.export();
      const buffer = Buffer.from(data);
      writeFileSync(this.dbPath, buffer);
      this.dirty = false;
    } catch (error) {
      console.error('[persistent-cache] Save error:', error);
    }
  }

  /**
   * Generate the cache key for a text.
   *
   * The key combines a 32-bit FNV-1a style hash of the UTF-16 code units with
   * the text length, so two different texts of equal length whose hashes
   * collide map to the same entry.
   *
   * NOTE: the multiply is done in double precision (not `Math.imul`), so for
   * some inputs the value differs from canonical FNV-1a. It is intentionally
   * left as is: keys are persisted on disk and changing the arithmetic would
   * orphan existing entries.
   *
   * @param {string} text
   * @returns {string} Key of the form `emb_<hex hash>_<length>`.
   */
  hashKey(text) {
    let hash = 0x811c9dc5;
    for (let i = 0; i < text.length; i++) {
      hash ^= text.charCodeAt(i);
      hash = (hash * 0x01000193) >>> 0;
    }
    return `emb_${hash.toString(16)}_${text.length}`;
  }

  /**
   * Serialize a Float32Array to a Uint8Array for sql.js.
   * The result is a view over the same underlying buffer, not a copy.
   *
   * @param {Float32Array} embedding
   * @returns {Uint8Array}
   */
  serializeEmbedding(embedding) {
    return new Uint8Array(embedding.buffer, embedding.byteOffset, embedding.byteLength);
  }

  /**
   * Deserialize a Uint8Array to a Float32Array.
   * The bytes are copied into a fresh buffer so the result does not alias `data`.
   *
   * @param {Uint8Array} data - Raw bytes; the length must be a multiple of 4.
   * @param {number} dimensions - Stored dimension count (currently unused).
   * @returns {Float32Array}
   */
  deserializeEmbedding(data, dimensions) {
    const buffer = new ArrayBuffer(data.length);
    const view = new Uint8Array(buffer);
    view.set(data);
    return new Float32Array(buffer);
  }

  /**
   * Get an embedding from the cache.
   *
   * Counts a hit or a miss on every call. An entry older than `ttlMs` is
   * deleted and reported as a miss; a hit updates `accessed_at` and
   * increments `access_count`.
   *
   * @param {string} text
   * @returns {Promise<Float32Array|null>} The cached embedding, or null on a
   *   miss, an expired entry, a disabled cache or a database error.
   */
  async get(text) {
    await this.ensureInitialized();
    if (!this.db) {
      this.misses++;
      return null;
    }
    const key = this.hashKey(text);
    const now = Date.now();
    try {
      let row = null;
      const stmt = this.db.prepare(`
        SELECT embedding, dimensions, created_at
        FROM embeddings
        WHERE key = ?
      `);
      try {
        stmt.bind([key]);
        if (stmt.step()) {
          row = stmt.getAsObject();
        }
      } finally {
        // Always release the statement, even when bind/step throws.
        stmt.free();
      }
      if (row === null) {
        this.misses++;
        return null;
      }
      // Check TTL.
      if (now - row.created_at > this.ttlMs) {
        this.db.run('DELETE FROM embeddings WHERE key = ?', [key]);
        this.dirty = true;
        this.misses++;
        return null;
      }
      // Update access time and count.
      this.db.run(`
        UPDATE embeddings
        SET accessed_at = ?, access_count = access_count + 1
        WHERE key = ?
      `, [now, key]);
      this.dirty = true;
      this.hits++;
      return this.deserializeEmbedding(row.embedding, row.dimensions);
    } catch (error) {
      console.error('[persistent-cache] Get error:', error);
      this.misses++;
      return null;
    }
  }

  /**
   * Store an embedding in the cache, replacing any existing entry for the
   * same key, then evict old entries if the cache grew past `maxSize`.
   *
   * Replacing an entry resets `created_at` and `accessed_at` to now and
   * carries over the previous `access_count` plus one. Database errors are
   * logged, not thrown.
   *
   * @param {string} text
   * @param {Float32Array} embedding
   * @returns {Promise<void>}
   */
  async set(text, embedding) {
    await this.ensureInitialized();
    if (!this.db) {
      return;
    }
    const key = this.hashKey(text);
    const now = Date.now();
    const data = this.serializeEmbedding(embedding);
    try {
      // Upsert entry using INSERT OR REPLACE.
      this.db.run(`
        INSERT OR REPLACE INTO embeddings
        (key, embedding, dimensions, created_at, accessed_at, access_count)
        VALUES (?, ?, ?, ?, ?,
          COALESCE((SELECT access_count + 1 FROM embeddings WHERE key = ?), 1)
        )
      `, [key, data, embedding.length, now, now, key]);
      this.dirty = true;
      // Check size and evict if needed.
      await this.evictIfNeeded();
    } catch (error) {
      console.error('[persistent-cache] Set error:', error);
    }
  }

  /**
   * Evict the least recently accessed entries when the row count exceeds
   * `maxSize`. Removes the overflow plus an extra 10% of `maxSize`, so
   * eviction does not run on every subsequent insert.
   *
   * @returns {Promise<void>}
   */
  async evictIfNeeded() {
    if (!this.db) {
      return;
    }
    const result = this.db.exec('SELECT COUNT(*) as count FROM embeddings');
    const count = result[0]?.values[0]?.[0] ?? 0;
    if (count > this.maxSize) {
      const toDelete = count - this.maxSize + Math.floor(this.maxSize * 0.1); // Delete 10% extra
      this.db.run(`
        DELETE FROM embeddings
        WHERE key IN (
          SELECT key FROM embeddings
          ORDER BY accessed_at ASC
          LIMIT ?
        )
      `, [toDelete]);
      this.dirty = true;
    }
  }

  /**
   * Delete every entry whose `created_at` is older than `ttlMs` and mark the
   * cache dirty.
   */
  cleanExpired() {
    if (!this.db) {
      return;
    }
    const cutoff = Date.now() - this.ttlMs;
    this.db.run('DELETE FROM embeddings WHERE created_at < ?', [cutoff]);
    this.dirty = true;
  }

  /**
   * Get cache statistics.
   *
   * @returns {Promise<{size: number, maxSize: number, hitRate: number, hits: number, misses: number, dbSizeBytes?: number}>}
   *   `size` is the current row count (0 when the cache is disabled).
   *   `dbSizeBytes` is the size of the file at `dbPath` as last written, and
   *   is omitted when the file cannot be stat'ed.
   */
  async getStats() {
    await this.ensureInitialized();
    const total = this.hits + this.misses;
    const stats = {
      size: 0,
      maxSize: this.maxSize,
      hitRate: total > 0 ? this.hits / total : 0,
      hits: this.hits,
      misses: this.misses,
    };
    if (this.db) {
      const result = this.db.exec('SELECT COUNT(*) as count FROM embeddings');
      stats.size = result[0]?.values[0]?.[0] ?? 0;
      try {
        // Ask the filesystem for the size instead of reading the whole file.
        stats.dbSizeBytes = statSync(this.dbPath).size;
      } catch {
        // File missing or unreadable: the size is optional, so leave it out.
      }
    }
    return stats;
  }

  /**
   * Delete all cached entries, reset the hit/miss counters and save
   * immediately.
   *
   * @returns {Promise<void>}
   */
  async clear() {
    await this.ensureInitialized();
    if (!this.db) {
      return;
    }
    this.db.run('DELETE FROM embeddings');
    this.dirty = true;
    this.hits = 0;
    this.misses = 0;
    this.saveToFile();
  }

  /**
   * Save to disk now if there are unsaved changes.
   *
   * @returns {Promise<void>}
   */
  async flush() {
    await this.ensureInitialized();
    if (this.db && this.dirty) {
      this.saveToFile();
    }
  }

  /**
   * Stop auto-save, persist unsaved changes and close the database.
   * After closing an open database, the next cache call re-initializes it.
   *
   * @returns {Promise<void>}
   */
  async close() {
    this.stopAutoSave();
    if (this.db) {
      // Save before closing.
      if (this.dirty) {
        this.saveToFile();
      }
      this.db.close();
      this.db = null;
      this.SQL = null;
      this.initialized = false;
    }
  }
}
/**
 * Probe whether the persistent cache can be used in this environment.
 *
 * Tries to import sql.js and run its initializer; any failure along the way
 * is treated as "not available".
 *
 * @returns {Promise<boolean>} true when sql.js imported and initialized, false otherwise.
 */
export async function isPersistentCacheAvailable() {
  let available = false;
  try {
    const { default: initSqlJs } = await import('sql.js');
    await initSqlJs();
    available = true;
  } catch {
    // Import or initialization failed: report the cache as unavailable.
  }
  return available;
}
//# sourceMappingURL=persistent-cache.js.map