// tasq/node_modules/@claude-flow/memory/dist/rvf-backend.js
//
// 481 lines
// 18 KiB
// JavaScript

import { existsSync } from 'node:fs';
import { readFile, writeFile, mkdir, rename } from 'node:fs/promises';
import { dirname, resolve } from 'node:path';
import { HnswLite, cosineSimilarity } from './hnsw-lite.js';
/**
 * Reject database paths that are unusable as file-system paths.
 *
 * Only two things are checked: that the path is a string, and that it holds no
 * NUL byte (before or after resolution against the working directory). The
 * path is NOT confined to any root directory, so callers that need traversal
 * protection must enforce it themselves. The ':memory:' sentinel is accepted
 * as-is.
 *
 * @param {string} p - Database file path, or ':memory:'.
 * @returns {void}
 * @throws {TypeError} If `p` is not a string.
 * @throws {Error} If the raw or resolved path contains a NUL byte.
 */
function validatePath(p) {
  // Fail with a message that names the real problem instead of an incidental
  // "cannot read properties of undefined" when the path is missing.
  if (typeof p !== 'string') {
    const received = p === null ? 'null' : typeof p;
    throw new TypeError(`Database path must be a string, received ${received}`);
  }
  if (p === ':memory:') {
    return;
  }
  if (p.includes('\0')) {
    throw new Error('Path contains null bytes');
  }
  if (resolve(p).includes('\0')) {
    throw new Error('Resolved path contains null bytes');
  }
}
// Four-byte file signature: compared against the first four bytes of a
// snapshot when loading, and echoed into the JSON header when persisting.
const MAGIC = 'RVF\0';
// Format version recorded in the snapshot header.
const VERSION = 1;
// Fallback for `config.dimensions` (embedding vector length).
const DEFAULT_DIMENSIONS = 1536;
// Fallback for `config.hnswM`, passed to the HNSW index constructor.
const DEFAULT_M = 16;
// Fallback for `config.hnswEfConstruction`, passed to the HNSW index constructor.
const DEFAULT_EF_CONSTRUCTION = 200;
// Fallback for `config.maxElements`, forwarded to the native database constructor.
const DEFAULT_MAX_ELEMENTS = 100_000;
// Fallback for `config.autoPersistInterval`; used as a setInterval delay (milliseconds).
const DEFAULT_PERSIST_INTERVAL = 30_000;
/**
 * In-memory store for memory entries with optional vector search and
 * single-file snapshots.
 *
 * Entries live in `entries` (id -> entry) with a `${namespace}\0${key}`
 * lookup in `keyIndex`. `initialize()` first tries to open the optional native
 * `@ruvector/rvf` database; when that fails it creates an `HnswLite` index and
 * loads the snapshot file at `config.databasePath`. Unless the path is
 * ':memory:', pending changes are flushed to that file by a timer and on
 * `shutdown()`.
 *
 * NOTE(review): when the native database opens, this class still keeps all
 * entries in its own maps, skips loading the snapshot, and the flush still
 * writes the snapshot format to the same path that was handed to the native
 * database. Confirm that sharing the path is intended.
 */
export class RvfBackend {
  /** Primary store: entry id -> entry object. */
  entries = new Map();
  /** Secondary index: composite `${namespace}\0${key}` -> entry id. */
  keyIndex = new Map();
  /** HnswLite index; stays null until the pure-JS fallback is initialised. */
  hnswIndex = null;
  /** Opened native @ruvector/rvf database, or null. */
  nativeDb = null;
  /** Normalised configuration (defaults applied in the constructor). */
  config;
  initialized = false;
  /** True while there are changes that have not been written to disk. */
  dirty = false;
  /** True while a snapshot write is running. */
  persisting = false;
  /** Promise of the snapshot write currently running, or null. */
  persistInFlight = null;
  persistTimer = null;
  /** Rolling windows (last 100 samples, ms) for getStats(). */
  queryTimes = [];
  searchTimes = [];

  /**
   * @param {object} config - Backend options. `databasePath` is required
   *   (':memory:' disables all disk access); every other field falls back to
   *   a module default.
   * @throws {Error} If `dimensions` is not an integer in [1, 10000], or if
   *   `validatePath` rejects `databasePath`.
   */
  constructor(config) {
    const dimensions = config.dimensions ?? DEFAULT_DIMENSIONS;
    if (!Number.isInteger(dimensions) || dimensions < 1 || dimensions > 10000) {
      throw new Error(`Invalid dimensions: ${dimensions}. Must be an integer between 1 and 10000.`);
    }
    this.config = {
      databasePath: config.databasePath,
      dimensions,
      metric: config.metric ?? 'cosine',
      quantization: config.quantization ?? 'fp32',
      hnswM: config.hnswM ?? DEFAULT_M,
      hnswEfConstruction: config.hnswEfConstruction ?? DEFAULT_EF_CONSTRUCTION,
      maxElements: config.maxElements ?? DEFAULT_MAX_ELEMENTS,
      verbose: config.verbose ?? false,
      defaultNamespace: config.defaultNamespace ?? 'default',
      autoPersistInterval: config.autoPersistInterval ?? DEFAULT_PERSIST_INTERVAL,
    };
    validatePath(this.config.databasePath);
  }

  /**
   * Prepare the backend for use. Idempotent once it has completed.
   *
   * Tries the native database first; otherwise builds the HnswLite index and
   * loads the snapshot file. Starts the auto-persist timer when an interval
   * is configured and the path is not ':memory:'.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initialized) {
      return;
    }
    const useNative = await this.tryNativeInit();
    if (!useNative) {
      this.hnswIndex = new HnswLite(
        this.config.dimensions,
        this.config.hnswM,
        this.config.hnswEfConstruction,
        this.config.metric,
      );
      await this.loadFromDisk();
    }
    if (this.config.autoPersistInterval > 0 && this.config.databasePath !== ':memory:') {
      this.persistTimer = setInterval(() => {
        if (!this.dirty || this.persisting) {
          return;
        }
        this.persistToDisk().catch((err) => {
          // Best effort: a failed write leaves `dirty` set, so the next tick
          // retries. Report it instead of discarding the error.
          if (this.config.verbose) {
            console.error('[RvfBackend] Auto-persist failed:', err);
          }
        });
      }, this.config.autoPersistInterval);
      // Do not keep the process alive just for the flush timer.
      this.persistTimer.unref?.();
    }
    this.initialized = true;
    if (this.config.verbose) {
      const mode = this.nativeDb ? 'native @ruvector/rvf' : 'pure-TS fallback';
      console.log(`[RvfBackend] Initialized (${mode}), ${this.entries.size} entries loaded`);
    }
  }

  /**
   * Flush pending changes, release the native handle and drop all in-memory
   * state. No-op when the backend is not initialised.
   *
   * If the final flush fails the error propagates and the in-memory state is
   * left intact so the caller can retry.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    if (!this.initialized) {
      return;
    }
    if (this.persistTimer) {
      clearInterval(this.persistTimer);
      this.persistTimer = null;
    }
    // A timer-triggered write may still be running. persistToDisk() would
    // return early in that case, so wait for it before deciding whether a
    // final flush is needed; otherwise late writes are cleared unsaved.
    while (this.persistInFlight) {
      try {
        await this.persistInFlight;
      }
      catch {
        // The failed write restored `dirty`; the flush below retries it.
      }
    }
    if (this.dirty) {
      await this.persistToDisk();
    }
    if (this.nativeDb) {
      try {
        await this.nativeDb.close();
      }
      catch {
        // Closing is best effort; the handle is dropped either way.
      }
      this.nativeDb = null;
    }
    this.entries.clear();
    this.keyIndex.clear();
    this.hnswIndex = null;
    this.initialized = false;
  }

  /**
   * Insert or replace an entry (keyed by `entry.id`). A missing/empty
   * namespace is replaced by `config.defaultNamespace` on a copy of the entry.
   *
   * @param {object} entry - Entry with at least `id` and `key`.
   * @returns {Promise<void>}
   */
  async store(entry) {
    this.indexEntry(this.withDefaultNamespace(entry));
    this.dirty = true;
  }

  /**
   * Fetch an entry by id, bumping its `accessCount` and `lastAccessedAt`.
   *
   * @param {string} id
   * @returns {Promise<object|null>} The stored entry object, or null.
   */
  async get(id) {
    const entry = this.entries.get(id);
    if (!entry) {
      return null;
    }
    entry.accessCount++;
    entry.lastAccessedAt = Date.now();
    return entry;
  }

  /**
   * Fetch an entry by namespace and key (same access bookkeeping as `get`).
   *
   * @param {string} namespace
   * @param {string} key
   * @returns {Promise<object|null>}
   */
  async getByKey(namespace, key) {
    const id = this.keyIndex.get(this.compositeKey(namespace, key));
    // Compare against undefined so falsy-but-valid ids are not dropped.
    if (id === undefined) {
      return null;
    }
    return this.get(id);
  }

  /**
   * Shallow-merge `updateData` into an existing entry, bump `version` and
   * `updatedAt`, and keep the key index and vector index in step with any
   * changed namespace/key/embedding. The entry id cannot be changed.
   *
   * @param {string} id
   * @param {object} updateData - Fields to overwrite.
   * @returns {Promise<object|null>} The new entry object, or null if `id` is unknown.
   */
  async update(id, updateData) {
    const entry = this.entries.get(id);
    if (!entry) {
      return null;
    }
    const updated = {
      ...entry,
      ...updateData,
      // The map key is the id; letting updateData override it would leave the
      // entry stored under one id while claiming another.
      id: entry.id,
      updatedAt: Date.now(),
      version: entry.version + 1,
    };
    this.entries.set(id, updated);

    const oldKey = this.compositeKey(entry.namespace, entry.key);
    const newKey = this.compositeKey(updated.namespace, updated.key);
    if (oldKey !== newKey) {
      if (this.keyIndex.get(oldKey) === id) {
        this.keyIndex.delete(oldKey);
      }
      this.keyIndex.set(newKey, id);
    }
    if (this.hnswIndex && updated.embedding !== entry.embedding) {
      if (updated.embedding) {
        this.hnswIndex.add(id, updated.embedding);
      }
      else {
        this.hnswIndex.remove(id);
      }
    }
    this.dirty = true;
    return updated;
  }

  /**
   * Remove an entry by id.
   *
   * @param {string} id
   * @returns {Promise<boolean>} True if an entry was removed.
   */
  async delete(id) {
    if (!this.removeEntry(id)) {
      return false;
    }
    this.dirty = true;
    return true;
  }

  /**
   * Filter entries by the criteria in `q`, then page the result.
   *
   * Every criterion is optional and skipped when falsy. Expired entries are
   * excluded unless `q.includeExpired` is set. For `q.type === 'semantic'`
   * with an embedding (and an HNSW index available) the result is narrowed to
   * the index's nearest neighbours and returned in the index's rank order.
   * When `q.limit` is omitted, every match from `q.offset` on is returned.
   *
   * @param {object} q - Query description.
   * @returns {Promise<object[]>} Matching entries (not copies).
   */
  async query(q) {
    const start = performance.now();
    const now = Date.now();
    const matches = (e) => (!q.namespace || e.namespace === q.namespace)
      && (!q.key || e.key === q.key)
      && (!q.keyPrefix || e.key.startsWith(q.keyPrefix))
      && (!q.tags?.length || q.tags.every((t) => e.tags.includes(t)))
      && (!q.memoryType || e.type === q.memoryType)
      && (!q.accessLevel || e.accessLevel === q.accessLevel)
      && (!q.ownerId || e.ownerId === q.ownerId)
      && (!q.createdAfter || e.createdAt > q.createdAfter)
      && (!q.createdBefore || e.createdAt < q.createdBefore)
      && (!q.updatedAfter || e.updatedAt > q.updatedAfter)
      && (!q.updatedBefore || e.updatedAt < q.updatedBefore)
      && (q.includeExpired || !e.expiresAt || e.expiresAt > now);

    let results = [];
    for (const entry of this.entries.values()) {
      if (matches(entry)) {
        results.push(entry);
      }
    }

    const offset = q.offset ?? 0;
    // `offset + undefined` is NaN and slice(offset, NaN) is always empty, so
    // an omitted limit must be treated as "no limit" explicitly.
    const limit = q.limit ?? Infinity;

    if (q.type === 'semantic' && q.embedding && this.hnswIndex) {
      // Ask for enough neighbours to fill the requested page, not just `limit`.
      const k = Number.isFinite(limit) ? offset + limit : this.entries.size;
      const ranked = this.hnswIndex.search(q.embedding, k, q.threshold);
      // Keep the order the index returned; search() relies on that same order
      // being best-first when it truncates its hits to `k`.
      const rankById = new Map();
      ranked.forEach((hit, position) => {
        if (!rankById.has(hit.id)) {
          rankById.set(hit.id, position);
        }
      });
      results = results
        .filter((e) => rankById.has(e.id))
        .sort((a, b) => rankById.get(a.id) - rankById.get(b.id));
    }

    results = results.slice(offset, offset + limit);
    this.recordTiming(this.queryTimes, start);
    return results;
  }

  /**
   * Vector similarity search.
   *
   * Uses the HNSW index when present (over-fetching `2 * k` hits so that
   * filtering can still fill `k` results); otherwise scans every entry.
   *
   * @param {Float32Array|number[]} embedding - Query vector.
   * @param {object} options - `k`, optional `threshold`, optional `filters`
   *   (`namespace`, `tags`, `memoryType`).
   * @returns {Promise<Array<{entry: object, score: number, distance: number}>>}
   */
  async search(embedding, options) {
    const start = performance.now();
    let results;
    if (this.hnswIndex) {
      results = [];
      const hits = this.hnswIndex.search(embedding, options.k * 2, options.threshold);
      for (const hit of hits) {
        const entry = this.entries.get(hit.id);
        if (!entry || !this.matchesFilters(entry, options.filters)) {
          continue;
        }
        results.push({ entry, score: hit.score, distance: 1 - hit.score });
      }
      results = results.slice(0, options.k);
    }
    else {
      results = this.bruteForceSearch(embedding, options);
    }
    this.recordTiming(this.searchTimes, start);
    return results;
  }

  /**
   * Insert or replace many entries, applying the same default-namespace rule
   * as `store`.
   *
   * @param {Iterable<object>} entries
   * @returns {Promise<void>}
   */
  async bulkInsert(entries) {
    let inserted = 0;
    for (const entry of entries) {
      this.indexEntry(this.withDefaultNamespace(entry));
      inserted++;
    }
    if (inserted > 0) {
      this.dirty = true;
    }
  }

  /**
   * Remove many entries by id; unknown ids are ignored.
   *
   * @param {Iterable<string>} ids
   * @returns {Promise<number>} Number of entries actually removed.
   */
  async bulkDelete(ids) {
    let count = 0;
    for (const id of ids) {
      if (this.removeEntry(id)) {
        count++;
      }
    }
    if (count > 0) {
      this.dirty = true;
    }
    return count;
  }

  /**
   * Count entries, optionally restricted to one namespace.
   *
   * @param {string} [namespace]
   * @returns {Promise<number>}
   */
  async count(namespace) {
    if (!namespace) {
      return this.entries.size;
    }
    let total = 0;
    for (const entry of this.entries.values()) {
      if (entry.namespace === namespace) {
        total++;
      }
    }
    return total;
  }

  /**
   * @returns {Promise<string[]>} Distinct namespaces in first-seen order.
   */
  async listNamespaces() {
    const seen = new Set();
    for (const entry of this.entries.values()) {
      seen.add(entry.namespace);
    }
    return Array.from(seen);
  }

  /**
   * Remove every entry in a namespace.
   *
   * @param {string} namespace
   * @returns {Promise<number>} Number of entries removed.
   */
  async clearNamespace(namespace) {
    // Collect first: deleting while iterating the same Map is avoided.
    const doomed = [];
    for (const [id, entry] of this.entries) {
      if (entry.namespace === namespace) {
        doomed.push(id);
      }
    }
    for (const id of doomed) {
      this.removeEntry(id);
    }
    if (doomed.length > 0) {
      this.dirty = true;
    }
    return doomed.length;
  }

  /**
   * Summarise the store: counts per namespace and type, a rough memory
   * estimate (2 bytes per content character plus embedding bytes), and the
   * rolling average query/search times.
   *
   * @returns {Promise<object>}
   */
  async getStats() {
    const entriesByNamespace = {};
    const entriesByType = {};
    let memoryUsage = 0;
    for (const entry of this.entries.values()) {
      entriesByNamespace[entry.namespace] = (entriesByNamespace[entry.namespace] ?? 0) + 1;
      entriesByType[entry.type] = (entriesByType[entry.type] ?? 0) + 1;
      memoryUsage += entry.content.length * 2;
      if (entry.embedding) {
        memoryUsage += entry.embedding.byteLength;
      }
    }
    const avgQueryTime = this.avg(this.queryTimes);
    const avgSearchTime = this.avg(this.searchTimes);
    const hnswStats = this.hnswIndex
      ? {
        vectorCount: this.hnswIndex.size,
        // 4 bytes per component, one vector per indexed entry.
        memoryUsage: this.hnswIndex.size * this.config.dimensions * 4,
        avgSearchTime,
        buildTime: 0,
      }
      : undefined;
    return {
      totalEntries: this.entries.size,
      entriesByNamespace,
      entriesByType,
      memoryUsage,
      hnswStats,
      avgQueryTime,
      avgSearchTime,
    };
  }

  /**
   * Report whether the backend is initialised and has a vector index.
   *
   * @returns {Promise<object>} `status` is 'unhealthy' when not initialised,
   *   'degraded' when only the index is missing, otherwise 'healthy'.
   */
  async healthCheck() {
    const issues = [];
    const recommendations = [];
    const hasIndex = Boolean(this.hnswIndex || this.nativeDb);
    if (!this.initialized) {
      issues.push('Backend not initialized');
    }
    if (!hasIndex) {
      issues.push('No vector index available');
      recommendations.push('Install @ruvector/rvf for native HNSW performance');
    }
    let status = 'healthy';
    if (issues.length > 0) {
      status = issues.some((issue) => issue.includes('not initialized')) ? 'unhealthy' : 'degraded';
    }
    return {
      status,
      components: {
        storage: { status: this.initialized ? 'healthy' : 'unhealthy', latency: 0 },
        index: { status: hasIndex ? 'healthy' : 'degraded', latency: 0 },
        cache: { status: 'healthy', latency: 0 },
      },
      timestamp: Date.now(),
      issues,
      recommendations,
    };
  }

  /**
   * Try to load and open the optional native database.
   *
   * @returns {Promise<boolean>} True when the database was opened; false on
   *   any failure (module missing, constructor or open() throwing).
   */
  async tryNativeInit() {
    try {
      const rvf = await import('@ruvector/rvf');
      const db = new rvf.RvfDatabase({
        path: this.config.databasePath,
        dimensions: this.config.dimensions,
        metric: this.config.metric,
        quantization: this.config.quantization,
        hnswM: this.config.hnswM,
        hnswEfConstruction: this.config.hnswEfConstruction,
        maxElements: this.config.maxElements,
      });
      await db.open();
      // Publish the handle only once it is usable; a failed open() must not
      // leave `nativeDb` set while the fallback path is taken.
      this.nativeDb = db;
      if (this.config.verbose) {
        console.log('[RvfBackend] Native @ruvector/rvf loaded successfully');
      }
      return true;
    }
    catch {
      if (this.config.verbose) {
        console.log('[RvfBackend] @ruvector/rvf not available, using pure-TS fallback');
      }
      return false;
    }
  }

  /** Build the key-index key; NUL separates namespace from key. */
  compositeKey(namespace, key) {
    return `${namespace}\0${key}`;
  }

  /** Return `entry`, or a copy carrying the default namespace when it has none. */
  withDefaultNamespace(entry) {
    const ns = entry.namespace || this.config.defaultNamespace;
    return ns !== entry.namespace ? { ...entry, namespace: ns } : entry;
  }

  /** Put `entry` into the entry map, key index and (if any) vector index. */
  indexEntry(entry) {
    const newKey = this.compositeKey(entry.namespace, entry.key);
    const previous = this.entries.get(entry.id);
    if (previous) {
      // Replacing an id: retire what the old version contributed but the new
      // one does not, so lookups cannot reach the entry through stale data.
      const oldKey = this.compositeKey(previous.namespace, previous.key);
      if (oldKey !== newKey && this.keyIndex.get(oldKey) === entry.id) {
        this.keyIndex.delete(oldKey);
      }
      if (this.hnswIndex && previous.embedding && !entry.embedding) {
        this.hnswIndex.remove(entry.id);
      }
    }
    this.entries.set(entry.id, entry);
    this.keyIndex.set(newKey, entry.id);
    if (entry.embedding && this.hnswIndex) {
      this.hnswIndex.add(entry.id, entry.embedding);
    }
  }

  /** Remove `id` from every structure; returns false when it is unknown. */
  removeEntry(id) {
    const entry = this.entries.get(id);
    if (!entry) {
      return false;
    }
    this.entries.delete(id);
    const key = this.compositeKey(entry.namespace, entry.key);
    // Another entry may have taken over this namespace/key since; only drop
    // the mapping while it still points at the entry being removed.
    if (this.keyIndex.get(key) === id) {
      this.keyIndex.delete(key);
    }
    if (this.hnswIndex) {
      this.hnswIndex.remove(id);
    }
    return true;
  }

  /** True when `entry` passes the optional search filters. */
  matchesFilters(entry, filters) {
    if (!filters) {
      return true;
    }
    if (filters.namespace && entry.namespace !== filters.namespace) {
      return false;
    }
    if (filters.tags && !filters.tags.every((t) => entry.tags.includes(t))) {
      return false;
    }
    if (filters.memoryType && entry.type !== filters.memoryType) {
      return false;
    }
    return true;
  }

  /**
   * Linear-scan similarity search used when no HNSW index exists. Applies the
   * same filters as the indexed path and returns the top `options.k` hits by
   * descending cosine similarity.
   */
  bruteForceSearch(embedding, options) {
    const results = [];
    for (const entry of this.entries.values()) {
      if (!entry.embedding || !this.matchesFilters(entry, options.filters)) {
        continue;
      }
      const score = cosineSimilarity(embedding, entry.embedding);
      if (options.threshold && score < options.threshold) {
        continue;
      }
      results.push({ entry, score, distance: 1 - score });
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, options.k);
  }

  /** Append the elapsed time since `start` to `arr`, keeping the last 100 samples. */
  recordTiming(arr, start) {
    arr.push(performance.now() - start);
    if (arr.length > 100) {
      arr.shift();
    }
  }

  /** Arithmetic mean of `arr`, or 0 when it is empty. */
  avg(arr) {
    return arr.length > 0 ? arr.reduce((a, b) => a + b, 0) / arr.length : 0;
  }

  /**
   * Load the snapshot file into memory.
   *
   * Layout: 4-byte magic, uint32LE header length, JSON header, then for each
   * entry a uint32LE length followed by that many bytes of JSON. A missing,
   * foreign or truncated file is ignored; an unreadable individual record is
   * skipped. Never throws and never marks the store dirty.
   *
   * @returns {Promise<void>}
   */
  async loadFromDisk() {
    const path = this.config.databasePath;
    if (path === ':memory:' || !existsSync(path)) {
      return;
    }
    try {
      const raw = await readFile(path);
      if (raw.length < 8) {
        return;
      }
      const magic = String.fromCharCode(raw[0], raw[1], raw[2], raw[3]);
      if (magic !== MAGIC) {
        return;
      }
      const headerLen = raw.readUInt32LE(4);
      const MAX_HEADER_SIZE = 10 * 1024 * 1024; // 10MB max header
      if (headerLen > MAX_HEADER_SIZE || 8 + headerLen > raw.length) {
        return;
      }
      let header;
      try {
        header = JSON.parse(raw.subarray(8, 8 + headerLen).toString('utf-8'));
      }
      catch {
        if (this.config.verbose) {
          console.error('[RvfBackend] Corrupt RVF header');
        }
        return;
      }
      if (!header || typeof header.entryCount !== 'number' || typeof header.version !== 'number') {
        return;
      }
      let offset = 8 + headerLen;
      for (let i = 0; i < header.entryCount; i++) {
        if (offset + 4 > raw.length) {
          break;
        }
        const entryLen = raw.readUInt32LE(offset);
        offset += 4;
        if (offset + entryLen > raw.length) {
          break;
        }
        const entryJson = raw.subarray(offset, offset + entryLen).toString('utf-8');
        offset += entryLen;
        let entry;
        try {
          entry = JSON.parse(entryJson);
        }
        catch (err) {
          // The length prefix already located the next record, so one bad
          // record does not have to cost the rest of the file.
          if (this.config.verbose) {
            console.error(`[RvfBackend] Skipping corrupt entry ${i}:`, err);
          }
          continue;
        }
        if (entry === null || typeof entry !== 'object') {
          continue;
        }
        if (entry.embedding) {
          // Embeddings are persisted as plain number arrays.
          entry.embedding = new Float32Array(entry.embedding);
        }
        this.indexEntry(entry);
      }
    }
    catch (err) {
      if (this.config.verbose) {
        console.error('[RvfBackend] Error loading from disk:', err);
      }
    }
  }

  /**
   * Write a snapshot of all entries to `config.databasePath`.
   *
   * No-op for ':memory:' and while another write is already running.
   *
   * @returns {Promise<void>}
   * @throws Whatever the file-system calls reject with; `dirty` is then left
   *   set so a later call retries.
   */
  async persistToDisk() {
    if (this.config.databasePath === ':memory:') {
      return;
    }
    if (this.persisting) {
      return; // Prevent concurrent persist calls
    }
    this.persisting = true;
    const run = this.writeSnapshot();
    this.persistInFlight = run;
    try {
      await run;
    }
    finally {
      this.persisting = false;
      this.persistInFlight = null;
    }
  }

  /** Serialise the current entries and atomically replace the snapshot file. */
  async writeSnapshot() {
    const target = this.config.databasePath;
    const dir = dirname(target);
    if (!existsSync(dir)) {
      await mkdir(dir, { recursive: true });
    }
    const entries = Array.from(this.entries.values());
    // Clear the flag at the moment the snapshot is taken, not after the write:
    // anything stored while the write is awaited sets it again and is picked
    // up by the next flush instead of being silently marked clean.
    this.dirty = false;
    try {
      const now = Date.now();
      // Plain loop rather than Math.min(...spread) to stay safe for very
      // large entry counts.
      let minCreatedAt = now;
      for (const e of entries) {
        if (e.createdAt < minCreatedAt) {
          minCreatedAt = e.createdAt;
        }
      }
      const header = {
        magic: MAGIC,
        version: VERSION,
        dimensions: this.config.dimensions,
        metric: this.config.metric,
        quantization: this.config.quantization,
        entryCount: entries.length,
        createdAt: minCreatedAt,
        updatedAt: now,
      };
      const headerBuf = Buffer.from(JSON.stringify(header), 'utf-8');
      const headerLenBuf = Buffer.alloc(4);
      headerLenBuf.writeUInt32LE(headerBuf.length, 0);
      const chunks = [Buffer.from([0x52, 0x56, 0x46, 0x00]), headerLenBuf, headerBuf];
      for (const entry of entries) {
        const serialized = {
          ...entry,
          embedding: entry.embedding ? Array.from(entry.embedding) : undefined,
        };
        const body = Buffer.from(JSON.stringify(serialized), 'utf-8');
        const lenBuf = Buffer.alloc(4);
        lenBuf.writeUInt32LE(body.length, 0);
        chunks.push(lenBuf, body);
      }
      // Atomic write: write to temp file then rename (crash-safe)
      const tmpPath = target + '.tmp';
      await writeFile(tmpPath, Buffer.concat(chunks));
      await rename(tmpPath, target);
    }
    catch (err) {
      // Nothing reached the target file; the snapshot is still pending.
      this.dirty = true;
      throw err;
    }
  }
}
//# sourceMappingURL=rvf-backend.js.map