481 lines
18 KiB
JavaScript
481 lines
18 KiB
JavaScript
import { existsSync } from 'node:fs';
|
|
import { readFile, writeFile, mkdir, rename } from 'node:fs/promises';
|
|
import { dirname, resolve } from 'node:path';
|
|
import { HnswLite, cosineSimilarity } from './hnsw-lite.js';
|
|
/**
 * Sanity-check a database path before it is handed to the file system.
 *
 * The special value ':memory:' is always accepted. For any other value the
 * only property verified is the absence of NUL bytes, both in the raw string
 * and in its resolved absolute form. No directory-traversal or root
 * containment check is performed here.
 *
 * @param {string} p - Database path, or ':memory:'.
 * @returns {void}
 * @throws {TypeError} If `p` is not a string.
 * @throws {Error} If the raw or resolved path contains a NUL byte.
 */
function validatePath(p) {
  // Fail with a clear message instead of "Cannot read properties of undefined".
  if (typeof p !== 'string') {
    const actual = p === null ? 'null' : typeof p;
    throw new TypeError(`Database path must be a string, got ${actual}`);
  }
  if (p === ':memory:') {
    return;
  }
  if (p.includes('\0')) {
    throw new Error('Path contains null bytes');
  }
  const resolved = resolve(p);
  if (resolved.includes('\0')) {
    throw new Error('Resolved path contains null bytes');
  }
}
|
|
// File signature: the characters "RVF" followed by a NUL. Compared against the
// first four bytes of a database file on load and stored in the header on save.
const MAGIC = 'RVF\0';

// Format version recorded in the persisted header.
const VERSION = 1;

// Fallbacks applied by the RvfBackend constructor when the config omits a value.
const DEFAULT_DIMENSIONS = 1536;
const DEFAULT_M = 16;
const DEFAULT_EF_CONSTRUCTION = 200;
const DEFAULT_MAX_ELEMENTS = 100_000;
// Handed to setInterval() for the auto-persist timer, so this is in milliseconds.
const DEFAULT_PERSIST_INTERVAL = 30_000;
|
|
/**
 * Memory backend that keeps every entry in an in-process Map, optionally
 * indexes embeddings for similarity search, and persists the whole store to a
 * single RVF file (4-byte magic, 4-byte little-endian header length, JSON
 * header, then length-prefixed JSON entries).
 *
 * On initialize() it first tries to load the native '@ruvector/rvf' module.
 * When that fails it builds an HnswLite index and loads entries from disk.
 *
 * NOTE(review): when the native module loads, this class only opens and closes
 * the native handle; entries still live in the in-memory Map, loadFromDisk()
 * is skipped, and persistToDisk() still writes the RVF file to the same
 * databasePath. Confirm that this is intended.
 */
export class RvfBackend {
  /** Entry id -> entry record. */
  entries = new Map();
  /** compositeKey(namespace, key) -> entry id. */
  keyIndex = new Map();
  /** HnswLite index; created by initialize() only when the native module is unavailable. */
  hnswIndex = null;
  /** Native '@ruvector/rvf' database handle, set only after it opened successfully. */
  nativeDb = null;
  /** Resolved configuration (see constructor). */
  config;
  initialized = false;
  /** True when the in-memory state has changes not yet captured by a persist snapshot. */
  dirty = false;
  /** True while a persistToDisk() write is running. */
  persisting = false;
  persistTimer = null;
  /** Rolling windows (at most 100 samples) of query()/search() durations in ms. */
  queryTimes = [];
  searchTimes = [];
  /** Promise of the persist currently running, or null. Internal. */
  inFlightPersist = null;

  /**
   * @param {object} config - Backend options. `databasePath` is required
   *   (':memory:' disables persistence); every other option has a default.
   * @throws {Error} If `dimensions` is not an integer in [1, 10000].
   * @throws Whatever validatePath() throws for an unacceptable `databasePath`.
   */
  constructor(config) {
    const dimensions = config.dimensions ?? DEFAULT_DIMENSIONS;
    if (!Number.isInteger(dimensions) || dimensions < 1 || dimensions > 10000) {
      throw new Error(`Invalid dimensions: ${dimensions}. Must be an integer between 1 and 10000.`);
    }
    this.config = {
      databasePath: config.databasePath,
      dimensions,
      metric: config.metric ?? 'cosine',
      quantization: config.quantization ?? 'fp32',
      hnswM: config.hnswM ?? DEFAULT_M,
      hnswEfConstruction: config.hnswEfConstruction ?? DEFAULT_EF_CONSTRUCTION,
      maxElements: config.maxElements ?? DEFAULT_MAX_ELEMENTS,
      verbose: config.verbose ?? false,
      defaultNamespace: config.defaultNamespace ?? 'default',
      autoPersistInterval: config.autoPersistInterval ?? DEFAULT_PERSIST_INTERVAL,
    };
    validatePath(this.config.databasePath);
  }

  /**
   * Prepare the backend: pick native or fallback mode, load persisted entries
   * (fallback mode only) and start the auto-persist timer. Calling it again
   * after a successful initialization is a no-op.
   */
  async initialize() {
    if (this.initialized) {
      return;
    }
    const useNative = await this.tryNativeInit();
    if (!useNative) {
      this.hnswIndex = new HnswLite(
        this.config.dimensions,
        this.config.hnswM,
        this.config.hnswEfConstruction,
        this.config.metric,
      );
      await this.loadFromDisk();
    }
    if (this.config.autoPersistInterval > 0 && this.config.databasePath !== ':memory:') {
      this.persistTimer = setInterval(() => {
        if (this.dirty && !this.persisting) {
          this.persistToDisk().catch((err) => {
            // Best effort: a failed write leaves `dirty` set, so the next tick retries.
            if (this.config.verbose) {
              console.error('[RvfBackend] Auto-persist failed:', err);
            }
          });
        }
      }, this.config.autoPersistInterval);
      // Do not let the timer keep the process alive.
      if (this.persistTimer.unref) {
        this.persistTimer.unref();
      }
    }
    this.initialized = true;
    if (this.config.verbose) {
      const mode = this.nativeDb ? 'native @ruvector/rvf' : 'pure-TS fallback';
      console.log(`[RvfBackend] Initialized (${mode}), ${this.entries.size} entries loaded`);
    }
  }

  /**
   * Stop the auto-persist timer, flush unsaved changes, close the native
   * handle and drop all in-memory state. No-op when not initialized.
   *
   * @throws If the final flush fails; in that case in-memory state is kept.
   */
  async shutdown() {
    if (!this.initialized) {
      return;
    }
    if (this.persistTimer) {
      clearInterval(this.persistTimer);
      this.persistTimer = null;
    }
    // An auto-persist may still be writing. Wait for it so that changes made
    // after its snapshot are flushed below instead of being dropped.
    if (this.inFlightPersist) {
      try {
        await this.inFlightPersist;
      } catch {
        // The failed write re-marked the store dirty; the flush below retries
        // and surfaces any persistent error to the caller.
      }
    }
    if (this.dirty) {
      await this.persistToDisk();
    }
    if (this.nativeDb) {
      try {
        await this.nativeDb.close();
      } catch (err) {
        // Closing is best effort; shutdown still completes.
        if (this.config.verbose) {
          console.error('[RvfBackend] Error closing native database:', err);
        }
      }
      this.nativeDb = null;
    }
    this.entries.clear();
    this.keyIndex.clear();
    this.hnswIndex = null;
    this.initialized = false;
  }

  /**
   * Insert or replace an entry. An empty namespace is replaced by the
   * configured default namespace (on a copy; the argument is not mutated).
   *
   * @param {object} entry - Entry record with at least `id` and `key`.
   */
  async store(entry) {
    this.putEntry(entry);
    this.dirty = true;
  }

  /**
   * Fetch an entry by id and record the access on it.
   *
   * @param {string} id
   * @returns {Promise<object|null>} The stored entry object, or null.
   */
  async get(id) {
    const entry = this.entries.get(id);
    if (!entry) {
      return null;
    }
    entry.accessCount = (entry.accessCount ?? 0) + 1;
    entry.lastAccessedAt = Date.now();
    return entry;
  }

  /**
   * Fetch an entry by namespace and key.
   *
   * @returns {Promise<object|null>}
   */
  async getByKey(namespace, key) {
    const id = this.keyIndex.get(this.compositeKey(namespace, key));
    if (!id) {
      return null;
    }
    return this.get(id);
  }

  /**
   * Merge `updateData` into an existing entry, bump `version` and refresh
   * `updatedAt`. The key index and the vector index are kept in step when the
   * update changes `namespace`, `key` or `embedding`. The entry id cannot be
   * changed through this method.
   *
   * @param {string} id
   * @param {object} updateData - Partial entry.
   * @returns {Promise<object|null>} The new entry object, or null if `id` is unknown.
   */
  async update(id, updateData) {
    const entry = this.entries.get(id);
    if (!entry) {
      return null;
    }
    const updated = {
      ...entry,
      ...updateData,
      id, // the map key is authoritative; never let updateData desynchronise it
      updatedAt: Date.now(),
      version: entry.version + 1,
    };
    if (updated.namespace !== entry.namespace || updated.key !== entry.key) {
      this.unlinkKey(entry);
      this.keyIndex.set(this.compositeKey(updated.namespace, updated.key), id);
    }
    if (this.hnswIndex && updated.embedding !== entry.embedding) {
      // Replace the indexed vector so searches do not score the old embedding.
      if (entry.embedding) {
        this.hnswIndex.remove(id);
      }
      if (updated.embedding) {
        this.hnswIndex.add(id, updated.embedding);
      }
    }
    this.entries.set(id, updated);
    this.dirty = true;
    return updated;
  }

  /**
   * Remove an entry.
   *
   * @returns {Promise<boolean>} True if the entry existed.
   */
  async delete(id) {
    if (!this.removeEntry(id)) {
      return false;
    }
    this.dirty = true;
    return true;
  }

  /**
   * Filter entries by the criteria in `q`, then page with `offset` / `limit`.
   * A missing `limit` means "no limit". When `q.type === 'semantic'`, an
   * embedding is supplied and the HnswLite index exists, candidates come from
   * the index and are returned in the order the index reports them.
   *
   * @param {object} q - Query descriptor.
   * @returns {Promise<object[]>}
   */
  async query(q) {
    const start = performance.now();
    const now = Date.now();
    const matches = (e) => {
      if (q.namespace && e.namespace !== q.namespace) return false;
      if (q.key && e.key !== q.key) return false;
      if (q.keyPrefix && !e.key.startsWith(q.keyPrefix)) return false;
      if (q.tags?.length && !q.tags.every((t) => e.tags?.includes(t))) return false;
      if (q.memoryType && e.type !== q.memoryType) return false;
      if (q.accessLevel && e.accessLevel !== q.accessLevel) return false;
      if (q.ownerId && e.ownerId !== q.ownerId) return false;
      if (q.createdAfter && !(e.createdAt > q.createdAfter)) return false;
      if (q.createdBefore && !(e.createdAt < q.createdBefore)) return false;
      if (q.updatedAfter && !(e.updatedAt > q.updatedAfter)) return false;
      if (q.updatedBefore && !(e.updatedAt < q.updatedBefore)) return false;
      if (!q.includeExpired && e.expiresAt && !(e.expiresAt > now)) return false;
      return true;
    };

    const offset = q.offset ?? 0;
    // `offset + undefined` is NaN and slice(offset, NaN) is always empty, so an
    // omitted limit must be treated as unbounded explicitly.
    const limit = q.limit ?? Infinity;

    let results;
    if (q.type === 'semantic' && q.embedding && this.hnswIndex) {
      // Ask for enough candidates to still fill the page after skipping `offset`.
      const k = Number.isFinite(limit) ? offset + limit : this.entries.size;
      const hits = this.hnswIndex.search(q.embedding, k, q.threshold);
      const seen = new Set();
      results = [];
      for (const hit of hits) {
        if (seen.has(hit.id)) continue;
        seen.add(hit.id);
        const e = this.entries.get(hit.id);
        if (e && matches(e)) {
          results.push(e);
        }
      }
    } else {
      results = Array.from(this.entries.values()).filter(matches);
    }

    results = results.slice(offset, offset + limit);
    this.recordTiming(this.queryTimes, start);
    return results;
  }

  /**
   * Similarity search. Uses the HnswLite index when present (over-fetching
   * `2 * k` candidates so post-filtering can still return `k`), otherwise a
   * brute-force scan.
   *
   * @param {Float32Array|number[]} embedding - Query vector.
   * @param {object} options - `k`, optional `threshold`, optional `filters`
   *   (`namespace`, `tags`, `memoryType`).
   * @returns {Promise<Array<{entry: object, score: number, distance: number}>>}
   */
  async search(embedding, options) {
    const start = performance.now();
    let results;
    if (this.hnswIndex) {
      const raw = this.hnswIndex.search(embedding, options.k * 2, options.threshold);
      results = [];
      for (const r of raw) {
        const entry = this.entries.get(r.id);
        if (!entry) continue;
        if (!this.passesSearchFilters(entry, options.filters)) continue;
        results.push({ entry, score: r.score, distance: 1 - r.score });
      }
      results = results.slice(0, options.k);
    } else {
      results = this.bruteForceSearch(embedding, options);
    }
    this.recordTiming(this.searchTimes, start);
    return results;
  }

  /**
   * Insert many entries with the same normalisation and indexing as store().
   *
   * @param {Iterable<object>} entries
   */
  async bulkInsert(entries) {
    let inserted = false;
    for (const entry of entries) {
      this.putEntry(entry);
      inserted = true;
    }
    if (inserted) {
      this.dirty = true;
    }
  }

  /**
   * Remove many entries.
   *
   * @param {Iterable<string>} ids
   * @returns {Promise<number>} How many of the ids existed and were removed.
   */
  async bulkDelete(ids) {
    let count = 0;
    for (const id of ids) {
      if (this.removeEntry(id)) {
        count++;
      }
    }
    if (count > 0) {
      this.dirty = true;
    }
    return count;
  }

  /**
   * @param {string} [namespace] - When omitted, counts all entries.
   * @returns {Promise<number>}
   */
  async count(namespace) {
    if (!namespace) {
      return this.entries.size;
    }
    let c = 0;
    for (const entry of this.entries.values()) {
      if (entry.namespace === namespace) {
        c++;
      }
    }
    return c;
  }

  /** @returns {Promise<string[]>} Distinct namespaces currently in use. */
  async listNamespaces() {
    const ns = new Set();
    for (const entry of this.entries.values()) {
      ns.add(entry.namespace);
    }
    return Array.from(ns);
  }

  /**
   * Remove every entry in a namespace.
   *
   * @returns {Promise<number>} Number of entries removed.
   */
  async clearNamespace(namespace) {
    // Collect first: entries are removed from the Map being iterated.
    const toDelete = [];
    for (const [id, entry] of this.entries) {
      if (entry.namespace === namespace) {
        toDelete.push(id);
      }
    }
    for (const id of toDelete) {
      this.removeEntry(id);
    }
    if (toDelete.length > 0) {
      this.dirty = true;
    }
    return toDelete.length;
  }

  /**
   * Aggregate counts, a rough memory estimate and timing averages.
   *
   * @returns {Promise<object>}
   */
  async getStats() {
    const entriesByNamespace = {};
    const entriesByType = {};
    let memoryUsage = 0;
    for (const entry of this.entries.values()) {
      entriesByNamespace[entry.namespace] = (entriesByNamespace[entry.namespace] ?? 0) + 1;
      entriesByType[entry.type] = (entriesByType[entry.type] ?? 0) + 1;
      // Rough estimate: two bytes per string code unit (assumes content is a string).
      memoryUsage += (entry.content?.length ?? 0) * 2;
      if (entry.embedding) {
        memoryUsage += entry.embedding.byteLength ?? entry.embedding.length * 4;
      }
    }
    const avgQuery = this.avg(this.queryTimes);
    const avgSearch = this.avg(this.searchTimes);
    return {
      totalEntries: this.entries.size,
      entriesByNamespace,
      entriesByType,
      memoryUsage,
      hnswStats: this.hnswIndex
        ? {
          vectorCount: this.hnswIndex.size,
          memoryUsage: this.hnswIndex.size * this.config.dimensions * 4,
          avgSearchTime: avgSearch,
          buildTime: 0,
        }
        : undefined,
      avgQueryTime: avgQuery,
      avgSearchTime: avgSearch,
    };
  }

  /**
   * Report 'healthy', 'degraded' (no vector index) or 'unhealthy' (not
   * initialized), with per-component status. Latencies are reported as 0.
   *
   * @returns {Promise<object>}
   */
  async healthCheck() {
    const issues = [];
    const recommendations = [];
    if (!this.initialized) {
      issues.push('Backend not initialized');
    }
    const hasIndex = Boolean(this.hnswIndex || this.nativeDb);
    if (!hasIndex) {
      issues.push('No vector index available');
      recommendations.push('Install @ruvector/rvf for native HNSW performance');
    }
    let status = 'healthy';
    if (issues.length > 0) {
      status = issues.some((i) => i.includes('not initialized')) ? 'unhealthy' : 'degraded';
    }
    return {
      status,
      components: {
        storage: { status: this.initialized ? 'healthy' : 'unhealthy', latency: 0 },
        index: { status: hasIndex ? 'healthy' : 'degraded', latency: 0 },
        cache: { status: 'healthy', latency: 0 },
      },
      timestamp: Date.now(),
      issues,
      recommendations,
    };
  }

  /**
   * Try to load and open the native '@ruvector/rvf' database.
   *
   * @returns {Promise<boolean>} True when the native database is open and
   *   assigned to `this.nativeDb`; false (with `nativeDb` left null) otherwise.
   */
  async tryNativeInit() {
    try {
      const rvf = await import('@ruvector/rvf');
      const db = new rvf.RvfDatabase({
        path: this.config.databasePath,
        dimensions: this.config.dimensions,
        metric: this.config.metric,
        quantization: this.config.quantization,
        hnswM: this.config.hnswM,
        hnswEfConstruction: this.config.hnswEfConstruction,
        maxElements: this.config.maxElements,
      });
      await db.open();
      // Publish the handle only once it is usable, so a failed open() cannot
      // make the backend report native mode while running on the fallback.
      this.nativeDb = db;
      if (this.config.verbose) {
        console.log('[RvfBackend] Native @ruvector/rvf loaded successfully');
      }
      return true;
    } catch {
      this.nativeDb = null;
      if (this.config.verbose) {
        console.log('[RvfBackend] @ruvector/rvf not available, using pure-TS fallback');
      }
      return false;
    }
  }

  /** Build the keyIndex key; NUL separates namespace and key. */
  compositeKey(namespace, key) {
    return `${namespace}\0${key}`;
  }

  /**
   * Internal: normalise the namespace, then write the entry into the entry
   * map, the key index and (when present) the vector index.
   *
   * @returns {object} The entry object actually stored.
   */
  putEntry(entry) {
    const ns = entry.namespace || this.config.defaultNamespace;
    const e = ns !== entry.namespace ? { ...entry, namespace: ns } : entry;
    const previous = this.entries.get(e.id);
    if (previous) {
      // Re-storing an id under a different key must not leave the old key
      // resolving to it.
      this.unlinkKey(previous);
    }
    this.entries.set(e.id, e);
    this.keyIndex.set(this.compositeKey(e.namespace, e.key), e.id);
    if (e.embedding && this.hnswIndex) {
      this.hnswIndex.add(e.id, e.embedding);
    }
    return e;
  }

  /**
   * Internal: drop the key-index mapping for `entry`, but only if that key
   * still points at this entry (another id may have taken the key since).
   */
  unlinkKey(entry) {
    const ck = this.compositeKey(entry.namespace, entry.key);
    if (this.keyIndex.get(ck) === entry.id) {
      this.keyIndex.delete(ck);
    }
  }

  /**
   * Internal: remove an entry from the entry map, key index and vector index.
   * Does not touch `dirty`.
   *
   * @returns {boolean} True if the id existed.
   */
  removeEntry(id) {
    const entry = this.entries.get(id);
    if (!entry) {
      return false;
    }
    this.entries.delete(id);
    this.unlinkKey(entry);
    if (this.hnswIndex) {
      this.hnswIndex.remove(id);
    }
    return true;
  }

  /** Internal: apply the optional search() filters to one entry. */
  passesSearchFilters(entry, filters) {
    if (filters?.namespace && entry.namespace !== filters.namespace) return false;
    if (filters?.tags && !filters.tags.every((t) => entry.tags?.includes(t))) return false;
    if (filters?.memoryType && entry.type !== filters.memoryType) return false;
    return true;
  }

  /**
   * Linear-scan similarity search used when no HnswLite index exists. Scores
   * with cosineSimilarity regardless of the configured metric.
   *
   * @returns {Array<{entry: object, score: number, distance: number}>} Best
   *   `options.k` matches, highest score first.
   */
  bruteForceSearch(embedding, options) {
    const results = [];
    for (const entry of this.entries.values()) {
      if (!entry.embedding) continue;
      const score = cosineSimilarity(embedding, entry.embedding);
      if (options.threshold && score < options.threshold) continue;
      if (!this.passesSearchFilters(entry, options.filters)) continue;
      results.push({ entry, score, distance: 1 - score });
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, options.k);
  }

  /** Append the elapsed time since `start` to `arr`, keeping at most 100 samples. */
  recordTiming(arr, start) {
    arr.push(performance.now() - start);
    if (arr.length > 100) {
      arr.shift();
    }
  }

  /** Arithmetic mean of `arr`, or 0 for an empty array. */
  avg(arr) {
    return arr.length > 0 ? arr.reduce((a, b) => a + b, 0) / arr.length : 0;
  }

  /**
   * Load entries from the RVF file at `databasePath`, if it exists and looks
   * valid. Any problem leaves the store with whatever was loaded so far;
   * errors are only reported in verbose mode and never thrown.
   */
  async loadFromDisk() {
    if (this.config.databasePath === ':memory:') {
      return;
    }
    if (!existsSync(this.config.databasePath)) {
      return;
    }
    try {
      const raw = await readFile(this.config.databasePath);
      if (raw.length < 8) {
        return;
      }
      const magic = String.fromCharCode(raw[0], raw[1], raw[2], raw[3]);
      if (magic !== MAGIC) {
        return;
      }
      const headerLen = raw.readUInt32LE(4);
      const MAX_HEADER_SIZE = 10 * 1024 * 1024; // 10MB max header
      if (headerLen > MAX_HEADER_SIZE || 8 + headerLen > raw.length) {
        return;
      }
      const headerJson = raw.subarray(8, 8 + headerLen).toString('utf-8');
      let header;
      try {
        header = JSON.parse(headerJson);
      } catch {
        if (this.config.verbose) {
          console.error('[RvfBackend] Corrupt RVF header');
        }
        return;
      }
      if (!header || typeof header.entryCount !== 'number' || typeof header.version !== 'number') {
        return;
      }
      let offset = 8 + headerLen;
      for (let i = 0; i < header.entryCount; i++) {
        // Stop at a truncated length prefix or a truncated record.
        if (offset + 4 > raw.length) {
          break;
        }
        const entryLen = raw.readUInt32LE(offset);
        offset += 4;
        if (offset + entryLen > raw.length) {
          break;
        }
        const entryJson = raw.subarray(offset, offset + entryLen).toString('utf-8');
        offset += entryLen;
        let parsed;
        try {
          parsed = JSON.parse(entryJson);
        } catch {
          // The length framing is intact, so one unreadable record need not
          // cost every record after it.
          if (this.config.verbose) {
            console.error(`[RvfBackend] Skipping corrupt entry record #${i}`);
          }
          continue;
        }
        if (parsed === null || typeof parsed !== 'object') {
          continue;
        }
        if (parsed.embedding) {
          parsed.embedding = new Float32Array(parsed.embedding);
        }
        const entry = parsed;
        this.entries.set(entry.id, entry);
        this.keyIndex.set(this.compositeKey(entry.namespace, entry.key), entry.id);
        if (entry.embedding && this.hnswIndex) {
          this.hnswIndex.add(entry.id, entry.embedding);
        }
      }
    } catch (err) {
      if (this.config.verbose) {
        console.error('[RvfBackend] Error loading from disk:', err);
      }
    }
  }

  /**
   * Write the current entries to `databasePath` (no-op for ':memory:').
   * If a write is already running, the returned promise settles with that
   * write instead of starting a second one.
   *
   * @throws If creating the directory, writing or renaming fails; the store
   *   is left marked dirty in that case.
   */
  async persistToDisk() {
    if (this.config.databasePath === ':memory:') {
      return;
    }
    if (this.inFlightPersist) {
      return this.inFlightPersist; // Prevent concurrent persist calls
    }
    this.persisting = true;
    this.inFlightPersist = this.writeSnapshot().finally(() => {
      this.persisting = false;
      this.inFlightPersist = null;
    });
    return this.inFlightPersist;
  }

  /** Internal: serialise a snapshot of the entries and replace the file with it. */
  async writeSnapshot() {
    const { databasePath } = this.config;
    const dir = dirname(databasePath);
    if (!existsSync(dir)) {
      await mkdir(dir, { recursive: true });
    }
    const entries = Array.from(this.entries.values());
    // Clear the flag at snapshot time, not after the write: a mutation that
    // lands while the file is being written re-marks the store dirty and is
    // picked up by the next persist instead of being silently marked clean.
    this.dirty = false;
    try {
      // Compute min createdAt without spread operator (avoids stack overflow for large arrays)
      let minCreatedAt = Date.now();
      for (const e of entries) {
        if (e.createdAt < minCreatedAt) {
          minCreatedAt = e.createdAt;
        }
      }
      const header = {
        magic: MAGIC,
        version: VERSION,
        dimensions: this.config.dimensions,
        metric: this.config.metric,
        quantization: this.config.quantization,
        entryCount: entries.length,
        createdAt: entries.length > 0 ? minCreatedAt : Date.now(),
        updatedAt: Date.now(),
      };
      const headerBuf = Buffer.from(JSON.stringify(header), 'utf-8');
      const entryBuffers = [];
      for (const entry of entries) {
        const serialized = {
          ...entry,
          // Typed arrays do not serialise as JSON arrays; convert explicitly.
          embedding: entry.embedding ? Array.from(entry.embedding) : undefined,
        };
        const buf = Buffer.from(JSON.stringify(serialized), 'utf-8');
        const lenBuf = Buffer.alloc(4);
        lenBuf.writeUInt32LE(buf.length, 0);
        entryBuffers.push(lenBuf, buf);
      }
      const magicBuf = Buffer.from([0x52, 0x56, 0x46, 0x00]); // "RVF\0"
      const headerLenBuf = Buffer.alloc(4);
      headerLenBuf.writeUInt32LE(headerBuf.length, 0);
      const output = Buffer.concat([magicBuf, headerLenBuf, headerBuf, ...entryBuffers]);
      // Atomic write: write to temp file then rename (crash-safe)
      const tmpPath = databasePath + '.tmp';
      await writeFile(tmpPath, output);
      await rename(tmpPath, databasePath);
    } catch (err) {
      // Nothing was committed, so the snapshot's changes are still unsaved.
      this.dirty = true;
      throw err;
    }
  }
}
|
|
//# sourceMappingURL=rvf-backend.js.map
|