/**
 * V3 Memory Migration Utility
 *
 * Migrates data from legacy memory systems (SQLite, Markdown, JSON, etc.)
 * to the unified AgentDB-backed memory system with HNSW indexing.
 *
 * @module v3/memory/migration
 */
import { EventEmitter } from 'node:events';
import { promises as fs } from 'node:fs';
import * as path from 'node:path';
import { createDefaultEntry } from './types.js';
/**
|
|
* Default migration configuration
|
|
*/
|
|
const DEFAULT_MIGRATION_CONFIG = {
|
|
batchSize: 100,
|
|
generateEmbeddings: true,
|
|
validateData: true,
|
|
continueOnError: true,
|
|
};
|
|
/**
 * Memory Migration Manager
 *
 * Handles migration from:
 * - SQLite backends (.db files)
 * - Markdown backends (.md files)
 * - JSON memory stores (.json files)
 * - MemoryManager instances
 * - SwarmMemory instances
 * - DistributedMemory instances
 */
export class MemoryMigrator extends EventEmitter {
|
|
config;
|
|
target;
|
|
embeddingGenerator;
|
|
progress;
|
|
constructor(target, config, embeddingGenerator) {
|
|
super();
|
|
this.target = target;
|
|
this.config = { ...DEFAULT_MIGRATION_CONFIG, ...config };
|
|
this.embeddingGenerator = embeddingGenerator;
|
|
this.progress = this.initializeProgress();
|
|
}
|
|
/**
|
|
* Run the migration
|
|
*/
|
|
async migrate() {
|
|
const startTime = Date.now();
|
|
this.progress = this.initializeProgress();
|
|
this.emit('migration:started', { source: this.config.source });
|
|
try {
|
|
// Load entries from source
|
|
const entries = await this.loadFromSource();
|
|
this.progress.total = entries.length;
|
|
this.progress.totalBatches = Math.ceil(entries.length / this.config.batchSize);
|
|
this.emit('migration:progress', { ...this.progress });
|
|
// Process in batches
|
|
for (let i = 0; i < entries.length; i += this.config.batchSize) {
|
|
const batch = entries.slice(i, i + this.config.batchSize);
|
|
this.progress.currentBatch = Math.floor(i / this.config.batchSize) + 1;
|
|
await this.processBatch(batch);
|
|
this.progress.percentage = Math.round((this.progress.migrated / this.progress.total) * 100);
|
|
this.progress.estimatedTimeRemaining = this.estimateTimeRemaining(startTime, this.progress.migrated, this.progress.total);
|
|
this.emit('migration:progress', { ...this.progress });
|
|
}
|
|
const duration = Date.now() - startTime;
|
|
const result = {
|
|
success: this.progress.failed === 0 || this.config.continueOnError,
|
|
progress: { ...this.progress },
|
|
duration,
|
|
summary: this.generateSummary(),
|
|
};
|
|
this.emit('migration:completed', result);
|
|
return result;
|
|
}
|
|
catch (error) {
|
|
const duration = Date.now() - startTime;
|
|
const result = {
|
|
success: false,
|
|
progress: { ...this.progress },
|
|
duration,
|
|
summary: `Migration failed: ${error.message}`,
|
|
};
|
|
this.emit('migration:failed', { error, result });
|
|
return result;
|
|
}
|
|
}
|
|
/**
|
|
* Get current migration progress
|
|
*/
|
|
getProgress() {
|
|
return { ...this.progress };
|
|
}
|
|
// ===== Source Loaders =====
|
|
async loadFromSource() {
|
|
switch (this.config.source) {
|
|
case 'sqlite':
|
|
return this.loadFromSQLite();
|
|
case 'markdown':
|
|
return this.loadFromMarkdown();
|
|
case 'json':
|
|
return this.loadFromJSON();
|
|
case 'memory-manager':
|
|
return this.loadFromMemoryManager();
|
|
case 'swarm-memory':
|
|
return this.loadFromSwarmMemory();
|
|
case 'distributed-memory':
|
|
return this.loadFromDistributedMemory();
|
|
default:
|
|
throw new Error(`Unknown migration source: ${this.config.source}`);
|
|
}
|
|
}
|
|
async loadFromSQLite() {
|
|
const entries = [];
|
|
const dbPath = this.config.sourcePath;
|
|
try {
|
|
// Dynamic import for better-sqlite3 or similar
|
|
// In production, would use actual SQLite library
|
|
const fileContent = await fs.readFile(dbPath);
|
|
// Parse SQLite format (simplified - actual implementation would use SQLite library)
|
|
// For now, we'll try to read it as a JSON export format
|
|
if (dbPath.endsWith('.json')) {
|
|
const data = JSON.parse(fileContent.toString());
|
|
if (Array.isArray(data)) {
|
|
return data;
|
|
}
|
|
else if (data.entries) {
|
|
return data.entries;
|
|
}
|
|
}
|
|
// SQLite parsing would go here using better-sqlite3 or sql.js
|
|
this.emit('migration:warning', {
|
|
message: 'Direct SQLite parsing requires additional setup. Using export format.',
|
|
});
|
|
return entries;
|
|
}
|
|
catch (error) {
|
|
throw new Error(`Failed to load SQLite: ${error.message}`);
|
|
}
|
|
}
|
|
async loadFromMarkdown() {
|
|
const entries = [];
|
|
const basePath = this.config.sourcePath;
|
|
try {
|
|
const files = await this.walkDirectory(basePath, '.md');
|
|
for (const filePath of files) {
|
|
try {
|
|
const content = await fs.readFile(filePath, 'utf-8');
|
|
const entry = this.parseMarkdownEntry(filePath, content, basePath);
|
|
if (entry) {
|
|
entries.push(entry);
|
|
}
|
|
}
|
|
catch (error) {
|
|
this.addError(filePath, error.message, 'PARSE_ERROR', true);
|
|
}
|
|
}
|
|
return entries;
|
|
}
|
|
catch (error) {
|
|
throw new Error(`Failed to load Markdown: ${error.message}`);
|
|
}
|
|
}
|
|
async loadFromJSON() {
|
|
const filePath = this.config.sourcePath;
|
|
try {
|
|
const content = await fs.readFile(filePath, 'utf-8');
|
|
const data = JSON.parse(content);
|
|
// Handle different JSON formats
|
|
if (Array.isArray(data)) {
|
|
return data;
|
|
}
|
|
else if (data.entries) {
|
|
return data.entries;
|
|
}
|
|
else if (typeof data === 'object') {
|
|
// Assume it's a namespace -> entries map
|
|
const entries = [];
|
|
for (const [namespace, namespaceEntries] of Object.entries(data)) {
|
|
if (Array.isArray(namespaceEntries)) {
|
|
for (const entry of namespaceEntries) {
|
|
entries.push({ ...entry, namespace });
|
|
}
|
|
}
|
|
}
|
|
return entries;
|
|
}
|
|
return [];
|
|
}
|
|
catch (error) {
|
|
throw new Error(`Failed to load JSON: ${error.message}`);
|
|
}
|
|
}
|
|
async loadFromMemoryManager() {
|
|
// Would integrate with existing MemoryManager instance
|
|
// For now, try to load from common paths
|
|
const possiblePaths = [
|
|
'./memory/memory-store.json',
|
|
'./.swarm/memory.db',
|
|
'./memory.json',
|
|
];
|
|
for (const p of possiblePaths) {
|
|
try {
|
|
const fullPath = path.resolve(this.config.sourcePath, p);
|
|
await fs.access(fullPath);
|
|
return this.loadFromJSON();
|
|
}
|
|
catch {
|
|
continue;
|
|
}
|
|
}
|
|
return [];
|
|
}
|
|
async loadFromSwarmMemory() {
|
|
// Would integrate with SwarmMemory partitions
|
|
const entries = [];
|
|
const basePath = this.config.sourcePath;
|
|
try {
|
|
// Check for swarm memory directory structure
|
|
const partitionsPath = path.join(basePath, '.swarm', 'memory');
|
|
const files = await this.walkDirectory(partitionsPath, '.json');
|
|
for (const filePath of files) {
|
|
try {
|
|
const content = await fs.readFile(filePath, 'utf-8');
|
|
const data = JSON.parse(content);
|
|
// Extract namespace from file path
|
|
const relativePath = path.relative(partitionsPath, filePath);
|
|
const namespace = path.dirname(relativePath).replace(/\\/g, '/');
|
|
if (Array.isArray(data)) {
|
|
entries.push(...data.map((e) => ({ ...e, namespace })));
|
|
}
|
|
else if (data.entries) {
|
|
entries.push(...data.entries.map((e) => ({ ...e, namespace })));
|
|
}
|
|
}
|
|
catch (error) {
|
|
this.addError(filePath, error.message, 'PARSE_ERROR', true);
|
|
}
|
|
}
|
|
return entries;
|
|
}
|
|
catch (error) {
|
|
return [];
|
|
}
|
|
}
|
|
async loadFromDistributedMemory() {
|
|
// Would integrate with DistributedMemorySystem nodes
|
|
return this.loadFromSwarmMemory(); // Similar structure
|
|
}
|
|
// ===== Batch Processing =====
|
|
async processBatch(batch) {
|
|
for (const legacyEntry of batch) {
|
|
try {
|
|
// Validate if enabled
|
|
if (this.config.validateData) {
|
|
const validation = this.validateEntry(legacyEntry);
|
|
if (!validation.valid) {
|
|
if (this.config.continueOnError) {
|
|
this.addError(legacyEntry.key || 'unknown', validation.reason || 'Validation failed', 'VALIDATION_ERROR', false);
|
|
this.progress.skipped++;
|
|
continue;
|
|
}
|
|
else {
|
|
throw new Error(validation.reason);
|
|
}
|
|
}
|
|
}
|
|
// Transform to new format
|
|
const newEntry = await this.transformEntry(legacyEntry);
|
|
// Store in target
|
|
await this.target.store(newEntry);
|
|
this.progress.migrated++;
|
|
}
|
|
catch (error) {
|
|
if (this.config.continueOnError) {
|
|
this.addError(legacyEntry.key || 'unknown', error.message, 'STORE_ERROR', true);
|
|
this.progress.failed++;
|
|
}
|
|
else {
|
|
throw error;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
async transformEntry(legacy) {
|
|
// Map namespace if configured
|
|
let namespace = legacy.namespace || 'default';
|
|
if (this.config.namespaceMapping && this.config.namespaceMapping[namespace]) {
|
|
namespace = this.config.namespaceMapping[namespace];
|
|
}
|
|
// Determine content
|
|
const content = typeof legacy.value === 'string'
|
|
? legacy.value
|
|
: JSON.stringify(legacy.value);
|
|
// Map type if configured
|
|
let type = 'semantic';
|
|
if (legacy.metadata?.type && typeof legacy.metadata.type === 'string') {
|
|
if (this.config.typeMapping && this.config.typeMapping[legacy.metadata.type]) {
|
|
type = this.config.typeMapping[legacy.metadata.type];
|
|
}
|
|
else if (this.isValidMemoryType(legacy.metadata.type)) {
|
|
type = legacy.metadata.type;
|
|
}
|
|
}
|
|
// Parse timestamps
|
|
const createdAt = this.parseTimestamp(legacy.createdAt || legacy.created_at || legacy.timestamp);
|
|
const updatedAt = this.parseTimestamp(legacy.updatedAt || legacy.updated_at || legacy.timestamp);
|
|
const input = {
|
|
key: legacy.key,
|
|
content,
|
|
type,
|
|
namespace,
|
|
tags: legacy.tags || [],
|
|
metadata: {
|
|
...legacy.metadata,
|
|
migrated: true,
|
|
migrationSource: this.config.source,
|
|
migrationTimestamp: Date.now(),
|
|
originalValue: legacy.value,
|
|
},
|
|
};
|
|
const entry = createDefaultEntry(input);
|
|
entry.createdAt = createdAt;
|
|
entry.updatedAt = updatedAt;
|
|
// Generate embedding if configured
|
|
if (this.config.generateEmbeddings && this.embeddingGenerator) {
|
|
try {
|
|
entry.embedding = await this.embeddingGenerator(content);
|
|
}
|
|
catch (error) {
|
|
// Log but don't fail
|
|
this.emit('migration:warning', {
|
|
message: `Failed to generate embedding for ${legacy.key}: ${error.message}`,
|
|
});
|
|
}
|
|
}
|
|
return entry;
|
|
}
|
|
// ===== Helper Methods =====
|
|
initializeProgress() {
|
|
return {
|
|
total: 0,
|
|
migrated: 0,
|
|
failed: 0,
|
|
skipped: 0,
|
|
currentBatch: 0,
|
|
totalBatches: 0,
|
|
percentage: 0,
|
|
estimatedTimeRemaining: 0,
|
|
errors: [],
|
|
};
|
|
}
|
|
validateEntry(entry) {
|
|
if (!entry.key || typeof entry.key !== 'string') {
|
|
return { valid: false, reason: 'Missing or invalid key' };
|
|
}
|
|
if (entry.value === undefined) {
|
|
return { valid: false, reason: 'Missing value' };
|
|
}
|
|
if (entry.key.length > 500) {
|
|
return { valid: false, reason: 'Key too long (max 500 chars)' };
|
|
}
|
|
return { valid: true };
|
|
}
|
|
addError(entryId, message, code, recoverable) {
|
|
const error = {
|
|
entryId,
|
|
message,
|
|
code,
|
|
recoverable,
|
|
};
|
|
this.progress.errors.push(error);
|
|
this.emit('migration:error', error);
|
|
}
|
|
parseTimestamp(value) {
|
|
if (!value)
|
|
return Date.now();
|
|
if (typeof value === 'number') {
|
|
// Handle both milliseconds and seconds
|
|
return value > 1e12 ? value : value * 1000;
|
|
}
|
|
const parsed = Date.parse(value);
|
|
return isNaN(parsed) ? Date.now() : parsed;
|
|
}
|
|
isValidMemoryType(type) {
|
|
return ['episodic', 'semantic', 'procedural', 'working', 'cache'].includes(type);
|
|
}
|
|
estimateTimeRemaining(startTime, completed, total) {
|
|
if (completed === 0)
|
|
return 0;
|
|
const elapsed = Date.now() - startTime;
|
|
const rate = completed / elapsed;
|
|
const remaining = total - completed;
|
|
return Math.round(remaining / rate);
|
|
}
|
|
generateSummary() {
|
|
const { migrated, failed, skipped, total, errors } = this.progress;
|
|
let summary = `Migrated ${migrated}/${total} entries`;
|
|
if (failed > 0) {
|
|
summary += `, ${failed} failed`;
|
|
}
|
|
if (skipped > 0) {
|
|
summary += `, ${skipped} skipped`;
|
|
}
|
|
if (errors.length > 0) {
|
|
const errorTypes = new Map();
|
|
for (const error of errors) {
|
|
errorTypes.set(error.code, (errorTypes.get(error.code) || 0) + 1);
|
|
}
|
|
const errorSummary = Array.from(errorTypes.entries())
|
|
.map(([code, count]) => `${code}: ${count}`)
|
|
.join(', ');
|
|
summary += `. Errors: ${errorSummary}`;
|
|
}
|
|
return summary;
|
|
}
|
|
async walkDirectory(dir, extension) {
|
|
const files = [];
|
|
try {
|
|
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
for (const entry of entries) {
|
|
const fullPath = path.join(dir, entry.name);
|
|
if (entry.isDirectory()) {
|
|
const subFiles = await this.walkDirectory(fullPath, extension);
|
|
files.push(...subFiles);
|
|
}
|
|
else if (entry.isFile() && entry.name.endsWith(extension)) {
|
|
files.push(fullPath);
|
|
}
|
|
}
|
|
}
|
|
catch (error) {
|
|
// Directory doesn't exist or isn't readable
|
|
}
|
|
return files;
|
|
}
|
|
parseMarkdownEntry(filePath, content, basePath) {
|
|
// Extract frontmatter if present
|
|
const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
|
|
let metadata = {};
|
|
let body = content;
|
|
if (frontmatterMatch) {
|
|
try {
|
|
// Simple YAML-like parsing
|
|
const frontmatter = frontmatterMatch[1];
|
|
for (const line of frontmatter.split('\n')) {
|
|
const colonIndex = line.indexOf(':');
|
|
if (colonIndex > 0) {
|
|
const key = line.substring(0, colonIndex).trim();
|
|
let value = line.substring(colonIndex + 1).trim();
|
|
// Parse common types
|
|
if (value === 'true')
|
|
value = true;
|
|
else if (value === 'false')
|
|
value = false;
|
|
else if (typeof value === 'string' && /^\d+$/.test(value))
|
|
value = parseInt(value, 10);
|
|
else if (typeof value === 'string' && value.startsWith('[') && value.endsWith(']')) {
|
|
try {
|
|
value = JSON.parse(value.replace(/'/g, '"'));
|
|
}
|
|
catch {
|
|
// Keep as string
|
|
}
|
|
}
|
|
metadata[key] = value;
|
|
}
|
|
}
|
|
body = frontmatterMatch[2];
|
|
}
|
|
catch {
|
|
// Failed to parse frontmatter, use whole content
|
|
}
|
|
}
|
|
// Derive key from file path
|
|
const relativePath = path.relative(basePath, filePath);
|
|
const key = relativePath
|
|
.replace(/\\/g, '/')
|
|
.replace(/\.md$/, '')
|
|
.replace(/\//g, ':');
|
|
// Derive namespace from directory structure
|
|
const namespace = path.dirname(relativePath).replace(/\\/g, '/') || 'default';
|
|
return {
|
|
key,
|
|
value: body.trim(),
|
|
namespace,
|
|
tags: Array.isArray(metadata.tags) ? metadata.tags : [],
|
|
metadata,
|
|
timestamp: Date.now(),
|
|
};
|
|
}
|
|
}
|
|
/**
|
|
* Convenience function to create a migrator
|
|
*/
|
|
export function createMigrator(target, source, sourcePath, options = {}, embeddingGenerator) {
|
|
return new MemoryMigrator(target, { source, sourcePath, ...options }, embeddingGenerator);
|
|
}
|
|
/**
|
|
* Migrate from multiple sources
|
|
*/
|
|
export async function migrateMultipleSources(target, sources, options = {}, embeddingGenerator) {
|
|
const results = [];
|
|
for (const { source, path: sourcePath } of sources) {
|
|
const migrator = createMigrator(target, source, sourcePath, options, embeddingGenerator);
|
|
const result = await migrator.migrate();
|
|
results.push(result);
|
|
}
|
|
return results;
|
|
}
|
|
export default MemoryMigrator;
|
|
//# sourceMappingURL=migration.js.map
|