// tasq/node_modules/@claude-flow/memory/dist/migration.js
//
// 513 lines
// 19 KiB
// JavaScript

/**
* V3 Memory Migration Utility
*
* Migrates data from legacy memory systems (SQLite, Markdown, JSON, etc.)
* to the unified AgentDB-backed memory system with HNSW indexing.
*
* @module v3/memory/migration
*/
import { EventEmitter } from 'node:events';
import { promises as fs } from 'node:fs';
import * as path from 'node:path';
import { createDefaultEntry, } from './types.js';
/**
 * Default migration configuration.
 *
 * These values are spread underneath the caller-supplied config in the
 * MemoryMigrator constructor, so any key the caller provides wins.
 * The object is frozen so that an accidental write cannot leak into every
 * migrator created afterwards.
 *
 * - batchSize: number of entries handed to one processBatch() call
 * - generateEmbeddings: call the embedding generator (when one is supplied)
 * - validateData: run validateEntry() on each legacy entry before storing
 * - continueOnError: record per-entry failures instead of aborting the run
 */
const DEFAULT_MIGRATION_CONFIG = Object.freeze({
    batchSize: 100,
    generateEmbeddings: true,
    validateData: true,
    continueOnError: true,
});
/**
 * Memory Migration Manager
 *
 * Loads legacy entries from one configured source, converts each one with
 * createDefaultEntry(), and writes it to `target.store()`.
 *
 * Supported `config.source` values:
 * - 'sqlite'             (only a JSON export is parsed; raw .db files yield a warning)
 * - 'markdown'           (directory tree of .md files, optional frontmatter)
 * - 'json'               (array, `{ entries: [...] }`, or namespace -> entries map)
 * - 'memory-manager'     (probes well-known JSON files under sourcePath)
 * - 'swarm-memory'       (JSON files under <sourcePath>/.swarm/memory)
 * - 'distributed-memory' (same layout as swarm-memory)
 *
 * Events emitted: 'migration:started', 'migration:progress',
 * 'migration:completed', 'migration:failed', 'migration:warning',
 * 'migration:error'.
 */
export class MemoryMigrator extends EventEmitter {
    config;
    target;
    embeddingGenerator;
    progress;

    /**
     * @param {object} target - Destination backend; must expose an async `store(entry)`.
     * @param {object} config - Migration options, merged over DEFAULT_MIGRATION_CONFIG.
     * @param {Function} [embeddingGenerator] - Optional async `(content) => embedding`.
     */
    constructor(target, config, embeddingGenerator) {
        super();
        this.target = target;
        this.config = { ...DEFAULT_MIGRATION_CONFIG, ...config };
        this.embeddingGenerator = embeddingGenerator;
        this.progress = this.initializeProgress();
    }

    /**
     * Run the migration.
     *
     * Never rejects for load/store failures: any error that escapes the batch
     * loop is reported through a `success: false` result and a
     * 'migration:failed' event.
     *
     * Note: `progress.percentage` and the time estimate are derived from the
     * number of successfully migrated entries only (skipped/failed entries do
     * not advance them).
     *
     * @returns {Promise<{success: boolean, progress: object, duration: number, summary: string}>}
     */
    async migrate() {
        const startTime = Date.now();
        this.progress = this.initializeProgress();
        this.emit('migration:started', { source: this.config.source });
        try {
            // Load entries from source
            const entries = await this.loadFromSource();
            // A zero/negative/non-integer batch size would make the loop below spin forever.
            const batchSize = this.resolveBatchSize();
            this.progress.total = entries.length;
            this.progress.totalBatches = Math.ceil(entries.length / batchSize);
            this.emit('migration:progress', { ...this.progress });
            // Process in batches
            for (let offset = 0; offset < entries.length; offset += batchSize) {
                const batch = entries.slice(offset, offset + batchSize);
                this.progress.currentBatch = Math.floor(offset / batchSize) + 1;
                await this.processBatch(batch);
                this.progress.percentage = Math.round((this.progress.migrated / this.progress.total) * 100);
                this.progress.estimatedTimeRemaining = this.estimateTimeRemaining(startTime, this.progress.migrated, this.progress.total);
                this.emit('migration:progress', { ...this.progress });
            }
            const result = {
                success: this.progress.failed === 0 || this.config.continueOnError,
                progress: { ...this.progress },
                duration: Date.now() - startTime,
                summary: this.generateSummary(),
            };
            this.emit('migration:completed', result);
            return result;
        }
        catch (error) {
            const result = {
                success: false,
                progress: { ...this.progress },
                duration: Date.now() - startTime,
                summary: `Migration failed: ${this.errorMessage(error)}`,
            };
            this.emit('migration:failed', { error, result });
            return result;
        }
    }

    /**
     * Get current migration progress
     *
     * @returns {object} Shallow copy of the progress record.
     */
    getProgress() {
        return { ...this.progress };
    }

    // ===== Source Loaders =====

    /**
     * Dispatch to the loader matching `config.source`.
     *
     * @returns {Promise<Array<object>>} Legacy entries.
     * @throws {Error} When `config.source` is not one of the supported values.
     */
    async loadFromSource() {
        switch (this.config.source) {
            case 'sqlite':
                return this.loadFromSQLite();
            case 'markdown':
                return this.loadFromMarkdown();
            case 'json':
                return this.loadFromJSON();
            case 'memory-manager':
                return this.loadFromMemoryManager();
            case 'swarm-memory':
                return this.loadFromSwarmMemory();
            case 'distributed-memory':
                return this.loadFromDistributedMemory();
            default:
                throw new Error(`Unknown migration source: ${this.config.source}`);
        }
    }

    /**
     * Load entries for the 'sqlite' source.
     *
     * Binary SQLite files are not parsed here. Only a `.json` export (an array
     * or `{ entries: [...] }`) is understood; anything else emits a
     * 'migration:warning' and yields no entries.
     *
     * @returns {Promise<Array<object>>}
     * @throws {Error} When the file cannot be read or the JSON export is malformed.
     */
    async loadFromSQLite() {
        const dbPath = this.config.sourcePath;
        try {
            const fileContent = await fs.readFile(dbPath);
            if (typeof dbPath === 'string' && dbPath.endsWith('.json')) {
                const exported = this.extractEntryList(JSON.parse(fileContent.toString()));
                if (exported) {
                    return exported;
                }
            }
            // SQLite parsing would go here using better-sqlite3 or sql.js
            this.emit('migration:warning', {
                message: 'Direct SQLite parsing requires additional setup. Using export format.',
            });
            return [];
        }
        catch (error) {
            throw new Error(`Failed to load SQLite: ${this.errorMessage(error)}`, { cause: error });
        }
    }

    /**
     * Load one entry per `.md` file found under `config.sourcePath`.
     * Files that fail to read or parse are recorded as PARSE_ERROR and skipped.
     *
     * @returns {Promise<Array<object>>}
     */
    async loadFromMarkdown() {
        const entries = [];
        const basePath = this.config.sourcePath;
        try {
            const files = await this.walkDirectory(basePath, '.md');
            for (const filePath of files) {
                try {
                    const content = await fs.readFile(filePath, 'utf-8');
                    const entry = this.parseMarkdownEntry(filePath, content, basePath);
                    if (entry) {
                        entries.push(entry);
                    }
                }
                catch (error) {
                    this.addError(filePath, this.errorMessage(error), 'PARSE_ERROR', true);
                }
            }
            return entries;
        }
        catch (error) {
            throw new Error(`Failed to load Markdown: ${this.errorMessage(error)}`, { cause: error });
        }
    }

    /**
     * Load entries from a JSON file.
     *
     * Accepted shapes: a top-level array, `{ entries: [...] }`, or an object
     * mapping namespace -> array of entries (each entry is stamped with that
     * namespace). Any other JSON value yields an empty list.
     *
     * @param {string} [filePath=this.config.sourcePath] - File to read.
     * @returns {Promise<Array<object>>}
     * @throws {Error} When the file cannot be read or is not valid JSON.
     */
    async loadFromJSON(filePath = this.config.sourcePath) {
        try {
            const content = await fs.readFile(filePath, 'utf-8');
            const data = JSON.parse(content);
            const list = this.extractEntryList(data);
            if (list) {
                return list;
            }
            // `typeof null === 'object'`, so null must be excluded explicitly.
            if (data !== null && typeof data === 'object') {
                // Assume it's a namespace -> entries map
                const entries = [];
                for (const [namespace, namespaceEntries] of Object.entries(data)) {
                    if (!Array.isArray(namespaceEntries)) {
                        continue;
                    }
                    for (const entry of namespaceEntries) {
                        entries.push({ ...entry, namespace });
                    }
                }
                return entries;
            }
            return [];
        }
        catch (error) {
            throw new Error(`Failed to load JSON: ${this.errorMessage(error)}`, { cause: error });
        }
    }

    /**
     * Probe well-known memory-store locations under `config.sourcePath` and
     * load the first one that exists and parses as JSON.
     *
     * A candidate that exists but cannot be parsed (for example the binary
     * `.swarm/memory.db`) produces a 'migration:warning' and the next
     * candidate is tried.
     *
     * @returns {Promise<Array<object>>} Entries, or an empty list when no candidate is usable.
     */
    async loadFromMemoryManager() {
        const candidates = [
            './memory/memory-store.json',
            './.swarm/memory.db',
            './memory.json',
        ];
        for (const candidate of candidates) {
            let fullPath;
            try {
                fullPath = path.resolve(this.config.sourcePath, candidate);
                await fs.access(fullPath);
            }
            catch {
                // Candidate is absent or unreachable; try the next one.
                continue;
            }
            try {
                // Read the file that was actually found, and await so a failure is caught here.
                return await this.loadFromJSON(fullPath);
            }
            catch (error) {
                this.emit('migration:warning', {
                    message: `Skipping ${fullPath}: ${this.errorMessage(error)}`,
                });
            }
        }
        return [];
    }

    /**
     * Load entries from JSON files under `<sourcePath>/.swarm/memory`.
     *
     * Each entry's namespace is overwritten with the file's directory relative
     * to that root ('default' for files directly in the root). Unreadable or
     * malformed files are recorded as PARSE_ERROR and skipped.
     *
     * @returns {Promise<Array<object>>}
     */
    async loadFromSwarmMemory() {
        const entries = [];
        const basePath = this.config.sourcePath;
        try {
            // Check for swarm memory directory structure
            const partitionsPath = path.join(basePath, '.swarm', 'memory');
            const files = await this.walkDirectory(partitionsPath, '.json');
            for (const filePath of files) {
                try {
                    const content = await fs.readFile(filePath, 'utf-8');
                    const list = this.extractEntryList(JSON.parse(content));
                    if (!list) {
                        continue;
                    }
                    const namespace = this.namespaceFromRelativePath(path.relative(partitionsPath, filePath));
                    // Plain loop rather than push(...list): spreading a very large
                    // array into a call can exceed the engine's argument limit.
                    for (const item of list) {
                        entries.push({ ...item, namespace });
                    }
                }
                catch (error) {
                    this.addError(filePath, this.errorMessage(error), 'PARSE_ERROR', true);
                }
            }
            return entries;
        }
        catch (error) {
            // Best effort: report why nothing was loaded instead of failing silently.
            this.emit('migration:warning', {
                message: `Failed to load swarm memory: ${this.errorMessage(error)}`,
            });
            return [];
        }
    }

    /**
     * Load entries for the 'distributed-memory' source.
     * Currently delegates to the swarm-memory loader.
     *
     * @returns {Promise<Array<object>>}
     */
    async loadFromDistributedMemory() {
        return this.loadFromSwarmMemory();
    }

    // ===== Batch Processing =====

    /**
     * Validate, transform and store every entry of one batch.
     *
     * With `continueOnError`, invalid entries are counted as skipped and
     * transform/store failures as failed; otherwise the first problem is
     * rethrown to the caller.
     *
     * @param {Array<object>} batch - Legacy entries.
     * @returns {Promise<void>}
     */
    async processBatch(batch) {
        for (const legacyEntry of batch) {
            // Optional chaining keeps error reporting alive for null/primitive entries.
            const entryId = legacyEntry?.key || 'unknown';
            try {
                if (this.config.validateData) {
                    const validation = this.validateEntry(legacyEntry);
                    if (!validation.valid) {
                        if (!this.config.continueOnError) {
                            throw new Error(validation.reason);
                        }
                        this.addError(entryId, validation.reason || 'Validation failed', 'VALIDATION_ERROR', false);
                        this.progress.skipped++;
                        continue;
                    }
                }
                // Transform to new format
                const newEntry = await this.transformEntry(legacyEntry);
                // Store in target
                await this.target.store(newEntry);
                this.progress.migrated++;
            }
            catch (error) {
                if (!this.config.continueOnError) {
                    throw error;
                }
                this.addError(entryId, this.errorMessage(error), 'STORE_ERROR', true);
                this.progress.failed++;
            }
        }
    }

    /**
     * Convert a legacy entry into the new entry format.
     *
     * Applies `config.namespaceMapping` / `config.typeMapping`, serialises
     * non-string values to JSON for `content`, keeps the original value in
     * metadata, and (when enabled and a generator exists) attaches an
     * embedding. Embedding failures only emit a 'migration:warning'.
     *
     * @param {object} legacy - Legacy entry.
     * @returns {Promise<object>} Entry produced by createDefaultEntry().
     */
    async transformEntry(legacy) {
        // Map namespace if configured
        const sourceNamespace = legacy.namespace || 'default';
        const namespace = this.lookupMapping(this.config.namespaceMapping, sourceNamespace) ?? sourceNamespace;
        // Determine content
        const content = typeof legacy.value === 'string'
            ? legacy.value
            : JSON.stringify(legacy.value);
        // Map type if configured
        let type = 'semantic';
        const legacyType = legacy.metadata?.type;
        if (legacyType && typeof legacyType === 'string') {
            const mappedType = this.lookupMapping(this.config.typeMapping, legacyType);
            if (mappedType) {
                type = mappedType;
            }
            else if (this.isValidMemoryType(legacyType)) {
                type = legacyType;
            }
        }
        // Parse timestamps
        const createdAt = this.parseTimestamp(legacy.createdAt || legacy.created_at || legacy.timestamp);
        const updatedAt = this.parseTimestamp(legacy.updatedAt || legacy.updated_at || legacy.timestamp);
        const entry = createDefaultEntry({
            key: legacy.key,
            content,
            type,
            namespace,
            tags: legacy.tags || [],
            metadata: {
                ...legacy.metadata,
                migrated: true,
                migrationSource: this.config.source,
                migrationTimestamp: Date.now(),
                originalValue: legacy.value,
            },
        });
        entry.createdAt = createdAt;
        entry.updatedAt = updatedAt;
        // Generate embedding if configured
        if (this.config.generateEmbeddings && this.embeddingGenerator) {
            try {
                entry.embedding = await this.embeddingGenerator(content);
            }
            catch (error) {
                // Log but don't fail
                this.emit('migration:warning', {
                    message: `Failed to generate embedding for ${legacy.key}: ${this.errorMessage(error)}`,
                });
            }
        }
        return entry;
    }

    // ===== Helper Methods =====

    /**
     * Build a zeroed progress record.
     *
     * @returns {object}
     */
    initializeProgress() {
        return {
            total: 0,
            migrated: 0,
            failed: 0,
            skipped: 0,
            currentBatch: 0,
            totalBatches: 0,
            percentage: 0,
            estimatedTimeRemaining: 0,
            errors: [],
        };
    }

    /**
     * Check that a legacy entry is an object with a string key of at most
     * 500 characters and a defined value.
     *
     * @param {*} entry - Candidate legacy entry.
     * @returns {{valid: boolean, reason?: string}}
     */
    validateEntry(entry) {
        if (entry === null || typeof entry !== 'object') {
            return { valid: false, reason: 'Entry is not an object' };
        }
        if (!entry.key || typeof entry.key !== 'string') {
            return { valid: false, reason: 'Missing or invalid key' };
        }
        if (entry.value === undefined) {
            return { valid: false, reason: 'Missing value' };
        }
        if (entry.key.length > 500) {
            return { valid: false, reason: 'Key too long (max 500 chars)' };
        }
        return { valid: true };
    }

    /**
     * Record an error in the progress log and emit 'migration:error'.
     *
     * @param {string} entryId - Key or file path the error relates to.
     * @param {string} message - Human-readable description.
     * @param {string} code - Error category (e.g. 'PARSE_ERROR').
     * @param {boolean} recoverable - Whether the caller flagged it as recoverable.
     */
    addError(entryId, message, code, recoverable) {
        const error = { entryId, message, code, recoverable };
        this.progress.errors.push(error);
        this.emit('migration:error', error);
    }

    /**
     * Normalise a legacy timestamp to epoch milliseconds.
     *
     * Falsy input and unparseable strings fall back to the current time.
     * Numbers above 1e12 are taken as milliseconds, anything else as seconds.
     *
     * @param {number|string|undefined} value
     * @returns {number}
     */
    parseTimestamp(value) {
        if (!value) {
            return Date.now();
        }
        if (typeof value === 'number') {
            // Handle both milliseconds and seconds
            return value > 1e12 ? value : value * 1000;
        }
        const parsed = Date.parse(value);
        return Number.isNaN(parsed) ? Date.now() : parsed;
    }

    /**
     * @param {string} type
     * @returns {boolean} True when `type` is one of the known memory types.
     */
    isValidMemoryType(type) {
        return ['episodic', 'semantic', 'procedural', 'working', 'cache'].includes(type);
    }

    /**
     * Linear estimate of the remaining time in milliseconds.
     *
     * @param {number} startTime - Epoch ms when the run started.
     * @param {number} completed - Entries done so far.
     * @param {number} total - Entries overall.
     * @returns {number} 0 when nothing has completed yet.
     */
    estimateTimeRemaining(startTime, completed, total) {
        if (completed === 0) {
            return 0;
        }
        const elapsed = Date.now() - startTime;
        const rate = completed / elapsed;
        const remaining = total - completed;
        return Math.round(remaining / rate);
    }

    /**
     * Produce a one-line summary of the counters plus a per-code error tally.
     *
     * @returns {string}
     */
    generateSummary() {
        const { migrated, failed, skipped, total, errors } = this.progress;
        let summary = `Migrated ${migrated}/${total} entries`;
        if (failed > 0) {
            summary += `, ${failed} failed`;
        }
        if (skipped > 0) {
            summary += `, ${skipped} skipped`;
        }
        if (errors.length > 0) {
            const errorTypes = new Map();
            for (const error of errors) {
                errorTypes.set(error.code, (errorTypes.get(error.code) || 0) + 1);
            }
            const errorSummary = Array.from(errorTypes.entries())
                .map(([code, count]) => `${code}: ${count}`)
                .join(', ');
            summary += `. Errors: ${errorSummary}`;
        }
        return summary;
    }

    /**
     * Recursively collect files under `dir` whose name ends with `extension`.
     * A directory that cannot be read contributes no files.
     *
     * @param {string} dir
     * @param {string} extension - Suffix including the dot, e.g. '.md'.
     * @returns {Promise<string[]>}
     */
    async walkDirectory(dir, extension) {
        const files = [];
        try {
            const entries = await fs.readdir(dir, { withFileTypes: true });
            for (const entry of entries) {
                const fullPath = path.join(dir, entry.name);
                if (entry.isDirectory()) {
                    // Loop instead of push(...subFiles) to stay clear of the argument limit.
                    for (const nested of await this.walkDirectory(fullPath, extension)) {
                        files.push(nested);
                    }
                }
                else if (entry.isFile() && entry.name.endsWith(extension)) {
                    files.push(fullPath);
                }
            }
        }
        catch {
            // Deliberate best effort: directory doesn't exist or isn't readable.
        }
        return files;
    }

    /**
     * Turn one Markdown file into a legacy entry.
     *
     * A leading `---` ... `---` block (LF or CRLF line endings) is read as
     * simple `key: value` frontmatter: 'true'/'false' become booleans, digit
     * runs become integers, and `[...]` values are parsed as JSON after
     * swapping single quotes for double quotes. The key is the path relative
     * to `basePath` without `.md`, using ':' as separator; the namespace is
     * the relative directory ('default' for files directly in `basePath`).
     *
     * @param {string} filePath - Absolute or relative path of the file.
     * @param {string} content - File contents.
     * @param {string} basePath - Root the key/namespace are derived from.
     * @returns {{key: string, value: string, namespace: string, tags: Array, metadata: object, timestamp: number}}
     */
    parseMarkdownEntry(filePath, content, basePath) {
        // Extract frontmatter if present
        const frontmatterMatch = content.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/);
        const metadata = {};
        let body = content;
        if (frontmatterMatch) {
            // Simple YAML-like parsing
            for (const line of frontmatterMatch[1].split(/\r?\n/)) {
                const colonIndex = line.indexOf(':');
                if (colonIndex > 0) {
                    const key = line.substring(0, colonIndex).trim();
                    metadata[key] = this.parseFrontmatterValue(line.substring(colonIndex + 1).trim());
                }
            }
            // The body group is absent when the file ends right after the closing fence.
            body = frontmatterMatch[2] ?? '';
        }
        // Derive key from file path
        const relativePath = path.relative(basePath, filePath);
        const key = relativePath
            .replace(/\\/g, '/')
            .replace(/\.md$/, '')
            .replace(/\//g, ':');
        return {
            key,
            value: body.trim(),
            // Derive namespace from directory structure
            namespace: this.namespaceFromRelativePath(relativePath),
            tags: Array.isArray(metadata.tags) ? metadata.tags : [],
            metadata,
            timestamp: Date.now(),
        };
    }

    /** Coerce one raw frontmatter value to boolean, integer, array or string. */
    parseFrontmatterValue(raw) {
        if (raw === 'true') {
            return true;
        }
        if (raw === 'false') {
            return false;
        }
        if (/^\d+$/.test(raw)) {
            return parseInt(raw, 10);
        }
        if (raw.startsWith('[') && raw.endsWith(']')) {
            try {
                return JSON.parse(raw.replace(/'/g, '"'));
            }
            catch {
                // Not valid JSON even after quote normalisation: keep as string.
                return raw;
            }
        }
        return raw;
    }

    /** Directory part of a relative path with '/' separators; 'default' for top-level files. */
    namespaceFromRelativePath(relativePath) {
        // path.dirname() yields '.', never '', for a bare file name.
        const dir = path.dirname(relativePath).replace(/\\/g, '/');
        return dir === '.' || dir === '' ? 'default' : dir;
    }

    /** Return the entry array from `[...]` or `{ entries: [...] }`, else null. */
    extractEntryList(data) {
        if (Array.isArray(data)) {
            return data;
        }
        if (data !== null && typeof data === 'object' && Array.isArray(data.entries)) {
            return data.entries;
        }
        return null;
    }

    /** Own-property lookup in a mapping object; undefined when absent or falsy. */
    lookupMapping(mapping, key) {
        // hasOwnProperty guards against inherited names such as 'constructor'.
        if (!mapping || !Object.prototype.hasOwnProperty.call(mapping, key)) {
            return undefined;
        }
        return mapping[key] || undefined;
    }

    /** Configured batch size when it is a positive integer, else the default. */
    resolveBatchSize() {
        const { batchSize } = this.config;
        return Number.isInteger(batchSize) && batchSize > 0
            ? batchSize
            : DEFAULT_MIGRATION_CONFIG.batchSize;
    }

    /** Best-effort message extraction for values caught in a catch clause. */
    errorMessage(error) {
        return error?.message ?? String(error);
    }
}
/**
 * Factory shortcut for building a MemoryMigrator.
 *
 * `source` and `sourcePath` seed the config; any key present in `options`
 * (including `source`/`sourcePath` themselves) overrides them.
 *
 * @param {object} target - Destination backend passed to the migrator.
 * @param {string} source - Migration source identifier.
 * @param {string} sourcePath - Location of the legacy data.
 * @param {object} [options={}] - Extra migration options.
 * @param {Function} [embeddingGenerator] - Optional embedding callback.
 * @returns {MemoryMigrator}
 */
export function createMigrator(target, source, sourcePath, options = {}, embeddingGenerator) {
    const migrationConfig = {
        source,
        sourcePath,
        ...options,
    };
    const migrator = new MemoryMigrator(target, migrationConfig, embeddingGenerator);
    return migrator;
}
/**
 * Run one migration per source descriptor, strictly one after another,
 * and collect the results in input order.
 *
 * @param {object} target - Destination backend shared by every migration.
 * @param {Array<{source: string, path: string}>} sources - Sources to migrate.
 * @param {object} [options={}] - Options forwarded to each migrator.
 * @param {Function} [embeddingGenerator] - Optional embedding callback.
 * @returns {Promise<Array<object>>} One migration result per source.
 */
export async function migrateMultipleSources(target, sources, options = {}, embeddingGenerator) {
    const outcomes = [];
    for (const descriptor of sources) {
        const { source, path: sourcePath } = descriptor;
        const outcome = await createMigrator(target, source, sourcePath, options, embeddingGenerator).migrate();
        outcomes.push(outcome);
    }
    return outcomes;
}
export default MemoryMigrator;
//# sourceMappingURL=migration.js.map