// File viewer metadata: 416 lines, 20 KiB, JavaScript
"use strict";
|
|
/**
 * Standalone Frontier Features Validation
 *
 * Tests all frontier features without requiring full build infrastructure.
 * The checks run SQL directly against the schema files in an in-memory database.
 * Validates:
 * 1. CausalMemoryGraph - causal edges, experiments, uplift calculation
 * 2. ExplainableRecall - certificates, Merkle proofs, provenance
 * 3. CausalRecall - utility-based reranking (only the underlying episode,
 *    embedding and causal-edge tables are exercised in this file)
 * 4. NightlyLearner - automated causal discovery (no dedicated test in this file)
 */
|
|
const Database = require('better-sqlite3');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
const crypto = require('crypto');
|
|
// ANSI escape sequences used to colorize console output
const colors = Object.freeze({
    reset: '\x1b[0m',
    green: '\x1b[32m',
    red: '\x1b[31m',
    yellow: '\x1b[33m',
    blue: '\x1b[34m',
    cyan: '\x1b[36m'
});

/**
 * Prints a message to stdout wrapped in an ANSI color sequence.
 *
 * @param {string} message - Text to print.
 * @param {string} [color='reset'] - Key of `colors`. Names that are not own
 *   keys of `colors` fall back to the reset sequence instead of leaking
 *   "undefined" (or an inherited property) into the output.
 */
function log(message, color = 'reset') {
    const isKnownColor = Object.prototype.hasOwnProperty.call(colors, color);
    const prefix = isKnownColor ? colors[color] : colors.reset;
    console.log(`${prefix}${message}${colors.reset}`);
}
|
|
function assert(condition, message) {
|
|
if (!condition) {
|
|
throw new Error(`Assertion failed: ${message}`);
|
|
}
|
|
}
|
|
/**
 * Runs the frontier-feature validation tests against an in-memory SQLite
 * database built from the core and frontier schema files.
 *
 * Each test runs against a freshly created database. The tests assert
 * absolute row ids (1, 2, ...) and absolute row counts, so rows left behind
 * by an earlier test would otherwise change their outcome.
 */
class FrontierValidator {
    /**
     * Reads the schema files, opens the initial database and zeroes the
     * pass/fail counters.
     */
    constructor() {
        this.testsRun = 0;
        this.testsPassed = 0;
        this.testsFailed = 0;
        // SQL text of the schemas, cached so every test can rebuild the database.
        this.schemaSql = [];
        this.db = null;
        // Load schemas
        this.loadSchemas();
    }

    /**
     * Reads both schema files from the `schemas` directory next to this
     * script, caches their SQL and creates a database with them applied.
     */
    loadSchemas() {
        log('\n📦 Loading database schemas...', 'cyan');
        // Core schema first, then the frontier schema.
        this.schemaSql = ['schemas/schema.sql', 'schemas/frontier-schema.sql'].map(
            (relativePath) => fs.readFileSync(path.join(__dirname, relativePath), 'utf-8')
        );
        this.resetDatabase();
        log('✓ Schemas loaded successfully', 'green');
    }

    /**
     * Replaces `this.db` with a new, empty in-memory database that has the
     * cached schemas applied. Any previously open database is closed first.
     */
    resetDatabase() {
        if (this.db) {
            this.db.close();
            this.db = null;
        }
        this.db = new Database(':memory:');
        for (const sql of this.schemaSql) {
            this.db.exec(sql);
        }
    }

    /**
     * Inserts `count` successful episodes named `session<i>` / `task<i>`
     * for i = 1..count.
     *
     * @param {number} count - Number of episodes to insert.
     * @param {(index: number) => number} [rewardFor] - Reward for the i-th
     *   episode; defaults to a constant 0.8.
     */
    insertEpisodes(count, rewardFor = () => 0.8) {
        const insertEpisode = this.db.prepare(`
            INSERT INTO episodes (session_id, task, reward, success)
            VALUES (?, ?, ?, ?)
        `);
        for (let i = 1; i <= count; i++) {
            insertEpisode.run(`session${i}`, `task${i}`, rewardFor(i), 1);
        }
    }

    /**
     * Runs one named test on a fresh database and records the outcome.
     * Failures are logged and counted; they never propagate to the caller.
     *
     * @param {string} name - Human-readable test name.
     * @param {() => void} testFn - Test body; throws to signal failure.
     */
    runTest(name, testFn) {
        this.testsRun++;
        try {
            log(`\n🧪 ${name}`, 'blue');
            // Isolate the test from rows inserted by earlier tests.
            this.resetDatabase();
            testFn();
            this.testsPassed++;
            log(' ✅ PASSED', 'green');
        }
        catch (error) {
            this.testsFailed++;
            log(` ❌ FAILED: ${error.message}`, 'red');
            console.error(error);
        }
    }

    // ========================================================================
    // CausalMemoryGraph Tests
    // ========================================================================

    /** Inserts one causal edge between two episodes and reads it back. */
    testCausalEdgeInsertion() {
        this.runTest('Causal Edge Insertion', () => {
            // Insert test episodes
            const stmt = this.db.prepare(`
                INSERT INTO episodes (session_id, task, reward, success)
                VALUES (?, ?, ?, ?)
            `);
            stmt.run('session1', 'task1', 0.8, 1);
            stmt.run('session1', 'task2', 0.9, 1);
            // Insert causal edge
            const edgeStmt = this.db.prepare(`
                INSERT INTO causal_edges (
                    from_memory_id, from_memory_type, to_memory_id, to_memory_type,
                    similarity, uplift, confidence, sample_size, mechanism
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            `);
            const result = edgeStmt.run(1, 'episode', 2, 'episode', 0.85, 0.15, 0.9, 100, 'test mechanism');
            const edgeId = result.lastInsertRowid;
            assert(edgeId > 0, 'Edge should be inserted');
            // Verify
            const edge = this.db.prepare('SELECT * FROM causal_edges WHERE id = ?').get(edgeId);
            assert(edge.from_memory_id === 1, 'From ID should match');
            assert(edge.to_memory_id === 2, 'To ID should match');
            assert(edge.similarity === 0.85, 'Similarity should match');
            assert(edge.uplift === 0.15, 'Uplift should match');
            assert(edge.confidence === 0.9, 'Confidence should match');
        });
    }

    /**
     * Records 50 treatment and 50 control observations for one experiment
     * and checks that the difference of the group means exceeds 0.1.
     */
    testABExperiment() {
        this.runTest('A/B Experiment and Uplift Calculation', () => {
            // Insert episodes for experiment
            this.insertEpisodes(10, () => 0.7 + Math.random() * 0.2);
            // Create experiment
            const expStmt = this.db.prepare(`
                INSERT INTO causal_experiments (
                    name, hypothesis, treatment_id, treatment_type, start_time, sample_size, status
                ) VALUES (?, ?, ?, ?, ?, ?, ?)
            `);
            const expResult = expStmt.run('Test Experiment', 'Treatment improves outcome', 1, 'episode', Date.now(), 0, 'running');
            const expId = expResult.lastInsertRowid;
            assert(expId > 0, 'Experiment should be created');
            // Record observations
            const obsStmt = this.db.prepare(`
                INSERT INTO causal_observations (
                    experiment_id, episode_id, is_treatment, outcome_value, outcome_type
                ) VALUES (?, ?, ?, ?, ?)
            `);
            // Treatment group (higher rewards): values in [0.75, 0.85)
            for (let i = 0; i < 50; i++) {
                obsStmt.run(expId, (i % 10) + 1, 1, 0.75 + Math.random() * 0.1, 'reward');
            }
            // Control group (lower rewards): values in [0.55, 0.65)
            for (let i = 0; i < 50; i++) {
                obsStmt.run(expId, (i % 10) + 1, 0, 0.55 + Math.random() * 0.1, 'reward');
            }
            // Check observations recorded
            const obs = this.db.prepare('SELECT COUNT(*) as count FROM causal_observations WHERE experiment_id = ?').get(expId);
            assert(obs.count === 100, 'Should have 100 observations');
            // Calculate uplift (simplified inline calculation)
            const groupStmt = this.db.prepare(`
                SELECT outcome_value FROM causal_observations
                WHERE experiment_id = ? AND is_treatment = ?
            `);
            const meanOutcome = (rows) => rows.reduce((sum, o) => sum + o.outcome_value, 0) / rows.length;
            const treatmentMean = meanOutcome(groupStmt.all(expId, 1));
            const controlMean = meanOutcome(groupStmt.all(expId, 0));
            const uplift = treatmentMean - controlMean;
            assert(uplift > 0.1, 'Uplift should be positive and significant');
            log(` Uplift: ${uplift.toFixed(3)}, Treatment: ${treatmentMean.toFixed(3)}, Control: ${controlMean.toFixed(3)}`, 'cyan');
        });
    }

    /**
     * Inserts four edges from episode 1 and checks that filtering by
     * confidence and absolute uplift keeps exactly the two strong ones,
     * ordered by |uplift| * confidence.
     */
    testCausalQuery() {
        this.runTest('Causal Effect Query', () => {
            // Insert test data
            this.insertEpisodes(5);
            // Insert causal edges
            const edgeStmt = this.db.prepare(`
                INSERT INTO causal_edges (
                    from_memory_id, from_memory_type, to_memory_id, to_memory_type,
                    similarity, uplift, confidence
                ) VALUES (?, ?, ?, ?, ?, ?, ?)
            `);
            edgeStmt.run(1, 'episode', 2, 'episode', 0.8, 0.2, 0.9);
            edgeStmt.run(1, 'episode', 3, 'episode', 0.75, 0.15, 0.85);
            edgeStmt.run(1, 'episode', 4, 'episode', 0.7, 0.05, 0.95); // Low uplift
            edgeStmt.run(1, 'episode', 5, 'episode', 0.85, 0.25, 0.5); // Low confidence
            // Query with filters
            const effects = this.db.prepare(`
                SELECT * FROM causal_edges
                WHERE from_memory_id = ?
                AND confidence >= ?
                AND ABS(uplift) >= ?
                ORDER BY ABS(uplift) * confidence DESC
            `).all(1, 0.8, 0.1);
            assert(effects.length === 2, 'Should find 2 high-confidence, high-uplift edges');
            assert(effects[0].to_memory_id === 2, 'Highest impact should be first');
        });
    }

    // ========================================================================
    // ExplainableRecall Tests
    // ========================================================================

    /** Stores one recall certificate and verifies the persisted fields. */
    testCertificateCreation() {
        this.runTest('Certificate Creation and Storage', () => {
            // Insert test episodes
            this.insertEpisodes(5);
            // Create certificate
            const certId = crypto.randomUUID();
            const chunkIds = ['1', '2', '3', '4'];
            const minimalWhy = ['1', '2']; // Minimal hitting set
            const merkleRoot = crypto.createHash('sha256').update('test').digest('hex');
            const certStmt = this.db.prepare(`
                INSERT INTO recall_certificates (
                    id, query_id, query_text, chunk_ids, chunk_types,
                    minimal_why, redundancy_ratio, completeness_score,
                    merkle_root, source_hashes, access_level, latency_ms
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            `);
            certStmt.run(
                certId,
                'q1',
                'Test query',
                JSON.stringify(chunkIds),
                JSON.stringify(['episode', 'episode', 'episode', 'episode']),
                JSON.stringify(minimalWhy),
                chunkIds.length / minimalWhy.length, // redundancy ratio = 2
                1.0, // completeness
                merkleRoot,
                JSON.stringify([merkleRoot, merkleRoot, merkleRoot, merkleRoot]),
                'internal',
                10
            );
            // Verify. `.get()` yields undefined (not null) when no row matches.
            const cert = this.db.prepare('SELECT * FROM recall_certificates WHERE id = ?').get(certId);
            assert(cert !== undefined, 'Certificate should exist');
            assert(cert.query_id === 'q1', 'Query ID should match');
            assert(JSON.parse(cert.chunk_ids).length === 4, 'Should have 4 chunks');
            assert(JSON.parse(cert.minimal_why).length === 2, 'Minimal set should be 2');
            assert(cert.redundancy_ratio === 2, 'Redundancy ratio should be 2');
            assert(cert.completeness_score === 1.0, 'Completeness should be 1.0');
            assert(cert.merkle_root === merkleRoot, 'Merkle root should match');
        });
    }

    /**
     * Builds a two-level SHA-256 Merkle tree over four chunks, checks the
     * root format, and verifies an inclusion proof for one leaf.
     */
    testMerkleProofGeneration() {
        this.runTest('Merkle Proof Generation and Verification', () => {
            const sha256Hex = (text) => crypto.createHash('sha256').update(text).digest('hex');
            // Create Merkle tree
            const leaves = ['chunk1', 'chunk2', 'chunk3', 'chunk4'].map((content) => sha256Hex(content));
            // Build tree (simplified 2-level tree); an unpaired node is paired with itself.
            const level1 = [];
            for (let i = 0; i < leaves.length; i += 2) {
                level1.push(sha256Hex(leaves[i] + (leaves[i + 1] || leaves[i])));
            }
            const root = sha256Hex(level1[0] + (level1[1] || level1[0]));
            assert(root.length === 64, 'Merkle root should be 64 hex characters');
            assert(/^[0-9a-f]{64}$/.test(root), 'Merkle root should be valid SHA-256 hash');
            // Inclusion proof for chunk3 (leaf index 2): its right-hand sibling
            // leaf, then the left-hand sibling node one level up.
            const proof = [
                { sibling: leaves[3], siblingOnLeft: false },
                { sibling: level1[0], siblingOnLeft: true },
            ];
            const rootFromLeaf = (leafHash) => proof.reduce(
                (hash, step) => sha256Hex(step.siblingOnLeft ? step.sibling + hash : hash + step.sibling),
                leafHash
            );
            assert(rootFromLeaf(leaves[2]) === root, 'Merkle proof should reproduce the root');
            assert(rootFromLeaf(sha256Hex('tampered')) !== root, 'Tampered leaf should not verify');
            log(` Merkle root: ${root.substring(0, 16)}...`, 'cyan');
        });
    }

    /** Inserts two provenance sources and looks one up by content hash. */
    testProvenanceTracking() {
        this.runTest('Provenance Source Tracking', () => {
            // Insert test episodes
            this.insertEpisodes(3);
            // Create provenance sources
            const provStmt = this.db.prepare(`
                INSERT INTO provenance_sources (
                    source_type, source_id, content_hash, creator
                ) VALUES (?, ?, ?, ?)
            `);
            const hash1 = crypto.createHash('sha256').update('content1').digest('hex');
            const hash2 = crypto.createHash('sha256').update('content2').digest('hex');
            const result1 = provStmt.run('episode', 1, hash1, 'test_user');
            const result2 = provStmt.run('episode', 2, hash2, 'test_user');
            assert(result1.lastInsertRowid > 0, 'First provenance should be inserted');
            assert(result2.lastInsertRowid > 0, 'Second provenance should be inserted');
            // Query lineage. `.get()` yields undefined (not null) when no row matches.
            const lineage = this.db.prepare(`
                SELECT * FROM provenance_sources
                WHERE content_hash = ?
            `).get(hash1);
            assert(lineage !== undefined, 'Lineage should exist');
            assert(lineage.source_type === 'episode', 'Source type should match');
            assert(lineage.creator === 'test_user', 'Creator should match');
        });
    }

    /** Attaches two justification paths to a certificate and reads them back. */
    testJustificationPaths() {
        this.runTest('Justification Paths', () => {
            // Create certificate first
            const certId = crypto.randomUUID();
            this.db.prepare(`
                INSERT INTO recall_certificates (
                    id, query_id, query_text, chunk_ids, chunk_types,
                    minimal_why, redundancy_ratio, completeness_score,
                    merkle_root, source_hashes, access_level
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            `).run(
                certId,
                'q2',
                'Test query',
                JSON.stringify(['1', '2']),
                JSON.stringify(['episode', 'episode']),
                JSON.stringify(['1']),
                2,
                1.0,
                'hash',
                JSON.stringify(['hash1', 'hash2']),
                'internal'
            );
            // Add justification paths
            const justStmt = this.db.prepare(`
                INSERT INTO justification_paths (
                    certificate_id, chunk_id, chunk_type, reason, necessity_score, path_elements
                ) VALUES (?, ?, ?, ?, ?, ?)
            `);
            justStmt.run(certId, '1', 'episode', 'semantic_match', 0.9, JSON.stringify(['query', 'chunk1']));
            justStmt.run(certId, '2', 'episode', 'causal_link', 0.7, JSON.stringify(['query', 'chunk1', 'chunk2']));
            // Query justifications
            const justifications = this.db.prepare(`
                SELECT * FROM justification_paths
                WHERE certificate_id = ?
            `).all(certId);
            assert(justifications.length === 2, 'Should have 2 justification paths');
            assert(justifications[0].reason === 'semantic_match', 'First reason should match');
            assert(justifications[1].reason === 'causal_link', 'Second reason should match');
        });
    }

    // ========================================================================
    // Integration Tests
    // ========================================================================

    /**
     * Populates episodes, embeddings and causal edges together and checks
     * the resulting row counts.
     */
    testCausalRecallIntegration() {
        this.runTest('Causal Recall Integration', () => {
            // Insert episodes with embeddings
            const epStmt = this.db.prepare(`
                INSERT INTO episodes (session_id, task, reward, success, latency_ms)
                VALUES (?, ?, ?, ?, ?)
            `);
            for (let i = 1; i <= 10; i++) {
                epStmt.run(`session${i}`, `task${i}`, 0.7 + i * 0.02, 1, 50 + i * 5);
            }
            // Add embeddings
            const embStmt = this.db.prepare(`
                INSERT INTO episode_embeddings (episode_id, embedding)
                VALUES (?, ?)
            `);
            for (let i = 1; i <= 10; i++) {
                // Create mock embedding: 384 random values stored as a JSON array
                const embedding = new Float32Array(384);
                for (let j = 0; j < 384; j++) {
                    embedding[j] = Math.random();
                }
                embStmt.run(i, JSON.stringify(Array.from(embedding)));
            }
            // Add causal edges
            const edgeStmt = this.db.prepare(`
                INSERT INTO causal_edges (
                    from_memory_id, from_memory_type, to_memory_id, to_memory_type,
                    similarity, uplift, confidence
                ) VALUES (?, ?, ?, ?, ?, ?, ?)
            `);
            edgeStmt.run(1, 'episode', 2, 'episode', 0.8, 0.15, 0.9);
            edgeStmt.run(2, 'episode', 3, 'episode', 0.75, 0.2, 0.85);
            // Verify integration
            const episodes = this.db.prepare('SELECT COUNT(*) as count FROM episodes').get();
            const embeddings = this.db.prepare('SELECT COUNT(*) as count FROM episode_embeddings').get();
            const edges = this.db.prepare('SELECT COUNT(*) as count FROM causal_edges').get();
            assert(episodes.count === 10, 'Should have 10 episodes');
            assert(embeddings.count === 10, 'Should have 10 embeddings');
            assert(edges.count === 2, 'Should have 2 causal edges');
            log(` Episodes: ${episodes.count}, Embeddings: ${embeddings.count}, Edges: ${edges.count}`, 'cyan');
        });
    }

    /**
     * Times a 1000-row transactional insert and a filtered query against
     * fixed wall-clock thresholds.
     */
    testDatabasePerformance() {
        this.runTest('Database Performance', () => {
            // Insert many episodes for performance test
            const start = Date.now();
            this.db.transaction(() => {
                const stmt = this.db.prepare(`
                    INSERT INTO episodes (session_id, task, reward, success)
                    VALUES (?, ?, ?, ?)
                `);
                for (let i = 1; i <= 1000; i++) {
                    stmt.run(`session${i}`, `task${i}`, Math.random(), 1);
                }
            })();
            const insertTime = Date.now() - start;
            // Query performance
            const queryStart = Date.now();
            const results = this.db.prepare('SELECT * FROM episodes WHERE reward > 0.5 LIMIT 100').all();
            const queryTime = Date.now() - queryStart;
            assert(results.length > 0, 'Should find episodes with reward > 0.5');
            assert(insertTime < 1000, 'Bulk insert should be fast (< 1s for 1000 records)');
            assert(queryTime < 50, 'Query should be fast (< 50ms)');
            log(` Insert 1000: ${insertTime}ms, Query: ${queryTime}ms`, 'cyan');
        });
    }

    // ========================================================================
    // Summary
    // ========================================================================

    /**
     * Runs every test, prints a summary and closes the database.
     *
     * @returns {number} 0 when all tests passed, 1 otherwise.
     */
    runAll() {
        try {
            log('\n═══════════════════════════════════════════════════════════', 'cyan');
            log(' AgentDB Frontier Features Validation', 'cyan');
            log('═══════════════════════════════════════════════════════════', 'cyan');
            // CausalMemoryGraph tests
            log('\n📊 Testing CausalMemoryGraph', 'yellow');
            this.testCausalEdgeInsertion();
            this.testABExperiment();
            this.testCausalQuery();
            // ExplainableRecall tests
            log('\n🔍 Testing ExplainableRecall', 'yellow');
            this.testCertificateCreation();
            this.testMerkleProofGeneration();
            this.testProvenanceTracking();
            this.testJustificationPaths();
            // Integration tests
            log('\n🔗 Testing Integration', 'yellow');
            this.testCausalRecallIntegration();
            this.testDatabasePerformance();
            // Print summary
            log('\n═══════════════════════════════════════════════════════════', 'cyan');
            log(' Summary', 'cyan');
            log('═══════════════════════════════════════════════════════════', 'cyan');
            log(`\n Total Tests: ${this.testsRun}`, 'cyan');
            log(` Passed: ${this.testsPassed}`, 'green');
            log(` Failed: ${this.testsFailed}`, this.testsFailed > 0 ? 'red' : 'cyan');
            // Guard the division so an empty run reports 0.0% instead of NaN%.
            const passRate = this.testsRun === 0
                ? '0.0'
                : ((this.testsPassed / this.testsRun) * 100).toFixed(1);
            log(` Pass Rate: ${passRate}%`, passRate === '100.0' ? 'green' : 'yellow');
            if (this.testsFailed === 0) {
                log('\n✅ ALL TESTS PASSED - FRONTIER FEATURES VALIDATED', 'green');
            }
            else {
                log(`\n❌ ${this.testsFailed} TEST(S) FAILED`, 'red');
            }
            log('═══════════════════════════════════════════════════════════\n', 'cyan');
            return this.testsFailed === 0 ? 0 : 1;
        }
        finally {
            // Release the database even if printing the summary throws.
            if (this.db) {
                this.db.close();
                this.db = null;
            }
        }
    }
}
|
|
// Run validation.
// The exit status is reported through process.exitCode rather than
// process.exit(): a forced exit can drop summary output that is still being
// written to a piped stdout/stderr.
try {
    const validator = new FrontierValidator();
    process.exitCode = validator.runAll();
}
catch (error) {
    // Reached when the validator cannot be set up or the run itself throws
    // (for example, a schema file cannot be read).
    console.error(error);
    process.exitCode = 1;
}
|
|
//# sourceMappingURL=validate-frontier.cjs.map
|