tasq/node_modules/agentdb/dist/schemas/frontier-schema.sql

386 lines
13 KiB
SQL

-- ============================================================================
-- AgentDB Frontier Features Schema Extension
-- ============================================================================
-- Implements cutting-edge memory features:
-- 1. Causal Memory Graph - Store edges with causal strength, not just similarity
-- 2. Explainable Recall Certificates - Provenance and justification tracking
-- ============================================================================
-- ============================================================================
-- FEATURE 1: Causal Memory Graph
-- ============================================================================
-- Causal edges between memories with intervention effects
CREATE TABLE IF NOT EXISTS causal_edges (
id INTEGER PRIMARY KEY AUTOINCREMENT,
from_memory_id INTEGER NOT NULL,
from_memory_type TEXT NOT NULL, -- 'episode', 'skill', 'note', 'fact'
to_memory_id INTEGER NOT NULL,
to_memory_type TEXT NOT NULL,
-- Traditional similarity
similarity REAL NOT NULL DEFAULT 0.0,
-- Causal metrics
uplift REAL, -- E[y|do(x)] - E[y]
confidence REAL DEFAULT 0.5, -- Confidence in causal claim
sample_size INTEGER, -- Number of observations
-- Evidence and provenance
evidence_ids TEXT, -- JSON array of proof IDs
experiment_ids TEXT, -- JSON array of A/B test IDs
confounder_score REAL, -- Likelihood of confounding
-- Metadata
mechanism TEXT, -- Hypothesized causal mechanism
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
last_validated_at INTEGER,
metadata JSON
-- Note: Foreign keys removed to allow flexible causal edges between any concepts
-- from_memory_id and to_memory_id can be 0 for abstract causal relationships
);
CREATE INDEX IF NOT EXISTS idx_causal_edges_from ON causal_edges(from_memory_id, from_memory_type);
CREATE INDEX IF NOT EXISTS idx_causal_edges_to ON causal_edges(to_memory_id, to_memory_type);
CREATE INDEX IF NOT EXISTS idx_causal_edges_uplift ON causal_edges(uplift DESC);
CREATE INDEX IF NOT EXISTS idx_causal_edges_confidence ON causal_edges(confidence DESC);
-- Causal experiments (A/B tests for uplift estimation)
CREATE TABLE IF NOT EXISTS causal_experiments (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
hypothesis TEXT NOT NULL,
treatment_id INTEGER NOT NULL, -- Memory used as treatment
treatment_type TEXT NOT NULL,
control_id INTEGER, -- Optional control memory
-- Experiment design
start_time INTEGER NOT NULL,
end_time INTEGER,
sample_size INTEGER DEFAULT 0,
-- Results
treatment_mean REAL,
control_mean REAL,
uplift REAL, -- treatment_mean - control_mean
p_value REAL,
confidence_interval_low REAL,
confidence_interval_high REAL,
-- Status
status TEXT DEFAULT 'running', -- 'running', 'completed', 'failed'
metadata JSON,
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now'))
);
CREATE INDEX IF NOT EXISTS idx_causal_experiments_status ON causal_experiments(status);
CREATE INDEX IF NOT EXISTS idx_causal_experiments_treatment ON causal_experiments(treatment_id, treatment_type);
-- Causal observations (individual data points)
CREATE TABLE IF NOT EXISTS causal_observations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
experiment_id INTEGER NOT NULL,
episode_id INTEGER NOT NULL,
-- Treatment assignment
is_treatment BOOLEAN NOT NULL,
-- Outcome
outcome_value REAL NOT NULL,
outcome_type TEXT, -- 'reward', 'success', 'latency'
-- Context
context JSON,
observed_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
FOREIGN KEY(experiment_id) REFERENCES causal_experiments(id) ON DELETE CASCADE,
FOREIGN KEY(episode_id) REFERENCES episodes(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_causal_observations_experiment ON causal_observations(experiment_id);
CREATE INDEX IF NOT EXISTS idx_causal_observations_treatment ON causal_observations(is_treatment);
-- ============================================================================
-- FEATURE 2: Explainable Recall Certificates
-- ============================================================================
-- Recall certificates for provenance and justification
CREATE TABLE IF NOT EXISTS recall_certificates (
id TEXT PRIMARY KEY, -- UUID or hash
query_id TEXT NOT NULL,
query_text TEXT NOT NULL,
-- Retrieved chunks
chunk_ids TEXT NOT NULL, -- JSON array
chunk_types TEXT NOT NULL, -- JSON array matching chunk_ids
-- Minimal hitting set (justification)
minimal_why TEXT, -- JSON array of chunk IDs that justify the answer
redundancy_ratio REAL, -- len(chunk_ids) / len(minimal_why)
completeness_score REAL, -- Fraction of query requirements met
-- Provenance chain
merkle_root TEXT NOT NULL,
source_hashes TEXT, -- JSON array of source hashes
proof_chain TEXT, -- JSON Merkle proof
-- Policy compliance
policy_proof TEXT, -- Proof of policy adherence
policy_version TEXT,
access_level TEXT, -- 'public', 'internal', 'confidential', 'restricted'
-- Metadata
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
latency_ms INTEGER,
metadata JSON
);
CREATE INDEX IF NOT EXISTS idx_recall_certificates_query ON recall_certificates(query_id);
CREATE INDEX IF NOT EXISTS idx_recall_certificates_created ON recall_certificates(created_at DESC);
-- Provenance sources (for Merkle tree construction)
CREATE TABLE IF NOT EXISTS provenance_sources (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_type TEXT NOT NULL, -- 'episode', 'skill', 'note', 'fact', 'external'
source_id INTEGER NOT NULL,
-- Content hash
content_hash TEXT NOT NULL UNIQUE,
-- Lineage
parent_hash TEXT, -- Previous version
derived_from TEXT, -- JSON array of parent hashes
-- Metadata
created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
creator TEXT, -- User or system identifier
metadata JSON
);
CREATE INDEX IF NOT EXISTS idx_provenance_sources_type ON provenance_sources(source_type, source_id);
CREATE INDEX IF NOT EXISTS idx_provenance_sources_hash ON provenance_sources(content_hash);
CREATE INDEX IF NOT EXISTS idx_provenance_sources_parent ON provenance_sources(parent_hash);
-- Justification paths (why a chunk was included)
CREATE TABLE IF NOT EXISTS justification_paths (
id INTEGER PRIMARY KEY AUTOINCREMENT,
certificate_id TEXT NOT NULL,
chunk_id TEXT NOT NULL,
chunk_type TEXT NOT NULL,
-- Justification
reason TEXT NOT NULL, -- 'semantic_match', 'causal_link', 'prerequisite', 'constraint'
necessity_score REAL NOT NULL, -- How essential is this chunk (0-1)
-- Path to query satisfaction
path_elements TEXT, -- JSON array describing reasoning path
FOREIGN KEY(certificate_id) REFERENCES recall_certificates(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_justification_paths_cert ON justification_paths(certificate_id);
CREATE INDEX IF NOT EXISTS idx_justification_paths_chunk ON justification_paths(chunk_id, chunk_type);
-- ============================================================================
-- Views for Causal Analysis
-- ============================================================================
-- High-confidence causal relationships
CREATE VIEW IF NOT EXISTS strong_causal_edges AS
SELECT
ce.*,
CASE
WHEN ce.uplift > 0 THEN 'positive'
WHEN ce.uplift < 0 THEN 'negative'
ELSE 'neutral'
END as effect_direction,
ce.uplift * ce.confidence as causal_impact
FROM causal_edges ce
WHERE ce.confidence >= 0.7
AND ce.uplift IS NOT NULL
AND ABS(ce.uplift) >= 0.1
ORDER BY ABS(ce.uplift) * ce.confidence DESC;
-- Causal chains (multi-hop reasoning)
CREATE VIEW IF NOT EXISTS causal_chains AS
WITH RECURSIVE chain(from_id, to_id, depth, path, total_uplift) AS (
SELECT from_memory_id, to_memory_id, 1,
from_memory_id || '->' || to_memory_id,
uplift
FROM causal_edges
WHERE confidence >= 0.6
UNION ALL
SELECT chain.from_id, ce.to_memory_id, chain.depth + 1,
chain.path || '->' || ce.to_memory_id,
chain.total_uplift + ce.uplift
FROM chain
JOIN causal_edges ce ON chain.to_id = ce.from_memory_id
WHERE chain.depth < 5
AND ce.confidence >= 0.6
AND chain.path NOT LIKE '%' || ce.to_memory_id || '%'
)
SELECT * FROM chain
WHERE depth >= 2
ORDER BY total_uplift DESC, depth ASC;
-- ============================================================================
-- Views for Explainability
-- ============================================================================
-- Certificate quality metrics
CREATE VIEW IF NOT EXISTS certificate_quality AS
SELECT
rc.id,
rc.query_id,
rc.completeness_score,
rc.redundancy_ratio,
COUNT(jp.id) as justification_count,
AVG(jp.necessity_score) as avg_necessity,
rc.latency_ms
FROM recall_certificates rc
LEFT JOIN justification_paths jp ON rc.id = jp.certificate_id
GROUP BY rc.id;
-- Provenance lineage depth
CREATE VIEW IF NOT EXISTS provenance_depth AS
WITH RECURSIVE lineage(hash, depth) AS (
SELECT content_hash, 0
FROM provenance_sources
WHERE parent_hash IS NULL
UNION ALL
SELECT ps.content_hash, lineage.depth + 1
FROM lineage
JOIN provenance_sources ps ON lineage.hash = ps.parent_hash
)
SELECT
ps.id,
ps.source_type,
ps.source_id,
ps.content_hash,
COALESCE(l.depth, 0) as lineage_depth
FROM provenance_sources ps
LEFT JOIN lineage l ON ps.content_hash = l.hash;
-- ============================================================================
-- Triggers for Automatic Maintenance
-- ============================================================================
-- Update causal edge timestamp
CREATE TRIGGER IF NOT EXISTS update_causal_edge_timestamp
AFTER UPDATE ON causal_edges
BEGIN
UPDATE causal_edges
SET updated_at = strftime('%s', 'now')
WHERE id = NEW.id;
END;
-- Validate causal confidence bounds
CREATE TRIGGER IF NOT EXISTS validate_causal_confidence
BEFORE INSERT ON causal_edges
BEGIN
SELECT CASE
WHEN NEW.confidence < 0 OR NEW.confidence > 1 THEN
RAISE(ABORT, 'Confidence must be between 0 and 1')
END;
END;
-- ============================================================================
-- Functions for Causal Inference (as SQL helpers)
-- ============================================================================
-- These would typically be implemented in TypeScript, but we provide
-- SQL views that can assist with common causal queries
-- Instrumental variables (potential instruments for causal inference)
CREATE VIEW IF NOT EXISTS causal_instruments AS
SELECT
e1.id as instrument_id,
e1.task as instrument,
e2.id as treatment_id,
e2.task as treatment,
e3.id as outcome_id,
e3.task as outcome
FROM episodes e1
CROSS JOIN episodes e2
CROSS JOIN episodes e3
WHERE e1.id != e2.id AND e2.id != e3.id AND e1.id != e3.id
-- Instrument affects treatment
AND EXISTS (
SELECT 1 FROM causal_edges
WHERE from_memory_id = e1.id AND to_memory_id = e2.id
AND ABS(uplift) > 0.1
)
-- Treatment affects outcome
AND EXISTS (
SELECT 1 FROM causal_edges
WHERE from_memory_id = e2.id AND to_memory_id = e3.id
AND ABS(uplift) > 0.1
)
-- Instrument doesn't directly affect outcome (exclusion restriction)
AND NOT EXISTS (
SELECT 1 FROM causal_edges
WHERE from_memory_id = e1.id AND to_memory_id = e3.id
);
-- ============================================================================
-- Schema Version
-- ============================================================================
-- Version: 2.0.0 (Frontier Features)
-- Features: Causal Memory Graph, Explainable Recall Certificates
-- Compatible with: AgentDB 1.x
--
-- Performance Optimization:
-- Apply composite index migration (003_composite_indexes.sql) for:
-- - 30-50% faster causal edge queries
-- - Optimized causal chain traversal (multi-hop reasoning)
-- - Faster experiment analysis and A/B testing
-- - See: db/migrations/README.md for details
-- ============================================================================
-- ============================================================================
-- FEATURE 3: Reinforcement Learning Experiences
-- ============================================================================
-- Learning experiences for RL training
CREATE TABLE IF NOT EXISTS learning_experiences (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT NOT NULL,
state TEXT NOT NULL,
action TEXT NOT NULL,
reward REAL NOT NULL,
next_state TEXT,
success INTEGER NOT NULL DEFAULT 0,
timestamp INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
metadata JSON
);
CREATE INDEX IF NOT EXISTS idx_learning_experiences_session ON learning_experiences(session_id);
CREATE INDEX IF NOT EXISTS idx_learning_experiences_timestamp ON learning_experiences(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_learning_experiences_reward ON learning_experiences(reward DESC);
-- Learning sessions table for RL session management
CREATE TABLE IF NOT EXISTS learning_sessions (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
session_type TEXT NOT NULL,
config JSON NOT NULL,
start_time INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
end_time INTEGER,
status TEXT NOT NULL DEFAULT 'active',
metadata JSON
);
CREATE INDEX IF NOT EXISTS idx_learning_sessions_user ON learning_sessions(user_id);
CREATE INDEX IF NOT EXISTS idx_learning_sessions_status ON learning_sessions(status);
CREATE INDEX IF NOT EXISTS idx_learning_sessions_start ON learning_sessions(start_time DESC);