-- Source: tasq/node_modules/agentdb/dist/schemas/schema.sql
-- (file-viewer metadata: 390 lines, 15 KiB, SQL)

-- ============================================================================
-- AgentDB State-of-the-Art Memory Schema
-- ============================================================================
-- Implements 5 cutting-edge memory patterns for autonomous agents:
-- 1. Reflexion-style episodic replay
-- 2. Skill library from trajectories
-- 3. Structured mixed memory (facts + summaries)
-- 4. Episodic segmentation and consolidation
-- 5. Graph-aware recall
-- ============================================================================
-- Enable foreign keys so that the FOREIGN KEY clauses declared on the tables
-- in this file (including their ON DELETE CASCADE actions) are enforced.
-- NOTE(review): in SQLite this pragma is a per-connection setting; confirm that
-- every connection opening the database issues it, not only the one that
-- runs this schema script.
PRAGMA foreign_keys = ON;
-- ============================================================================
-- Pattern 1: Reflexion-Style Episodic Replay
-- ============================================================================
-- episodes: stores the self-critique and outcome recorded after each attempt,
-- so that the nearest failures and fixes can be retrieved before the next run.
-- ts and created_at both default to the insertion time as Unix epoch seconds.
CREATE TABLE IF NOT EXISTS episodes (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    ts          INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    session_id  TEXT    NOT NULL,
    task        TEXT    NOT NULL,
    input       TEXT,
    output      TEXT,
    critique    TEXT,
    reward      REAL    DEFAULT 0.0,
    success     BOOLEAN DEFAULT 0,
    latency_ms  INTEGER,
    tokens_used INTEGER,
    tags        TEXT,               -- JSON array of tags
    metadata    JSON,
    created_at  INTEGER NOT NULL DEFAULT (strftime('%s', 'now'))
);

-- Access paths: newest first, by session, highest reward first, and by task.
CREATE INDEX IF NOT EXISTS idx_episodes_ts
    ON episodes (ts DESC);
CREATE INDEX IF NOT EXISTS idx_episodes_session
    ON episodes (session_id);
CREATE INDEX IF NOT EXISTS idx_episodes_reward
    ON episodes (reward DESC);
CREATE INDEX IF NOT EXISTS idx_episodes_task
    ON episodes (task);
-- episode_embeddings: one vector per episode (384-dim for all-MiniLM-L6-v2).
-- Intended to use sqlite-vec when available, with a fallback to JSON storage.
-- NOTE(review): the column below is a BLOB, not JSON text; confirm which
-- format the fallback path actually writes.
-- Rows are removed automatically when the owning episode is deleted.
CREATE TABLE IF NOT EXISTS episode_embeddings (
    episode_id      INTEGER PRIMARY KEY,
    embedding       BLOB NOT NULL,                      -- Float32Array as BLOB
    embedding_model TEXT DEFAULT 'all-MiniLM-L6-v2',
    FOREIGN KEY (episode_id)
        REFERENCES episodes (id)
        ON DELETE CASCADE
);
-- ============================================================================
-- Pattern 2: Skill Library from Trajectories
-- ============================================================================
-- skills: high-reward traces promoted into reusable "skills" with typed IO.
-- name is unique; created_at / updated_at default to the insertion time as
-- Unix epoch seconds.
CREATE TABLE IF NOT EXISTS skills (
    id                   INTEGER PRIMARY KEY AUTOINCREMENT,
    name                 TEXT    UNIQUE NOT NULL,
    description          TEXT,
    signature            JSON    NOT NULL,   -- {inputs: {...}, outputs: {...}}
    code                 TEXT,               -- Tool call manifest or code template
    success_rate         REAL    DEFAULT 0.0,
    uses                 INTEGER DEFAULT 0,
    avg_reward           REAL    DEFAULT 0.0,
    avg_latency_ms       INTEGER DEFAULT 0,
    created_from_episode INTEGER,            -- Source episode ID (nullable)
    created_at           INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    updated_at           INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    last_used_at         INTEGER,
    metadata             JSON,
    -- ON DELETE SET NULL: without an explicit action SQLite applies NO ACTION,
    -- so deleting an episode that a skill was created from would fail with a
    -- foreign-key error while foreign_keys is ON. The skill should outlive its
    -- source episode, so the provenance link is cleared instead.
    FOREIGN KEY (created_from_episode)
        REFERENCES episodes (id)
        ON DELETE SET NULL
);

-- Access paths: best success rate first, most used first, and by name.
CREATE INDEX IF NOT EXISTS idx_skills_success ON skills (success_rate DESC);
CREATE INDEX IF NOT EXISTS idx_skills_uses ON skills (uses DESC);
CREATE INDEX IF NOT EXISTS idx_skills_name ON skills (name);
-- skill_links: directed relationships between two skills, used for skill
-- composition. Each (parent, child, relationship) combination may appear once.
-- Links are removed automatically when either skill is deleted.
CREATE TABLE IF NOT EXISTS skill_links (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    parent_skill_id INTEGER NOT NULL,
    child_skill_id  INTEGER NOT NULL,
    relationship    TEXT    NOT NULL,   -- 'prerequisite', 'alternative', 'refinement', 'composition'
    weight          REAL    DEFAULT 1.0,
    metadata        JSON,
    FOREIGN KEY (parent_skill_id)
        REFERENCES skills (id)
        ON DELETE CASCADE,
    FOREIGN KEY (child_skill_id)
        REFERENCES skills (id)
        ON DELETE CASCADE,
    UNIQUE (parent_skill_id, child_skill_id, relationship)
);

-- Traverse links from either end.
CREATE INDEX IF NOT EXISTS idx_skill_links_parent
    ON skill_links (parent_skill_id);
CREATE INDEX IF NOT EXISTS idx_skill_links_child
    ON skill_links (child_skill_id);
-- skill_embeddings: one embedding per skill, for semantic search over skills.
-- Rows are removed automatically when the owning skill is deleted.
CREATE TABLE IF NOT EXISTS skill_embeddings (
    skill_id        INTEGER PRIMARY KEY,
    embedding       BLOB NOT NULL,
    embedding_model TEXT DEFAULT 'all-MiniLM-L6-v2',
    FOREIGN KEY (skill_id)
        REFERENCES skills (id)
        ON DELETE CASCADE
);
-- ============================================================================
-- Pattern 3: Structured Mixed Memory (Facts + Summaries)
-- ============================================================================
-- Combine facts, summaries, and vectors to avoid over-embedding.
-- facts: atomic facts stored as (subject, predicate, object) triples.
-- source_type / source_id record where a fact came from; no foreign key is
-- declared on that pair.
CREATE TABLE IF NOT EXISTS facts (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    subject     TEXT    NOT NULL,
    predicate   TEXT    NOT NULL,
    object      TEXT    NOT NULL,
    source_type TEXT,               -- 'episode', 'skill', 'external', 'inferred'
    source_id   INTEGER,
    confidence  REAL    DEFAULT 1.0,
    created_at  INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    expires_at  INTEGER,            -- TTL for temporal facts
    metadata    JSON
);

-- One index per triple component, plus a lookup by source.
CREATE INDEX IF NOT EXISTS idx_facts_subject
    ON facts (subject);
CREATE INDEX IF NOT EXISTS idx_facts_predicate
    ON facts (predicate);
CREATE INDEX IF NOT EXISTS idx_facts_object
    ON facts (object);
CREATE INDEX IF NOT EXISTS idx_facts_source
    ON facts (source_type, source_id);

-- Partial index: only facts that actually carry an expiry are indexed.
CREATE INDEX IF NOT EXISTS idx_facts_expires
    ON facts (expires_at)
    WHERE expires_at IS NOT NULL;
-- notes: free-text notes and their summaries (embeddings live in
-- note_embeddings). created_at / updated_at default to the insertion time as
-- Unix epoch seconds.
CREATE TABLE IF NOT EXISTS notes (
    id               INTEGER PRIMARY KEY AUTOINCREMENT,
    title            TEXT,
    text             TEXT    NOT NULL,
    summary          TEXT,                      -- Condensed version for context
    note_type        TEXT    DEFAULT 'general', -- 'insight', 'constraint', 'goal', 'observation'
    importance       REAL    DEFAULT 0.5,
    access_count     INTEGER DEFAULT 0,
    last_accessed_at INTEGER,
    created_at       INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    updated_at       INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    metadata         JSON
);

-- Access paths: by type, most important first, most recently accessed first.
CREATE INDEX IF NOT EXISTS idx_notes_type
    ON notes (note_type);
CREATE INDEX IF NOT EXISTS idx_notes_importance
    ON notes (importance DESC);
CREATE INDEX IF NOT EXISTS idx_notes_accessed
    ON notes (last_accessed_at DESC);
-- note_embeddings: one embedding per note (only for summaries, to reduce
-- storage). Rows are removed automatically when the owning note is deleted.
CREATE TABLE IF NOT EXISTS note_embeddings (
    note_id         INTEGER PRIMARY KEY,
    embedding       BLOB NOT NULL,
    embedding_model TEXT DEFAULT 'all-MiniLM-L6-v2',
    FOREIGN KEY (note_id)
        REFERENCES notes (id)
        ON DELETE CASCADE
);
-- ============================================================================
-- Pattern 4: Episodic Segmentation and Consolidation
-- ============================================================================
-- Segment long tasks into events and consolidate into compact memories.
-- events: the individual steps a long task is segmented into. An event may be
-- linked to a parent episode; linked events are removed automatically when
-- that episode is deleted.
CREATE TABLE IF NOT EXISTS events (
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id TEXT    NOT NULL,
    episode_id INTEGER,             -- Link to parent episode
    step       INTEGER NOT NULL,
    phase      TEXT,                -- 'planning', 'execution', 'reflection', 'learning'
    role       TEXT,                -- 'user', 'assistant', 'system', 'tool'
    content    TEXT    NOT NULL,
    features   JSON,                -- Extracted features for learning
    tool_calls JSON,                -- Tool invocations in this event
    created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    FOREIGN KEY (episode_id)
        REFERENCES episodes (id)
        ON DELETE CASCADE
);

-- Access paths: steps within a session, by phase, and by parent episode.
CREATE INDEX IF NOT EXISTS idx_events_session
    ON events (session_id, step);
CREATE INDEX IF NOT EXISTS idx_events_phase
    ON events (phase);
CREATE INDEX IF NOT EXISTS idx_events_episode
    ON events (episode_id);
-- consolidated_memories: a summary of a window of events, delimited by
-- start_event_id and end_event_id.
-- NOTE(review): neither event foreign key declares an ON DELETE action, while
-- events rows cascade-delete with their episode. With foreign_keys ON, deleting
-- an episode whose events are referenced here would presumably be rejected;
-- confirm whether that is the intended protection.
CREATE TABLE IF NOT EXISTS consolidated_memories (
    id               INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id       TEXT    NOT NULL,
    start_event_id   INTEGER NOT NULL,
    end_event_id     INTEGER NOT NULL,
    phase            TEXT,
    summary          TEXT    NOT NULL,
    key_insights     JSON,          -- Extracted learnings
    success_patterns JSON,          -- What worked
    failure_patterns JSON,          -- What didn't work
    quality_score    REAL    DEFAULT 0.5,
    created_at       INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    FOREIGN KEY (start_event_id)
        REFERENCES events (id),
    FOREIGN KEY (end_event_id)
        REFERENCES events (id)
);

-- Access paths: by session and highest quality first.
CREATE INDEX IF NOT EXISTS idx_consolidated_session
    ON consolidated_memories (session_id);
CREATE INDEX IF NOT EXISTS idx_consolidated_quality
    ON consolidated_memories (quality_score DESC);
-- ============================================================================
-- Pattern 5: Graph-Aware Recall (Lightweight GraphRAG)
-- ============================================================================
-- Build a lightweight GraphRAG overlay for experiences.
-- exp_nodes: nodes of the experience graph. Each node has a kind, a label and
-- an optional JSON payload.
CREATE TABLE IF NOT EXISTS exp_nodes (
    id         INTEGER PRIMARY KEY AUTOINCREMENT,
    kind       TEXT    NOT NULL,    -- 'task', 'skill', 'concept', 'tool', 'outcome'
    label      TEXT    NOT NULL,
    payload    JSON,
    centrality REAL    DEFAULT 0.0, -- Graph importance metric
    created_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    updated_at INTEGER NOT NULL DEFAULT (strftime('%s', 'now'))
);

-- Access paths: by kind, by label, and most central first.
CREATE INDEX IF NOT EXISTS idx_exp_nodes_kind
    ON exp_nodes (kind);
CREATE INDEX IF NOT EXISTS idx_exp_nodes_label
    ON exp_nodes (label);
CREATE INDEX IF NOT EXISTS idx_exp_nodes_centrality
    ON exp_nodes (centrality DESC);
-- exp_edges: directed, typed, weighted edges between experience-graph nodes.
-- Each (src, dst, relationship) combination may appear once. Edges are removed
-- automatically when either endpoint node is deleted.
CREATE TABLE IF NOT EXISTS exp_edges (
    id           INTEGER PRIMARY KEY AUTOINCREMENT,
    src_node_id  INTEGER NOT NULL,
    dst_node_id  INTEGER NOT NULL,
    relationship TEXT    NOT NULL,  -- 'requires', 'produces', 'similar_to', 'refines', 'part_of'
    weight       REAL    DEFAULT 1.0,
    metadata     JSON,
    created_at   INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    FOREIGN KEY (src_node_id)
        REFERENCES exp_nodes (id)
        ON DELETE CASCADE,
    FOREIGN KEY (dst_node_id)
        REFERENCES exp_nodes (id)
        ON DELETE CASCADE,
    UNIQUE (src_node_id, dst_node_id, relationship)
);

-- Access paths: outgoing edges, incoming edges, and edges by relationship.
CREATE INDEX IF NOT EXISTS idx_exp_edges_src
    ON exp_edges (src_node_id);
CREATE INDEX IF NOT EXISTS idx_exp_edges_dst
    ON exp_edges (dst_node_id);
CREATE INDEX IF NOT EXISTS idx_exp_edges_rel
    ON exp_edges (relationship);
-- exp_node_embeddings: one embedding per graph node, for graph-augmented
-- retrieval. Rows are removed automatically when the owning node is deleted.
CREATE TABLE IF NOT EXISTS exp_node_embeddings (
    node_id         INTEGER PRIMARY KEY,
    embedding       BLOB NOT NULL,
    embedding_model TEXT DEFAULT 'all-MiniLM-L6-v2',
    FOREIGN KEY (node_id)
        REFERENCES exp_nodes (id)
        ON DELETE CASCADE
);
-- ============================================================================
-- Memory Management and Scoring
-- ============================================================================
-- memory_scores: quality scores and usage statistics computed for a memory.
-- A memory is addressed by (memory_type, memory_id); no foreign key is
-- declared on that pair.
CREATE TABLE IF NOT EXISTS memory_scores (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    memory_type     TEXT    NOT NULL,   -- 'episode', 'skill', 'note', 'consolidated'
    memory_id       INTEGER NOT NULL,
    quality_score   REAL    NOT NULL,
    novelty_score   REAL,
    relevance_score REAL,
    utility_score   REAL,
    computed_at     INTEGER NOT NULL DEFAULT (strftime('%s', 'now')),
    metadata        JSON
);

-- Access paths: scores for a given memory, and highest quality first.
CREATE INDEX IF NOT EXISTS idx_memory_scores_type
    ON memory_scores (memory_type, memory_id);
CREATE INDEX IF NOT EXISTS idx_memory_scores_quality
    ON memory_scores (quality_score DESC);
-- memory_access_log: one row per memory access, recording the query, its
-- relevance score and optional usefulness feedback, for adaptive retrieval.
CREATE TABLE IF NOT EXISTS memory_access_log (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    memory_type     TEXT    NOT NULL,
    memory_id       INTEGER NOT NULL,
    query           TEXT,
    relevance_score REAL,
    was_useful      BOOLEAN,
    feedback        JSON,
    accessed_at     INTEGER NOT NULL DEFAULT (strftime('%s', 'now'))
);

-- Access paths: accesses of a given memory, and most recent access first.
CREATE INDEX IF NOT EXISTS idx_access_log_type
    ON memory_access_log (memory_type, memory_id);
CREATE INDEX IF NOT EXISTS idx_access_log_time
    ON memory_access_log (accessed_at DESC);
-- ============================================================================
-- Consolidation and Maintenance
-- ============================================================================
-- consolidation_runs: bookkeeping for consolidation jobs and their results
-- (record counts, duration, status and any error).
CREATE TABLE IF NOT EXISTS consolidation_runs (
    id                INTEGER PRIMARY KEY AUTOINCREMENT,
    job_type          TEXT    NOT NULL,          -- 'episode_to_skill', 'event_to_memory', 'deduplication', 'pruning'
    records_processed INTEGER DEFAULT 0,
    records_created   INTEGER DEFAULT 0,
    records_deleted   INTEGER DEFAULT 0,
    duration_ms       INTEGER,
    status            TEXT    DEFAULT 'pending', -- 'pending', 'running', 'completed', 'failed'
    error             TEXT,
    started_at        INTEGER,
    completed_at      INTEGER,
    metadata          JSON
);

-- Access paths: by status and by job type.
CREATE INDEX IF NOT EXISTS idx_consolidation_status
    ON consolidation_runs (status);
CREATE INDEX IF NOT EXISTS idx_consolidation_type
    ON consolidation_runs (job_type);
-- ============================================================================
-- Views for Common Queries
-- ============================================================================
-- skill_candidates: high-value tasks for skill creation.
-- Aggregates episodes from the last 7 days per task and keeps only tasks with
-- at least 3 attempts and an average reward of at least 0.7.
CREATE VIEW IF NOT EXISTS skill_candidates AS
SELECT
    task,
    COUNT(*)         AS attempt_count,
    AVG(reward)      AS avg_reward,
    AVG(success)     AS success_rate,
    MAX(id)          AS latest_episode_id,
    GROUP_CONCAT(id) AS episode_ids
FROM episodes
WHERE ts > strftime('%s', 'now') - 86400 * 7    -- 86400 s/day * 7 days
GROUP BY task
HAVING COUNT(*) >= 3
   AND AVG(reward) >= 0.7;
-- top_skills: every skills column plus a composite_score, best score first.
-- composite_score = 0.4 * success_rate + 0.0001 * uses + 0.6 * avg_reward,
-- with NULL inputs treated as 0.
CREATE VIEW IF NOT EXISTS top_skills AS
SELECT
    sk.*,
    COALESCE(sk.success_rate, 0) * 0.4
        + COALESCE(sk.uses, 0) * 0.0001
        + COALESCE(sk.avg_reward, 0) * 0.6 AS composite_score
FROM skills AS sk
ORDER BY composite_score DESC;
-- recent_quality_memories: episodes, notes and consolidated memories from the
-- last 3 days whose score column (reward / importance / quality_score) is at
-- least 0.7, unified into one shape and listed newest first.
-- The episode branch filters on ts; the other two branches filter on created_at.
CREATE VIEW IF NOT EXISTS recent_quality_memories AS
SELECT
    'episode' AS type,
    id,
    task      AS title,
    critique  AS content,
    reward    AS score,
    created_at
FROM episodes
WHERE reward >= 0.7
  AND ts > strftime('%s', 'now') - 86400 * 3

UNION ALL

SELECT
    'note'     AS type,
    id,
    title,
    summary    AS content,
    importance AS score,
    created_at
FROM notes
WHERE importance >= 0.7
  AND created_at > strftime('%s', 'now') - 86400 * 3

UNION ALL

SELECT
    'consolidated' AS type,
    id,
    session_id     AS title,
    summary        AS content,
    quality_score  AS score,
    created_at
FROM consolidated_memories
WHERE quality_score >= 0.7
  AND created_at > strftime('%s', 'now') - 86400 * 3

ORDER BY created_at DESC;
-- ============================================================================
-- Triggers for Auto-Maintenance
-- ============================================================================
-- update_skill_last_used: whenever an UPDATE names the uses column of a skill,
-- stamp that skill's last_used_at with the current Unix time.
CREATE TRIGGER IF NOT EXISTS update_skill_last_used
    AFTER UPDATE OF uses ON skills
    FOR EACH ROW
BEGIN
    UPDATE skills
       SET last_used_at = strftime('%s', 'now')
     WHERE id = NEW.id;
END;
-- update_note_access: whenever an UPDATE names the access_count column of a
-- note, stamp that note's last_accessed_at with the current Unix time.
CREATE TRIGGER IF NOT EXISTS update_note_access
    AFTER UPDATE OF access_count ON notes
    FOR EACH ROW
BEGIN
    UPDATE notes
       SET last_accessed_at = strftime('%s', 'now')
     WHERE id = NEW.id;
END;
-- Auto-update timestamps
-- update_skill_timestamp: after any UPDATE of a skills row, set that row's
-- updated_at to the current Unix time.
--
-- The WHEN guard skips the write when updated_at already holds the current
-- second. The trigger body updates skills itself, so without the guard the
-- trigger would re-fire on its own UPDATE whenever PRAGMA recursive_triggers
-- is ON and abort at the trigger recursion limit. With recursive triggers OFF
-- the resulting updated_at value is the same as without the guard.
CREATE TRIGGER IF NOT EXISTS update_skill_timestamp
    AFTER UPDATE ON skills
    FOR EACH ROW
    WHEN NEW.updated_at IS NOT CAST(strftime('%s', 'now') AS INTEGER)
BEGIN
    UPDATE skills
       SET updated_at = strftime('%s', 'now')
     WHERE id = NEW.id;
END;
-- update_note_timestamp: after any UPDATE of a notes row, set that row's
-- updated_at to the current Unix time.
--
-- The WHEN guard skips the write when updated_at already holds the current
-- second. The trigger body updates notes itself, so without the guard the
-- trigger would re-fire on its own UPDATE whenever PRAGMA recursive_triggers
-- is ON and abort at the trigger recursion limit. With recursive triggers OFF
-- the resulting updated_at value is the same as without the guard.
CREATE TRIGGER IF NOT EXISTS update_note_timestamp
    AFTER UPDATE ON notes
    FOR EACH ROW
    WHEN NEW.updated_at IS NOT CAST(strftime('%s', 'now') AS INTEGER)
BEGIN
    UPDATE notes
       SET updated_at = strftime('%s', 'now')
     WHERE id = NEW.id;
END;
-- ============================================================================
-- Initialization Complete
-- ============================================================================
-- Schema version: 1.0.0
-- Compatible with: SQLite 3.35+, sqlite-vec (optional), sqlite-vss (optional)
-- WASM compatible: Yes (via SQLite-WASM + OPFS)
--
-- Performance Optimization:
-- For production deployments, apply composite index migration for 30-50% query speedup:
-- - Migration file: db/migrations/003_composite_indexes.sql
-- - Adds 40+ composite indexes for common query patterns
-- - Trade-off: 2x slower writes, +15-20% storage (acceptable for read-heavy workloads)
-- - See: db/migrations/README.md for details
-- ============================================================================