// tasq/node_modules/@claude-flow/guidance/dist/retriever.js
//
// 394 lines
// 15 KiB
// JavaScript

/**
* Task Intent Classifier + Shard Retriever
*
* Stores rule shards in vector storage with embeddings and metadata.
* At task start, retrieves the top N shards by semantic similarity
* with hard filters by risk class and repo scope.
*
* Retrieval contract:
* 1. Always include the constitution
* 2. Retrieve up to 5 shards by semantic similarity
* 3. Add hard filters by risk class and repo scope
* 4. Contradiction check: prefer higher-priority rule ID
*
* @module @claude-flow/guidance/retriever
*/
// ============================================================================
// Intent Classification
// ============================================================================
/**
 * Keyword patterns used to guess a task's intent.
 *
 * Each intent maps to a list of `{ pattern, weight }` entries. A pattern that
 * matches the task description contributes its weight to that intent's score.
 * The `general` entry matches any non-empty text and acts as the fallback.
 */
const INTENT_PATTERNS = {
    // Defects and regressions
    'bug-fix': [
        { pattern: /\b(fix|bug|broken|error|crash|issue|wrong|incorrect|fail)\b/i, weight: 0.8 },
        { pattern: /\b(not working|doesn't work|unexpected|regression)\b/i, weight: 0.9 },
    ],
    // New functionality
    'feature': [
        { pattern: /\b(add|create|implement|build|new|introduce|develop)\b/i, weight: 0.5 },
        { pattern: /\b(feature|capability|functionality|support for)\b/i, weight: 0.9 },
        { pattern: /\b(user|page|profile|dashboard|form|widget|component|module)\b/i, weight: 0.3 },
    ],
    // Behavior-preserving code changes
    'refactor': [
        { pattern: /\b(refactor|restructure|reorganize|simplify|clean|extract|inline)\b/i, weight: 0.9 },
        { pattern: /\b(improve readability|reduce complexity|code quality)\b/i, weight: 0.8 },
    ],
    // Authentication, authorization and vulnerabilities
    'security': [
        { pattern: /\b(security|auth|permission|access control|encrypt|secret|token)\b/i, weight: 0.9 },
        { pattern: /\b(cve|vulnerability|injection|xss|csrf|sanitize)\b/i, weight: 1.0 },
    ],
    // Speed and resource usage
    'performance': [
        { pattern: /\b(performance|optimize|speed|slow|fast|cache|memory usage|latency)\b/i, weight: 0.9 },
        { pattern: /\b(bottleneck|profile|benchmark|throughput|efficient)\b/i, weight: 0.8 },
    ],
    // Tests and test tooling
    'testing': [
        { pattern: /\b(tests?|specs?|coverage|mocks?|asserts?|tdd|unit tests?|integration tests?)\b/i, weight: 1.0 },
        { pattern: /\b(test suite|test case|test plan|quality assurance)\b/i, weight: 0.9 },
    ],
    // Documentation
    'docs': [
        { pattern: /\b(document|readme|jsdoc|comment|explain|describe|tutorial)\b/i, weight: 0.8 },
        { pattern: /\b(api docs|documentation|usage guide|changelog)\b/i, weight: 0.9 },
    ],
    // Release and infrastructure
    'deployment': [
        { pattern: /\b(deploy|release|publish|ci|cd|pipeline|docker|kubernetes)\b/i, weight: 0.9 },
        { pattern: /\b(staging|production|rollback|migration|version)\b/i, weight: 0.7 },
    ],
    // System structure and design
    'architecture': [
        { pattern: /\b(architect|design pattern|system design|structure|boundary)\b/i, weight: 0.8 },
        { pattern: /\b(module boundary|component architecture|layer|service mesh|domain model|aggregate root)\b/i, weight: 0.7 },
        { pattern: /\b(interface|api design|separation of concerns)\b/i, weight: 0.6 },
    ],
    // Investigation and diagnosis
    'debug': [
        { pattern: /\b(debug|trace|log|diagnose|investigate|root cause)\b/i, weight: 0.9 },
        { pattern: /\b(stack trace|breakpoint|inspect|reproduction)\b/i, weight: 0.8 },
    ],
    // Catch-all fallback
    'general': [
        { pattern: /./, weight: 0.1 },
    ],
};
/**
 * Deterministic hash-based embedding provider — **test-only**.
 *
 * Produces fixed-dimension vectors from a simple character-hash → sin()
 * transform. The resulting embeddings have no real semantic meaning;
 * they are stable and fast, which makes them useful for unit/integration
 * tests that need a concrete {@link IEmbeddingProvider} without loading
 * an ONNX model.
 *
 * Results are memoized per input text for the lifetime of the instance.
 *
 * **Do NOT use in production** — replace with a real model-backed
 * provider (e.g. the agentic-flow ONNX integration).
 */
export class HashEmbeddingProvider {
    /** Number of components in every produced vector. */
    dimensions;
    /** Memo of previously computed embeddings, keyed by the exact input text. */
    cache = new Map();
    /**
     * @param {number} [dimensions=384] Length of the generated vectors.
     */
    constructor(dimensions = 384) {
        this.dimensions = dimensions;
    }
    /**
     * Return the embedding for `text`, computing and caching it on first use.
     *
     * @param {string} text Input text.
     * @returns {Promise<Float32Array>} L2-normalized vector of length `dimensions`.
     */
    async embed(text) {
        // Key on the full text. Keying on a truncated prefix would hand back
        // the wrong vector for two inputs that only differ after the prefix,
        // making results depend on call order.
        const cached = this.cache.get(text);
        if (cached !== undefined) {
            return cached;
        }
        const embedding = this.hashEmbed(text);
        this.cache.set(text, embedding);
        return embedding;
    }
    /**
     * Embed several texts; results are in the same order as `texts`.
     *
     * @param {string[]} texts Input texts.
     * @returns {Promise<Float32Array[]>} One embedding per input.
     */
    async batchEmbed(texts) {
        return Promise.all(texts.map((text) => this.embed(text)));
    }
    /**
     * Compute the raw (uncached) embedding for `text`.
     *
     * The text is lower-cased and trimmed, then each component `i` is derived
     * from a 32-bit rolling hash of the characters scaled by `i + 1`, mapped
     * through `sin()` into [0, 1]. The vector is finally L2-normalized.
     *
     * @param {string} text Input text.
     * @returns {Float32Array} Vector of length `dimensions`.
     */
    hashEmbed(text) {
        const embedding = new Float32Array(this.dimensions);
        const normalized = text.toLowerCase().trim();
        for (let i = 0; i < this.dimensions; i++) {
            let hash = 0;
            for (let j = 0; j < normalized.length; j++) {
                // `| 0` keeps the accumulator in signed 32-bit range.
                hash = ((hash << 5) - hash + normalized.charCodeAt(j) * (i + 1)) | 0;
            }
            embedding[i] = (Math.sin(hash) + 1) / 2;
        }
        // L2 normalize
        let norm = 0;
        for (let i = 0; i < this.dimensions; i++) {
            norm += embedding[i] * embedding[i];
        }
        norm = Math.sqrt(norm);
        if (norm > 0) {
            for (let i = 0; i < this.dimensions; i++) {
                embedding[i] /= norm;
            }
        }
        return embedding;
    }
}
// ============================================================================
// Shard Retriever
// ============================================================================
/**
 * Retrieves the policy shards most relevant to a task.
 *
 * A bundle (constitution + shards) is loaded once and each shard's
 * `compactText` is embedded. `retrieve()` then scores the shards against the
 * task description, applies hard filters, drops lower-priority shards that
 * contradict an already selected one, and builds the combined policy text.
 */
export class ShardRetriever {
    /** Shards of the loaded bundle; each gains an `embedding` once indexed. */
    shards = [];
    /** Constitution of the loaded bundle, or `null` before `loadBundle()`. */
    constitution = null;
    /** Provider used to embed shard texts and task descriptions. */
    embeddingProvider;
    /** Whether the current `shards` have been embedded. */
    indexed = false;
    /** Compiled glob regexes keyed by glob string. */
    globCache = new Map();
    /**
     * @param {object} [embeddingProvider] Object exposing `embed(text)` and
     *   `batchEmbed(texts)`. Defaults to a new `HashEmbeddingProvider`.
     */
    constructor(embeddingProvider) {
        this.embeddingProvider = embeddingProvider ?? new HashEmbeddingProvider();
    }
    /**
     * Load a compiled policy bundle and index its shards.
     *
     * @param {object} bundle Object with `constitution` and `shards`.
     * @returns {Promise<void>}
     */
    async loadBundle(bundle) {
        this.constitution = bundle.constitution;
        this.shards = bundle.shards;
        // Force re-indexing: the previous embeddings belong to the old bundle.
        this.indexed = false;
        await this.indexShards();
    }
    /**
     * Index all shards by generating embeddings.
     *
     * Embeds every shard's `compactText` in one batch and stores the vector on
     * the shard object itself. Does nothing when already indexed.
     *
     * @returns {Promise<void>}
     */
    async indexShards() {
        if (this.indexed) {
            return;
        }
        const texts = this.shards.map((shard) => shard.compactText);
        const embeddings = await this.embeddingProvider.batchEmbed(texts);
        for (let i = 0; i < this.shards.length; i++) {
            this.shards[i].embedding = embeddings[i];
        }
        this.indexed = true;
    }
    /**
     * Classify task intent.
     *
     * For every intent except `general`, sums the weights of the patterns that
     * match the description. The highest-scoring intent wins (the earliest one
     * on ties); `general` is returned when nothing matches.
     *
     * @param {string} taskDescription Free-text task description.
     * @returns {{ intent: string, confidence: number }} `confidence` is the
     *   winning score divided by 3, capped at 1.
     */
    classifyIntent(taskDescription) {
        let bestIntent = 'general';
        let bestScore = 0;
        for (const [intent, patterns] of Object.entries(INTENT_PATTERNS)) {
            if (intent === 'general') {
                continue; // Skip general fallback during scoring
            }
            let score = 0;
            for (const { pattern, weight } of patterns) {
                if (pattern.test(taskDescription)) {
                    score += weight;
                }
            }
            if (score > bestScore) {
                bestScore = score;
                bestIntent = intent;
            }
        }
        // Normalize confidence to 0-1
        const confidence = Math.min(bestScore / 3, 1);
        return { intent: bestIntent, confidence };
    }
    /**
     * Retrieve relevant shards for a task.
     *
     * Contract:
     * 1. Always include the constitution
     * 2. Up to maxShards by semantic similarity
     * 3. Hard filters by risk class and repo scope
     * 4. Contradiction check: prefer higher priority
     *
     * @param {object} request Reads `taskDescription`, and optionally `intent`
     *   (overrides classification), `maxShards` (default 5), `riskFilter` and
     *   `repoScope`.
     * @returns {Promise<object>} `{ constitution, shards, detectedIntent,
     *   contradictionsResolved, policyText, latencyMs }`. `detectedIntent` is
     *   the intent actually used (the override when one was given), and
     *   `contradictionsResolved` is the number of contradictory pairs still
     *   present among the returned shards.
     * @throws {Error} When no bundle has been loaded.
     */
    async retrieve(request) {
        const startTime = performance.now();
        if (!this.constitution) {
            throw new Error('No policy bundle loaded. Call loadBundle() first.');
        }
        // Step 1: Resolve intent; only classify when the caller gave no override
        const intent = request.intent ?? this.classifyIntent(request.taskDescription).intent;
        // Step 2: Generate query embedding
        const queryEmbedding = await this.embeddingProvider.embed(request.taskDescription);
        // Step 3: Score all shards
        const maxShards = request.maxShards ?? 5;
        const scored = this.scoreShards(queryEmbedding, intent, request.riskFilter, request.repoScope);
        // Step 4: Select top N with contradiction resolution
        const selected = this.selectWithContradictionCheck(scored, maxShards);
        // Step 5: Build combined policy text
        const policyText = this.buildPolicyText(this.constitution, selected);
        const latencyMs = performance.now() - startTime;
        return {
            constitution: this.constitution,
            shards: selected,
            detectedIntent: intent,
            contradictionsResolved: this.countContradictions(selected),
            policyText,
            latencyMs,
        };
    }
    /**
     * Score all shards against the query.
     *
     * Shards failing a hard filter are dropped. The rest get cosine similarity
     * plus +0.15 when the shard lists `intent`, +0.1 for `critical` risk and
     * +0.05 for `high` risk.
     *
     * @param {ArrayLike<number>} queryEmbedding Embedding of the task description.
     * @param {string} intent Intent used for the intent boost.
     * @param {string[]} [riskFilter] When non-empty, only these risk classes pass.
     * @param {string} [repoScope] When set, a shard passes only if one of its
     *   `repoScopes` is `**` + `/*` (match-all) or glob-matches this path.
     * @returns {Array<{ shard: object, similarity: number, reason: string }>}
     *   Sorted by descending score; `similarity` holds the boosted score.
     */
    scoreShards(queryEmbedding, intent, riskFilter, repoScope) {
        const results = [];
        for (const shard of this.shards) {
            const { rule } = shard;
            // Hard filter: risk class
            if (riskFilter && riskFilter.length > 0 && !riskFilter.includes(rule.riskClass)) {
                continue;
            }
            // Hard filter: repo scope
            if (repoScope) {
                const matchesScope = rule.repoScopes.some((scope) => scope === '**/*' || this.matchGlob(repoScope, scope));
                if (!matchesScope) {
                    continue;
                }
            }
            // Semantic similarity (0 for shards that were never embedded)
            const similarity = shard.embedding
                ? this.cosineSimilarity(queryEmbedding, shard.embedding)
                : 0;
            // Intent boost: if shard matches detected intent, boost score
            const intentBoost = rule.intents.includes(intent) ? 0.15 : 0;
            // Risk boost: critical/high rules get a boost
            const riskBoost = rule.riskClass === 'critical' ? 0.1
                : rule.riskClass === 'high' ? 0.05
                    : 0;
            const finalScore = similarity + intentBoost + riskBoost;
            const reasons = [];
            if (similarity > 0.3) {
                reasons.push(`semantic match (${(similarity * 100).toFixed(0)}%)`);
            }
            if (intentBoost > 0) {
                reasons.push(`intent match (${intent})`);
            }
            if (riskBoost > 0) {
                reasons.push(`risk priority (${rule.riskClass})`);
            }
            results.push({
                shard,
                similarity: finalScore,
                reason: reasons.join(', ') || 'general relevance',
            });
        }
        // Sort by combined score descending
        return results.sort((a, b) => b.similarity - a.similarity);
    }
    /**
     * Select top N shards with contradiction checking.
     *
     * Walks `scored` in order and keeps a candidate unless some already
     * selected shard shares a domain with it, has strictly higher priority,
     * and contradicts it. Every such selected shard is considered, not only
     * the first one found in the domain.
     *
     * NOTE(review): a higher-priority candidate that arrives after a
     * lower-priority contradictory shard does not evict it; both are kept.
     *
     * @param {Array<{ shard: object }>} scored Candidates in preference order.
     * @param {number} maxShards Maximum number of shards to return.
     * @returns {Array<{ shard: object }>} The kept candidates, in input order.
     */
    selectWithContradictionCheck(scored, maxShards) {
        const selected = [];
        for (const candidate of scored) {
            if (selected.length >= maxShards) {
                break;
            }
            const rule = candidate.shard.rule;
            const dominated = selected.some(({ shard: kept }) => kept.rule.priority > rule.priority &&
                kept.rule.domains.some((domain) => rule.domains.includes(domain)) &&
                this.areContradictory(kept.rule, rule));
            if (!dominated) {
                selected.push(candidate);
            }
        }
        return selected;
    }
    /**
     * Check if two rules are contradictory.
     *
     * Purely lexical: returns true when one rule's `text` contains an
     * obligation keyword (must / always / require) and the other's contains
     * the paired prohibition keyword. The check is symmetric.
     *
     * @param {{ text: string }} a First rule.
     * @param {{ text: string }} b Second rule.
     * @returns {boolean}
     */
    areContradictory(a, b) {
        const negationPatterns = [
            { positive: /\bmust\b/i, negative: /\bnever\b|\bdo not\b|\bavoid\b/i },
            { positive: /\balways\b/i, negative: /\bnever\b|\bdon't\b/i },
            { positive: /\brequire\b/i, negative: /\bforbid\b|\bprohibit\b/i },
        ];
        return negationPatterns.some(({ positive, negative }) => (positive.test(a.text) && negative.test(b.text)) ||
            (negative.test(a.text) && positive.test(b.text)));
    }
    /**
     * Count contradictions in selected set.
     *
     * @param {Array<{ shard: object }>} selected Selected shards.
     * @returns {number} Number of unordered pairs that `areContradictory` flags.
     */
    countContradictions(selected) {
        let count = 0;
        for (let i = 0; i < selected.length; i++) {
            for (let j = i + 1; j < selected.length; j++) {
                if (this.areContradictory(selected[i].shard.rule, selected[j].shard.rule)) {
                    count++;
                }
            }
        }
        return count;
    }
    /**
     * Build combined policy text for injection.
     *
     * @param {{ text: string }} constitution Always emitted first.
     * @param {Array<{ shard: { compactText: string } }>} shards Selected shards.
     * @returns {string} The constitution text, followed (when there are shards)
     *   by a "Task-Specific Rules" section with one bullet per shard.
     */
    buildPolicyText(constitution, shards) {
        // Always include constitution
        const parts = [constitution.text];
        // Add retrieved shards
        if (shards.length > 0) {
            parts.push('', '## Task-Specific Rules', '');
            for (const { shard } of shards) {
                parts.push(`- ${shard.compactText}`);
            }
        }
        return parts.join('\n');
    }
    /**
     * Simple glob matching (supports * and **).
     *
     * `**` matches any characters including `/`; `*` matches any characters
     * except `/`. Every other character, including regex metacharacters such
     * as `.` or `(`, is matched literally. Compiled regexes are cached per
     * glob to avoid re-compiling on every call.
     *
     * @param {string} path Path to test.
     * @param {string} glob Glob pattern; must match the whole path.
     * @returns {boolean}
     */
    matchGlob(path, glob) {
        let re = this.globCache.get(glob);
        if (!re) {
            // Single pass: translate wildcards and escape everything that
            // would otherwise be interpreted as regex syntax.
            const pattern = glob.replace(/\*\*|\*|[.+?^${}()|[\]\\\/]/g, (token) => {
                if (token === '**') {
                    return '.*';
                }
                if (token === '*') {
                    return '[^/]*';
                }
                return `\\${token}`;
            });
            re = new RegExp(`^${pattern}$`);
            this.globCache.set(glob, re);
        }
        return re.test(path);
    }
    /**
     * Cosine similarity between two vectors, clamped to [0, 1].
     *
     * @param {ArrayLike<number>} a First vector.
     * @param {ArrayLike<number>} b Second vector.
     * @returns {number} 0 when the lengths differ or either vector has zero norm.
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            return 0;
        }
        let dot = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denom = Math.sqrt(normA) * Math.sqrt(normB);
        return denom > 0 ? Math.max(0, Math.min(1, dot / denom)) : 0;
    }
    /**
     * Get current shard count.
     */
    get shardCount() {
        return this.shards.length;
    }
    /**
     * Get constitution (`null` until a bundle is loaded).
     */
    getConstitution() {
        return this.constitution;
    }
}
/**
 * Factory for {@link ShardRetriever}.
 *
 * @param {object} [embeddingProvider] Forwarded unchanged to the
 *   `ShardRetriever` constructor.
 * @returns {ShardRetriever} A new retriever with no bundle loaded yet.
 */
export function createRetriever(embeddingProvider) {
    const retriever = new ShardRetriever(embeddingProvider);
    return retriever;
}
//# sourceMappingURL=retriever.js.map