394 lines
15 KiB
JavaScript
394 lines
15 KiB
JavaScript
/**
 * Task Intent Classifier + Shard Retriever
 *
 * Stores rule shards in vector storage with embeddings and metadata.
 * At task start, retrieves the top N shards by semantic similarity
 * with hard filters by risk class and repo scope.
 *
 * Retrieval contract:
 * 1. Always include the constitution
 * 2. Retrieve up to 5 shards by semantic similarity
 * 3. Add hard filters by risk class and repo scope
 * 4. Contradiction check: prefer higher-priority rule ID
 *
 * @module @claude-flow/guidance/retriever
 */
|
|
// ============================================================================
// Intent Classification
// ============================================================================
|
|
/**
 * Keyword heuristics used to guess the intent of a task description.
 *
 * Every intent maps to a list of `{ pattern, weight }` entries. Key order is
 * significant for consumers that iterate with `Object.entries` and keep the
 * first best-scoring intent. `general` holds a catch-all pattern with a low
 * weight.
 */
const INTENT_PATTERNS = (() => {
    /** Pair a regular expression with its confidence weight. */
    const weighted = (pattern, weight) => ({ pattern, weight });
    return {
        'bug-fix': [
            weighted(/\b(fix|bug|broken|error|crash|issue|wrong|incorrect|fail)\b/i, 0.8),
            weighted(/\b(not working|doesn't work|unexpected|regression)\b/i, 0.9),
        ],
        'feature': [
            weighted(/\b(add|create|implement|build|new|introduce|develop)\b/i, 0.5),
            weighted(/\b(feature|capability|functionality|support for)\b/i, 0.9),
            weighted(/\b(user|page|profile|dashboard|form|widget|component|module)\b/i, 0.3),
        ],
        'refactor': [
            weighted(/\b(refactor|restructure|reorganize|simplify|clean|extract|inline)\b/i, 0.9),
            weighted(/\b(improve readability|reduce complexity|code quality)\b/i, 0.8),
        ],
        'security': [
            weighted(/\b(security|auth|permission|access control|encrypt|secret|token)\b/i, 0.9),
            weighted(/\b(cve|vulnerability|injection|xss|csrf|sanitize)\b/i, 1.0),
        ],
        'performance': [
            weighted(/\b(performance|optimize|speed|slow|fast|cache|memory usage|latency)\b/i, 0.9),
            weighted(/\b(bottleneck|profile|benchmark|throughput|efficient)\b/i, 0.8),
        ],
        'testing': [
            weighted(/\b(tests?|specs?|coverage|mocks?|asserts?|tdd|unit tests?|integration tests?)\b/i, 1.0),
            weighted(/\b(test suite|test case|test plan|quality assurance)\b/i, 0.9),
        ],
        'docs': [
            weighted(/\b(document|readme|jsdoc|comment|explain|describe|tutorial)\b/i, 0.8),
            weighted(/\b(api docs|documentation|usage guide|changelog)\b/i, 0.9),
        ],
        'deployment': [
            weighted(/\b(deploy|release|publish|ci|cd|pipeline|docker|kubernetes)\b/i, 0.9),
            weighted(/\b(staging|production|rollback|migration|version)\b/i, 0.7),
        ],
        'architecture': [
            weighted(/\b(architect|design pattern|system design|structure|boundary)\b/i, 0.8),
            weighted(/\b(module boundary|component architecture|layer|service mesh|domain model|aggregate root)\b/i, 0.7),
            weighted(/\b(interface|api design|separation of concerns)\b/i, 0.6),
        ],
        'debug': [
            weighted(/\b(debug|trace|log|diagnose|investigate|root cause)\b/i, 0.9),
            weighted(/\b(stack trace|breakpoint|inspect|reproduction)\b/i, 0.8),
        ],
        'general': [
            weighted(/./, 0.1),
        ],
    };
})();
|
|
/**
 * Deterministic hash-based embedding provider — **test-only**.
 *
 * Produces fixed-dimension vectors from a simple character-hash → sin()
 * transform. The resulting embeddings have no real semantic meaning;
 * they are stable and fast, which makes them useful for unit/integration
 * tests that need a concrete {@link IEmbeddingProvider} without loading
 * an ONNX model.
 *
 * **Do NOT use in production** — replace with a real model-backed
 * provider (e.g. the agentic-flow ONNX integration).
 */
export class HashEmbeddingProvider {
    /** Length of every vector this provider produces. */
    dimensions;
    /** Memoised embeddings, keyed by the exact input text. */
    cache = new Map();

    /**
     * @param {number} [dimensions=384] Length of the generated vectors.
     */
    constructor(dimensions = 384) {
        this.dimensions = dimensions;
    }

    /**
     * Embed one text, reusing a previously computed vector when available.
     *
     * The cache is keyed on the full text. Keying on a truncated prefix would
     * hand back the wrong vector for any two inputs that only differ after
     * the prefix, because `hashEmbed` hashes the entire string.
     *
     * @param {string} text Text to embed.
     * @returns {Promise<Float32Array>} The (shared, cached) embedding.
     */
    async embed(text) {
        const cached = this.cache.get(text);
        if (cached !== undefined) {
            return cached;
        }
        const embedding = this.hashEmbed(text);
        this.cache.set(text, embedding);
        return embedding;
    }

    /**
     * Embed several texts; results are in the same order as `texts`.
     *
     * @param {string[]} texts Texts to embed.
     * @returns {Promise<Float32Array[]>}
     */
    async batchEmbed(texts) {
        return Promise.all(texts.map((t) => this.embed(t)));
    }

    /**
     * Compute the hash embedding of `text` without touching the cache.
     *
     * The text is lower-cased and trimmed, then each dimension gets its own
     * 32-bit rolling hash of the characters, mapped into [0, 1] through
     * `sin()`. The vector is finally L2-normalised.
     *
     * @param {string} text Text to embed.
     * @returns {Float32Array} Vector of length `this.dimensions`.
     */
    hashEmbed(text) {
        const embedding = new Float32Array(this.dimensions);
        const normalized = text.toLowerCase().trim();
        for (let i = 0; i < this.dimensions; i++) {
            let hash = 0;
            for (let j = 0; j < normalized.length; j++) {
                // `| 0` keeps the running hash in signed 32-bit range.
                hash = ((hash << 5) - hash + normalized.charCodeAt(j) * (i + 1)) | 0;
            }
            embedding[i] = (Math.sin(hash) + 1) / 2;
        }
        // L2 normalize
        let norm = 0;
        for (let i = 0; i < this.dimensions; i++) {
            norm += embedding[i] * embedding[i];
        }
        norm = Math.sqrt(norm);
        if (norm > 0) {
            for (let i = 0; i < this.dimensions; i++) {
                embedding[i] /= norm;
            }
        }
        return embedding;
    }
}
|
|
// ============================================================================
// Shard Retriever
// ============================================================================
|
|
/**
 * Retrieves the policy shards most relevant to a task.
 *
 * Holds a constitution plus a list of rule shards, embeds the shards with the
 * configured embedding provider, and on `retrieve()` scores, filters and
 * selects shards for a task description.
 */
export class ShardRetriever {
    /** Shards of the currently loaded bundle. */
    shards = [];
    /** Constitution of the currently loaded bundle, or null before loading. */
    constitution = null;
    /** Provider exposing `embed(text)` and `batchEmbed(texts)`. */
    embeddingProvider;
    /** True once every shard in `shards` has been given an embedding. */
    indexed = false;
    /** Compiled glob regexes, keyed by glob string. */
    globCache = new Map();

    /**
     * @param {object} [embeddingProvider] Embedding provider; falls back to a
     *   new `HashEmbeddingProvider` when omitted.
     */
    constructor(embeddingProvider) {
        this.embeddingProvider = embeddingProvider ?? new HashEmbeddingProvider();
    }

    /**
     * Load a compiled policy bundle and index its shards.
     *
     * @param {{constitution: object, shards: object[]}} bundle
     * @returns {Promise<void>}
     */
    async loadBundle(bundle) {
        this.constitution = bundle.constitution;
        this.shards = bundle.shards;
        this.indexed = false;
        await this.indexShards();
    }

    /**
     * Index all shards by generating embeddings from their `compactText`.
     * The embedding is stored on each shard object; a no-op when already indexed.
     *
     * @returns {Promise<void>}
     */
    async indexShards() {
        if (this.indexed) {
            return;
        }
        const texts = this.shards.map((s) => s.compactText);
        const embeddings = await this.embeddingProvider.batchEmbed(texts);
        for (let i = 0; i < this.shards.length; i++) {
            this.shards[i].embedding = embeddings[i];
        }
        this.indexed = true;
    }

    /**
     * Classify task intent from keyword patterns.
     *
     * Each intent's score is the sum of the weights of its matching patterns;
     * the first intent with the strictly highest score wins and `general` is
     * the fallback when nothing matches.
     *
     * @param {string} taskDescription
     * @returns {{intent: string, confidence: number}} `confidence` is the
     *   winning score divided by 3, capped at 1.
     */
    classifyIntent(taskDescription) {
        let bestIntent = 'general';
        let bestScore = 0;
        for (const [intent, patterns] of Object.entries(INTENT_PATTERNS)) {
            if (intent === 'general') {
                continue; // Skip general fallback during scoring
            }
            let score = 0;
            for (const { pattern, weight } of patterns) {
                if (pattern.test(taskDescription)) {
                    score += weight;
                }
            }
            if (score > bestScore) {
                bestScore = score;
                bestIntent = intent;
            }
        }
        // Normalize confidence to 0-1
        const confidence = Math.min(bestScore / 3, 1);
        return { intent: bestIntent, confidence };
    }

    /**
     * Retrieve relevant shards for a task
     *
     * Contract:
     * 1. Always include the constitution
     * 2. Up to maxShards by semantic similarity
     * 3. Hard filters by risk class and repo scope
     * 4. Contradiction check: prefer higher priority
     *
     * @param {object} request
     * @param {string} request.taskDescription Text that is embedded and, when
     *   no intent is given, classified.
     * @param {string} [request.intent] Explicit intent; overrides classification.
     * @param {number} [request.maxShards=5] Maximum number of shards returned.
     * @param {string[]} [request.riskFilter] Allowed risk classes.
     * @param {string} [request.repoScope] Path matched against shard scopes.
     * @returns {Promise<object>} Result with `constitution`, `shards`,
     *   `detectedIntent` (the intent actually used), `contradictionsResolved`
     *   (number of contradictory pairs found among the returned shards),
     *   `policyText` and `latencyMs`.
     * @throws {Error} When no bundle has been loaded.
     */
    async retrieve(request) {
        const startTime = performance.now();
        if (!this.constitution) {
            throw new Error('No policy bundle loaded. Call loadBundle() first.');
        }
        // Step 1: Resolve intent; classification is only needed without an override.
        const intent = request.intent ?? this.classifyIntent(request.taskDescription).intent;
        // Step 2: Generate query embedding
        const queryEmbedding = await this.embeddingProvider.embed(request.taskDescription);
        // Step 3: Score all shards
        const maxShards = request.maxShards ?? 5;
        const scored = this.scoreShards(queryEmbedding, intent, request.riskFilter, request.repoScope);
        // Step 4: Select top N with contradiction resolution
        const selected = this.selectWithContradictionCheck(scored, maxShards);
        // Step 5: Build combined policy text
        const policyText = this.buildPolicyText(this.constitution, selected);
        const latencyMs = performance.now() - startTime;
        return {
            constitution: this.constitution,
            shards: selected,
            detectedIntent: intent,
            contradictionsResolved: this.countContradictions(selected),
            policyText,
            latencyMs,
        };
    }

    /**
     * Score all shards against the query.
     *
     * Shards failing the risk or repo-scope filter are dropped. The score is
     * cosine similarity plus 0.15 when the shard lists the intent, plus 0.1
     * for `critical` or 0.05 for `high` risk rules.
     *
     * @param {ArrayLike<number>} queryEmbedding
     * @param {string} intent
     * @param {string[]} [riskFilter]
     * @param {string} [repoScope]
     * @returns {{shard: object, similarity: number, reason: string}[]} Sorted
     *   by combined score, highest first; `similarity` holds the combined score.
     */
    scoreShards(queryEmbedding, intent, riskFilter, repoScope) {
        const results = [];
        for (const shard of this.shards) {
            // Hard filter: risk class
            if (riskFilter && riskFilter.length > 0) {
                if (!riskFilter.includes(shard.rule.riskClass)) {
                    continue;
                }
            }
            // Hard filter: repo scope ('**/*' is treated as "matches everything")
            if (repoScope) {
                const matchesScope = shard.rule.repoScopes.some((scope) => scope === '**/*' || this.matchGlob(repoScope, scope));
                if (!matchesScope) {
                    continue;
                }
            }
            // Semantic similarity (0 for shards that were never embedded)
            let similarity = 0;
            if (shard.embedding) {
                similarity = this.cosineSimilarity(queryEmbedding, shard.embedding);
            }
            // Intent boost: if shard matches detected intent, boost score
            const intentBoost = shard.rule.intents.includes(intent) ? 0.15 : 0;
            // Risk boost: critical/high rules get a boost
            const riskBoost = shard.rule.riskClass === 'critical' ? 0.1
                : shard.rule.riskClass === 'high' ? 0.05
                    : 0;
            const finalScore = similarity + intentBoost + riskBoost;
            const reasons = [];
            if (similarity > 0.3) {
                reasons.push(`semantic match (${(similarity * 100).toFixed(0)}%)`);
            }
            if (intentBoost > 0) {
                reasons.push(`intent match (${intent})`);
            }
            if (riskBoost > 0) {
                reasons.push(`risk priority (${shard.rule.riskClass})`);
            }
            results.push({
                shard,
                similarity: finalScore,
                reason: reasons.join(', ') || 'general relevance',
            });
        }
        // Sort by combined score descending
        return results.sort((a, b) => b.similarity - a.similarity);
    }

    /**
     * Select top N shards with contradiction checking.
     *
     * Walks `scored` in order and skips a candidate when any already selected
     * shard shares one of its domains, has a strictly higher priority and
     * contradicts it. Every higher-priority shard in the domain is checked,
     * not just the first one found.
     *
     * NOTE(review): a lower-priority shard selected earlier is not evicted
     * when a contradictory higher-priority shard is selected later.
     *
     * @param {{shard: object, similarity: number, reason: string}[]} scored
     * @param {number} maxShards
     * @returns {{shard: object, similarity: number, reason: string}[]}
     */
    selectWithContradictionCheck(scored, maxShards) {
        const selected = [];
        const selectedDomains = new Map(); // domain -> highest priority
        for (const item of scored) {
            if (selected.length >= maxShards) {
                break;
            }
            const rule = item.shard.rule;
            const dominated = rule.domains.some((domain) => {
                // Cheap pre-check: nothing of higher priority selected in this domain.
                const topPriority = selectedDomains.get(domain);
                if (topPriority === undefined || !(topPriority > rule.priority)) {
                    return false;
                }
                return selected.some(({ shard }) => shard.rule.domains.includes(domain) &&
                    shard.rule.priority > rule.priority &&
                    this.areContradictory(shard.rule, rule));
            });
            if (dominated) {
                continue;
            }
            selected.push(item);
            for (const domain of rule.domains) {
                const current = selectedDomains.get(domain) ?? 0;
                selectedDomains.set(domain, Math.max(current, rule.priority));
            }
        }
        return selected;
    }

    /**
     * Check if two rules are contradictory.
     *
     * Purely lexical: true when one rule's `text` contains a positive keyword
     * (must / always / require) and the other's contains a paired negative
     * keyword (never / do not / avoid / don't / forbid / prohibit).
     *
     * @param {{text: string}} a
     * @param {{text: string}} b
     * @returns {boolean}
     */
    areContradictory(a, b) {
        const negationPatterns = [
            { positive: /\bmust\b/i, negative: /\bnever\b|\bdo not\b|\bavoid\b/i },
            { positive: /\balways\b/i, negative: /\bnever\b|\bdon't\b/i },
            { positive: /\brequire\b/i, negative: /\bforbid\b|\bprohibit\b/i },
        ];
        for (const { positive, negative } of negationPatterns) {
            if ((positive.test(a.text) && negative.test(b.text)) ||
                (negative.test(a.text) && positive.test(b.text))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Count contradictory pairs within a selected set.
     *
     * @param {{shard: object}[]} selected
     * @returns {number}
     */
    countContradictions(selected) {
        let count = 0;
        for (let i = 0; i < selected.length; i++) {
            for (let j = i + 1; j < selected.length; j++) {
                if (this.areContradictory(selected[i].shard.rule, selected[j].shard.rule)) {
                    count++;
                }
            }
        }
        return count;
    }

    /**
     * Build combined policy text for injection: the constitution text followed,
     * when shards were selected, by a "Task-Specific Rules" bullet list.
     *
     * @param {{text: string}} constitution
     * @param {{shard: {compactText: string}}[]} shards
     * @returns {string}
     */
    buildPolicyText(constitution, shards) {
        const parts = [];
        // Always include constitution
        parts.push(constitution.text);
        // Add retrieved shards
        if (shards.length > 0) {
            parts.push('');
            parts.push('## Task-Specific Rules');
            parts.push('');
            for (const { shard } of shards) {
                parts.push(`- ${shard.compactText}`);
            }
        }
        return parts.join('\n');
    }

    /**
     * Simple glob matching (supports * and **).
     *
     * `*` matches any run of characters other than `/`; `**` matches any run
     * of characters including `/`. Everything else is matched literally, so
     * regex metacharacters in the glob (`.`, `(`, `[`, `+`, ...) neither act
     * as wildcards nor break compilation. Note that `**​/x` still requires a
     * `/` before `x`.
     *
     * Compiled regexes are cached per glob to avoid re-compiling on every call.
     *
     * @param {string} path Path to test.
     * @param {string} glob Glob pattern.
     * @returns {boolean}
     */
    matchGlob(path, glob) {
        let re = this.globCache.get(glob);
        if (!re) {
            // Splitting on a capturing group keeps the wildcards as their own
            // segments, so literal text can be escaped independently.
            const pattern = glob
                .split(/(\*\*|\*)/)
                .map((segment) => {
                    if (segment === '**') {
                        return '.*';
                    }
                    if (segment === '*') {
                        return '[^/]*';
                    }
                    return segment.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
                })
                .join('');
            re = new RegExp(`^${pattern}$`);
            this.globCache.set(glob, re);
        }
        return re.test(path);
    }

    /**
     * Cosine similarity between two vectors, clamped to [0, 1].
     * Returns 0 for vectors of different length or with zero magnitude.
     *
     * @param {ArrayLike<number>} a
     * @param {ArrayLike<number>} b
     * @returns {number}
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            return 0;
        }
        let dot = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denom = Math.sqrt(normA) * Math.sqrt(normB);
        return denom > 0 ? Math.max(0, Math.min(1, dot / denom)) : 0;
    }

    /**
     * Get current shard count
     */
    get shardCount() {
        return this.shards.length;
    }

    /**
     * Get constitution
     */
    getConstitution() {
        return this.constitution;
    }
}
|
|
/**
 * Factory for {@link ShardRetriever}.
 *
 * @param {object} [embeddingProvider] Passed straight to the
 *   `ShardRetriever` constructor.
 * @returns {ShardRetriever} A new retriever with no bundle loaded.
 */
export function createRetriever(embeddingProvider) {
    const retriever = new ShardRetriever(embeddingProvider);
    return retriever;
}
|
|
//# sourceMappingURL=retriever.js.map
|