tasq/node_modules/@claude-flow/guidance/dist/retriever.js

/**
 * Task Intent Classifier + Shard Retriever
 *
 * Stores rule shards in vector storage with embeddings and metadata.
 * At task start, retrieves the top N shards by semantic similarity
 * with hard filters by risk class and repo scope.
 *
 * Retrieval contract:
 * 1. Always include the constitution
 * 2. Retrieve up to `maxShards` shards (default 5) by semantic similarity
 * 3. Add hard filters by risk class and repo scope
 * 4. Contradiction check: prefer higher-priority rule ID
 *
 * @module @claude-flow/guidance/retriever
 */
// ============================================================================
// Intent Classification
// ============================================================================
/**
 * Intent detection patterns with confidence weights.
 *
 * Maps each intent label to a list of `{ pattern, weight }` entries.
 * `ShardRetriever.classifyIntent` tests every pattern against the task
 * description, sums the weights of the patterns that match, and picks the
 * intent with the highest total. Every pattern except the `general`
 * catch-all is case-insensitive and anchored on word boundaries.
 */
const INTENT_PATTERNS = {
    'bug-fix': [
        { pattern: /\b(fix|bug|broken|error|crash|issue|wrong|incorrect|fail)\b/i, weight: 0.8 },
        { pattern: /\b(not working|doesn't work|unexpected|regression)\b/i, weight: 0.9 },
    ],
    'feature': [
        { pattern: /\b(add|create|implement|build|new|introduce|develop)\b/i, weight: 0.5 },
        { pattern: /\b(feature|capability|functionality|support for)\b/i, weight: 0.9 },
        // Low weight: these nouns only hint at feature work on their own.
        { pattern: /\b(user|page|profile|dashboard|form|widget|component|module)\b/i, weight: 0.3 },
    ],
    'refactor': [
        { pattern: /\b(refactor|restructure|reorganize|simplify|clean|extract|inline)\b/i, weight: 0.9 },
        { pattern: /\b(improve readability|reduce complexity|code quality)\b/i, weight: 0.8 },
    ],
    'security': [
        { pattern: /\b(security|auth|permission|access control|encrypt|secret|token)\b/i, weight: 0.9 },
        { pattern: /\b(cve|vulnerability|injection|xss|csrf|sanitize)\b/i, weight: 1.0 },
    ],
    'performance': [
        { pattern: /\b(performance|optimize|speed|slow|fast|cache|memory usage|latency)\b/i, weight: 0.9 },
        // Note: "profile" also appears in the 'feature' noun list above.
        { pattern: /\b(bottleneck|profile|benchmark|throughput|efficient)\b/i, weight: 0.8 },
    ],
    'testing': [
        { pattern: /\b(tests?|specs?|coverage|mocks?|asserts?|tdd|unit tests?|integration tests?)\b/i, weight: 1.0 },
        { pattern: /\b(test suite|test case|test plan|quality assurance)\b/i, weight: 0.9 },
    ],
    'docs': [
        { pattern: /\b(document|readme|jsdoc|comment|explain|describe|tutorial)\b/i, weight: 0.8 },
        { pattern: /\b(api docs|documentation|usage guide|changelog)\b/i, weight: 0.9 },
    ],
    'deployment': [
        { pattern: /\b(deploy|release|publish|ci|cd|pipeline|docker|kubernetes)\b/i, weight: 0.9 },
        { pattern: /\b(staging|production|rollback|migration|version)\b/i, weight: 0.7 },
    ],
    'architecture': [
        { pattern: /\b(architect|design pattern|system design|structure|boundary)\b/i, weight: 0.8 },
        { pattern: /\b(module boundary|component architecture|layer|service mesh|domain model|aggregate root)\b/i, weight: 0.7 },
        { pattern: /\b(interface|api design|separation of concerns)\b/i, weight: 0.6 },
    ],
    'debug': [
        { pattern: /\b(debug|trace|log|diagnose|investigate|root cause)\b/i, weight: 0.9 },
        { pattern: /\b(stack trace|breakpoint|inspect|reproduction)\b/i, weight: 0.8 },
    ],
    'general': [
        // Catch-all: matches any text containing at least one character
        // that is not a line terminator. classifyIntent skips this entry
        // while scoring and uses 'general' only as its initial fallback.
        { pattern: /./, weight: 0.1 },
    ],
};
/**
 * Deterministic hash-based embedding provider — **test-only**.
 *
 * Produces fixed-dimension vectors from a simple character-hash → sin()
 * transform.  The resulting embeddings have no real semantic meaning;
 * they are stable and fast, which makes them useful for unit/integration
 * tests that need a concrete {@link IEmbeddingProvider} without loading
 * an ONNX model.
 *
 * Results are memoized per instance, keyed on the exact input text.
 *
 * **Do NOT use in production** — replace with a real model-backed
 * provider (e.g. the agentic-flow ONNX integration).
 */
export class HashEmbeddingProvider {
    dimensions;
    cache = new Map();
    /**
     * @param {number} [dimensions=384] Length of every vector this provider produces.
     */
    constructor(dimensions = 384) {
        this.dimensions = dimensions;
    }
    /**
     * Embed a single text, reusing a previously computed vector when available.
     *
     * @param {string} text Text to embed.
     * @returns {Promise<Float32Array>} L2-normalized vector of length `dimensions`.
     *   The same array instance is returned on every cache hit.
     */
    async embed(text) {
        // Key on the complete text. hashEmbed() hashes every character, so a
        // truncated key would hand back the wrong vector for any two inputs
        // that merely share a long common prefix.
        const cached = this.cache.get(text);
        if (cached !== undefined)
            return cached;
        const embedding = this.hashEmbed(text);
        this.cache.set(text, embedding);
        return embedding;
    }
    /**
     * Embed several texts.
     *
     * @param {string[]} texts Texts to embed.
     * @returns {Promise<Float32Array[]>} One vector per input, in input order.
     */
    async batchEmbed(texts) {
        return Promise.all(texts.map(t => this.embed(t)));
    }
    /**
     * Compute the raw (uncached) embedding for a text.
     *
     * The text is lower-cased and trimmed first, so inputs differing only in
     * case or surrounding whitespace produce identical vectors.
     *
     * @param {string} text Text to embed.
     * @returns {Float32Array} L2-normalized vector of length `dimensions`.
     */
    hashEmbed(text) {
        const embedding = new Float32Array(this.dimensions);
        const normalized = text.toLowerCase().trim();
        for (let i = 0; i < this.dimensions; i++) {
            // 32-bit rolling hash (hash * 31 + code); the per-dimension
            // multiplier (i + 1) makes each component differ.
            let hash = 0;
            for (let j = 0; j < normalized.length; j++) {
                hash = ((hash << 5) - hash + normalized.charCodeAt(j) * (i + 1)) | 0;
            }
            // Map sin() output from [-1, 1] into [0, 1].
            embedding[i] = (Math.sin(hash) + 1) / 2;
        }
        // L2 normalize
        let norm = 0;
        for (let i = 0; i < this.dimensions; i++) {
            norm += embedding[i] * embedding[i];
        }
        norm = Math.sqrt(norm);
        if (norm > 0) {
            for (let i = 0; i < this.dimensions; i++) {
                embedding[i] /= norm;
            }
        }
        return embedding;
    }
}
// ============================================================================
// Shard Retriever
// ============================================================================
/**
 * Retrieves the rule shards most relevant to a task from a loaded policy bundle.
 *
 * Usage: call {@link ShardRetriever#loadBundle} once, then
 * {@link ShardRetriever#retrieve} for each task.
 */
export class ShardRetriever {
    shards = [];
    constitution = null;
    embeddingProvider;
    indexed = false;
    globCache = new Map();
    /**
     * @param {object} [embeddingProvider] Object exposing `embed(text)` and
     *   `batchEmbed(texts)`. Defaults to a new `HashEmbeddingProvider`.
     */
    constructor(embeddingProvider) {
        this.embeddingProvider = embeddingProvider ?? new HashEmbeddingProvider();
    }
    /**
     * Load a compiled policy bundle and index its shards.
     *
     * @param {object} bundle Object with `constitution` and `shards` properties.
     *   The shard objects are stored by reference and receive an `embedding`
     *   property during indexing.
     * @returns {Promise<void>}
     */
    async loadBundle(bundle) {
        this.constitution = bundle.constitution;
        this.shards = bundle.shards;
        this.indexed = false;
        await this.indexShards();
    }
    /**
     * Index all shards by generating an embedding from each shard's
     * `compactText`. Does nothing when the current shards are already indexed.
     *
     * @returns {Promise<void>}
     */
    async indexShards() {
        if (this.indexed)
            return;
        const texts = this.shards.map(s => s.compactText);
        const embeddings = await this.embeddingProvider.batchEmbed(texts);
        for (let i = 0; i < this.shards.length; i++) {
            this.shards[i].embedding = embeddings[i];
        }
        this.indexed = true;
    }
    /**
     * Classify task intent by summing the weights of matching patterns.
     *
     * @param {string} taskDescription Free-text task description.
     * @returns {{intent: string, confidence: number}} The best-scoring intent
     *   (`'general'` when nothing matches) and a confidence in [0, 1].
     */
    classifyIntent(taskDescription) {
        let bestIntent = 'general';
        let bestScore = 0;
        for (const [intent, patterns] of Object.entries(INTENT_PATTERNS)) {
            if (intent === 'general')
                continue; // Skip general fallback during scoring
            let score = 0;
            for (const { pattern, weight } of patterns) {
                if (pattern.test(taskDescription)) {
                    score += weight;
                }
            }
            // Strict comparison: on a tie the intent listed first wins.
            if (score > bestScore) {
                bestScore = score;
                bestIntent = intent;
            }
        }
        // Normalize confidence to 0-1
        const confidence = Math.min(bestScore / 3, 1);
        return { intent: bestIntent, confidence };
    }
    /**
     * Retrieve relevant shards for a task
     *
     * Contract:
     * 1. Always include the constitution
     * 2. Up to maxShards by semantic similarity
     * 3. Hard filters by risk class and repo scope
     * 4. Contradiction check: prefer higher priority
     *
     * @param {object} request Reads `taskDescription` (required) and the
     *   optional `intent`, `maxShards` (default 5), `riskFilter`, `repoScope`.
     * @returns {Promise<object>} `{ constitution, shards, detectedIntent,
     *   contradictionsResolved, policyText, latencyMs }`.
     * @throws {Error} When no bundle has been loaded.
     */
    async retrieve(request) {
        const startTime = performance.now();
        if (!this.constitution) {
            throw new Error('No policy bundle loaded. Call loadBundle() first.');
        }
        // Step 1: Classify intent; an explicit request.intent takes precedence.
        const { intent: detectedIntent } = this.classifyIntent(request.taskDescription);
        const intent = request.intent ?? detectedIntent;
        // Step 2: Generate query embedding
        const queryEmbedding = await this.embeddingProvider.embed(request.taskDescription);
        // Step 3: Score all shards
        const maxShards = request.maxShards ?? 5;
        const scored = this.scoreShards(queryEmbedding, intent, request.riskFilter, request.repoScope);
        // Step 4: Select top N with contradiction resolution
        const selected = this.selectWithContradictionCheck(scored, maxShards);
        // Step 5: Build combined policy text
        const policyText = this.buildPolicyText(this.constitution, selected);
        const latencyMs = performance.now() - startTime;
        return {
            constitution: this.constitution,
            shards: selected,
            detectedIntent: intent,
            // NOTE: this is the number of contradictory pairs still present
            // in the selected set, as computed by countContradictions().
            contradictionsResolved: this.countContradictions(selected),
            policyText,
            latencyMs,
        };
    }
    /**
     * Score all shards against the query
     *
     * @param {ArrayLike<number>} queryEmbedding Embedding of the task description.
     * @param {string} intent Intent used for the intent boost.
     * @param {string[]} [riskFilter] When non-empty, only shards whose
     *   `rule.riskClass` is listed are kept.
     * @param {string} [repoScope] When set, only shards with a matching
     *   `rule.repoScopes` glob are kept.
     * @returns {Array<{shard: object, similarity: number, reason: string}>}
     *   Results sorted by score descending. `similarity` holds the combined
     *   score (cosine similarity plus boosts), so it can exceed 1.
     */
    scoreShards(queryEmbedding, intent, riskFilter, repoScope) {
        const results = [];
        for (const shard of this.shards) {
            // Hard filter: risk class
            if (riskFilter && riskFilter.length > 0) {
                if (!riskFilter.includes(shard.rule.riskClass))
                    continue;
            }
            // Hard filter: repo scope. '**/*' is treated as "everything"
            // because the compiled glob would require a '/' in the path.
            if (repoScope) {
                const matchesScope = shard.rule.repoScopes.some(scope => scope === '**/*' || this.matchGlob(repoScope, scope));
                if (!matchesScope)
                    continue;
            }
            // Semantic similarity
            let similarity = 0;
            if (shard.embedding) {
                similarity = this.cosineSimilarity(queryEmbedding, shard.embedding);
            }
            // Intent boost: if shard matches detected intent, boost score
            const intentBoost = shard.rule.intents.includes(intent) ? 0.15 : 0;
            // Risk boost: critical/high rules get a boost
            const riskBoost = shard.rule.riskClass === 'critical' ? 0.1
                : shard.rule.riskClass === 'high' ? 0.05
                    : 0;
            const finalScore = similarity + intentBoost + riskBoost;
            const reasons = [];
            if (similarity > 0.3)
                reasons.push(`semantic match (${(similarity * 100).toFixed(0)}%)`);
            if (intentBoost > 0)
                reasons.push(`intent match (${intent})`);
            if (riskBoost > 0)
                reasons.push(`risk priority (${shard.rule.riskClass})`);
            results.push({
                shard,
                similarity: finalScore,
                reason: reasons.join(', ') || 'general relevance',
            });
        }
        // Sort by combined score descending
        return results.sort((a, b) => b.similarity - a.similarity);
    }
    /**
     * Select top N shards with contradiction checking
     * When two rules contradict, keep the one with higher priority
     *
     * @param {Array<{shard: object}>} scored Candidates in descending score order.
     * @param {number} maxShards Maximum number of shards to select.
     * @returns {Array<{shard: object}>} The selected candidates, in input order.
     */
    selectWithContradictionCheck(scored, maxShards) {
        const selected = [];
        const selectedDomains = new Map(); // domain -> highest priority
        for (const item of scored) {
            if (selected.length >= maxShards)
                break;
            const { domains, priority } = item.shard.rule;
            // Check for potential contradictions with already selected shards
            let dominated = false;
            for (const domain of domains) {
                const existingPriority = selectedDomains.get(domain);
                if (existingPriority !== undefined && existingPriority > priority) {
                    // Higher priority rule already selected for this domain
                    // Check if they're likely contradictory (similar domain, different intent)
                    const existing = selected.find(s => s.shard.rule.domains.includes(domain) &&
                        s.shard.rule.priority > priority);
                    if (existing && this.areContradictory(existing.shard.rule, item.shard.rule)) {
                        dominated = true;
                        break;
                    }
                }
            }
            if (!dominated) {
                selected.push(item);
                for (const domain of domains) {
                    // Record the real maximum; do not seed with 0, which
                    // would misreport the maximum for negative priorities.
                    const current = selectedDomains.get(domain);
                    selectedDomains.set(domain, current === undefined ? priority : Math.max(current, priority));
                }
            }
        }
        return selected;
    }
    /**
     * Check if two rules are contradictory
     *
     * Heuristic: true when one rule's `text` contains an obligation word
     * (must / always / require) and the other's contains a matching
     * prohibition word (never / do not / avoid / don't / forbid / prohibit).
     *
     * @param {{text: string}} a First rule.
     * @param {{text: string}} b Second rule.
     * @returns {boolean}
     */
    areContradictory(a, b) {
        const negationPatterns = [
            { positive: /\bmust\b/i, negative: /\bnever\b|\bdo not\b|\bavoid\b/i },
            { positive: /\balways\b/i, negative: /\bnever\b|\bdon't\b/i },
            { positive: /\brequire\b/i, negative: /\bforbid\b|\bprohibit\b/i },
        ];
        for (const { positive, negative } of negationPatterns) {
            if ((positive.test(a.text) && negative.test(b.text)) ||
                (negative.test(a.text) && positive.test(b.text))) {
                return true;
            }
        }
        return false;
    }
    /**
     * Count contradictions in selected set
     *
     * @param {Array<{shard: object}>} selected Selected candidates.
     * @returns {number} Number of unordered pairs that areContradictory() flags.
     */
    countContradictions(selected) {
        let count = 0;
        for (let i = 0; i < selected.length; i++) {
            for (let j = i + 1; j < selected.length; j++) {
                if (this.areContradictory(selected[i].shard.rule, selected[j].shard.rule)) {
                    count++;
                }
            }
        }
        return count;
    }
    /**
     * Build combined policy text for injection
     *
     * @param {{text: string}} constitution Constitution; its text always comes first.
     * @param {Array<{shard: {compactText: string}}>} shards Selected shards.
     * @returns {string} Constitution text, followed by a
     *   "## Task-Specific Rules" bullet list when any shards were selected.
     */
    buildPolicyText(constitution, shards) {
        const parts = [];
        // Always include constitution
        parts.push(constitution.text);
        // Add retrieved shards
        if (shards.length > 0) {
            parts.push('');
            parts.push('## Task-Specific Rules');
            parts.push('');
            for (const { shard } of shards) {
                parts.push(`- ${shard.compactText}`);
            }
        }
        return parts.join('\n');
    }
    /**
     * Simple glob matching (supports * and **).
     * Compiled regexes are cached per glob to avoid re-compiling on every call.
     *
     * `*` matches any run of characters except `/`; `**` matches any run of
     * characters including `/`. Every other character is matched literally.
     * Note that `**` followed by `/` still requires that `/` to be present.
     *
     * @param {string} path Path to test.
     * @param {string} glob Glob pattern.
     * @returns {boolean} Whether the whole path matches the glob.
     */
    matchGlob(path, glob) {
        let re = this.globCache.get(glob);
        if (!re) {
            // Escape literal runs so regex metacharacters in the glob
            // (".", "+", "(", "[", ...) match themselves instead of changing
            // the pattern or making the RegExp constructor throw.
            const escapeLiteral = (literal) => literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const pattern = glob
                .split('**')
                .map(part => part.split('*').map(escapeLiteral).join('[^/]*'))
                .join('.*');
            re = new RegExp(`^${pattern}$`);
            this.globCache.set(glob, re);
        }
        return re.test(path);
    }
    /**
     * Cosine similarity between two vectors
     *
     * @param {ArrayLike<number>} a First vector.
     * @param {ArrayLike<number>} b Second vector.
     * @returns {number} Similarity clamped to [0, 1]; 0 when the lengths
     *   differ or either vector has zero magnitude.
     */
    cosineSimilarity(a, b) {
        if (a.length !== b.length)
            return 0;
        let dot = 0, normA = 0, normB = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        const denom = Math.sqrt(normA) * Math.sqrt(normB);
        return denom > 0 ? Math.max(0, Math.min(1, dot / denom)) : 0;
    }
    /**
     * Get current shard count
     */
    get shardCount() {
        return this.shards.length;
    }
    /**
     * Get constitution
     *
     * @returns {object|null} The loaded constitution, or null before loadBundle().
     */
    getConstitution() {
        return this.constitution;
    }
}
/**
 * Factory for {@link ShardRetriever}.
 *
 * @param {object} [embeddingProvider] Embedding provider forwarded unchanged
 *   to the ShardRetriever constructor.
 * @returns {ShardRetriever} A new retriever with no bundle loaded yet.
 */
export function createRetriever(embeddingProvider) {
    const retriever = new ShardRetriever(embeddingProvider);
    return retriever;
}
//# sourceMappingURL=retriever.js.map