tasq/node_modules/@claude-flow/aidefence/dist/domain/services/threat-detection-service.js

/**
 * Threat Detection Service
 *
 * Core detection logic for AI manipulation attempts.
 * Embedded implementation based on AIMDS patterns.
 *
 * Performance targets:
 * - Detection: <10ms
 * - Pattern matching: <5ms
 * - PII scan: <3ms
 */
import { createThreat } from '../entities/threat.js';
import { createHash } from 'crypto';
/**
 * Prompt injection patterns (from AIMDS)
 *
 * Rules are written per category as compact
 * `[regex, severity, baseConfidence, description]` rows and expanded, in
 * declaration order, into records of the shape
 * `{ pattern, type, severity, description, baseConfidence }`.
 */
const PROMPT_INJECTION_PATTERNS = [
    {
        // Instruction override patterns
        type: 'instruction_override',
        rows: [
            [/ignore\s+(all\s+)?(previous\s+)?instructions/i, 'critical', 0.95, 'Attempt to override system instructions'],
            [/forget\s+(everything|all|previous)/i, 'critical', 0.92, 'Attempt to reset system context'],
            [/disregard\s+(all\s+)?(previous|prior|above)/i, 'critical', 0.93, 'Attempt to disregard instructions'],
            [/do\s+not\s+follow\s+(the\s+)?(previous|above|prior)/i, 'high', 0.88, 'Attempt to bypass previous instructions'],
        ],
    },
    {
        // Role switching patterns
        type: 'role_switching',
        rows: [
            [/you\s+are\s+now\s+(?!going|about|ready)/i, 'high', 0.85, 'Attempt to change AI identity'],
            [/act\s+as\s+(if\s+you\s+are\s+)?a?\s*(different|new|another)/i, 'high', 0.82, 'Attempt to assume different role'],
            [/pretend\s+(to\s+be|you\s+are)/i, 'medium', 0.65, 'Roleplay instruction that may be legitimate'],
        ],
    },
    {
        // Jailbreak patterns
        type: 'jailbreak',
        rows: [
            [/\bDAN\b.*\bmode\b|\bmode\b.*\bDAN\b/i, 'critical', 0.98, 'DAN jailbreak attempt'],
            [/jailbreak/i, 'critical', 0.95, 'Explicit jailbreak mention'],
            [/bypass\s+(your\s+)?(restrictions|limitations|rules|filters)/i, 'critical', 0.93, 'Attempt to bypass restrictions'],
            [/without\s+(any\s+)?(restrictions|limitations|rules)/i, 'high', 0.85, 'Request for unrestricted output'],
            [/disable\s+(your\s+)?(safety|content\s+)?filter/i, 'critical', 0.96, 'Attempt to disable safety filters'],
        ],
    },
    {
        // Context manipulation patterns
        type: 'context_manipulation',
        rows: [
            [/system\s*:\s*|<\|system\|>|<system>/i, 'critical', 0.97, 'Fake system message injection'],
            [/\[system\]|\{system\}|system\s+prompt/i, 'high', 0.88, 'System prompt reference'],
            [/reveal\s+(your\s+)?(system\s+)?prompt/i, 'high', 0.90, 'Attempt to extract system prompt'],
            [/what\s+(is|are)\s+your\s+(initial\s+)?instructions/i, 'medium', 0.75, 'Query for system instructions'],
            // Deliberately case-sensitive (no flags): only bracket characters are matched.
            [/\[\[.*?\]\]|<<.*?>>|\{\{.*?\}\}/, 'medium', 0.70, 'Special bracket injection attempt'],
        ],
    },
    {
        // Encoding attack patterns
        type: 'encoding_attack',
        rows: [
            [/base64|rot13|hex\s+encode|url\s+encode/i, 'medium', 0.60, 'Potential encoding-based bypass'],
            [/decode\s+this|encrypted\s+message/i, 'medium', 0.55, 'Request to decode potentially malicious content'],
        ],
    },
    {
        // Hypothetical bypass patterns
        type: 'prompt_injection',
        rows: [
            [/hypothetically|in\s+theory|theoretically|if\s+you\s+could/i, 'low', 0.45, 'Hypothetical framing (may be legitimate)'],
            [/for\s+(educational|research|academic)\s+purposes/i, 'low', 0.35, 'Educational framing (often legitimate)'],
        ],
    },
    {
        // Developer mode patterns
        type: 'jailbreak',
        rows: [
            [/developer\s+mode|dev\s+mode|debug\s+mode/i, 'high', 0.85, 'Attempt to enable developer mode'],
            [/enable\s+(hidden|secret|special)\s+(features|mode|commands)/i, 'high', 0.88, 'Attempt to enable hidden features'],
        ],
    },
    {
        // Delimiter abuse patterns
        type: 'context_manipulation',
        rows: [
            [/```system|```instruction|```prompt/i, 'high', 0.85, 'Code block delimiter abuse'],
            [/---\s*(system|instruction|prompt)/i, 'medium', 0.70, 'Markdown delimiter abuse'],
        ],
    },
].flatMap(({ type, rows }) => rows.map(([pattern, severity, baseConfidence, description]) => ({
    pattern,
    type,
    severity,
    description,
    baseConfidence,
})));
/**
 * PII detection patterns
 *
 * Each entry pairs a regex with a `type` label and a human-readable
 * `description`.
 *
 * The regexes intentionally carry no `g` flag: they are only used for
 * yes/no checks via `RegExp.prototype.test`, and a global regex keeps
 * `lastIndex` between calls, which makes repeated checks of the same text
 * alternate between "found" and "not found".
 */
const PII_PATTERNS = [
    {
        // TLD class is letters only; the previous `[A-Z|a-z]` also accepted a literal `|`.
        pattern: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/,
        type: 'email',
        description: 'Email address',
    },
    {
        // NNN-NN-NNNN
        pattern: /\b\d{3}-\d{2}-\d{4}\b/,
        type: 'ssn',
        description: 'Social Security Number',
    },
    {
        // Four groups of four digits, optionally separated by a dash or whitespace
        pattern: /\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b/,
        type: 'credit_card',
        description: 'Credit card number',
    },
    {
        pattern: /\b(sk-[a-zA-Z0-9]{48}|sk-ant-[a-zA-Z0-9-]{90,})\b/,
        type: 'api_key',
        description: 'API key (OpenAI/Anthropic format)',
    },
    {
        pattern: /\b(ghp_[a-zA-Z0-9]{36}|github_pat_[a-zA-Z0-9_]{82})\b/,
        type: 'api_key',
        description: 'GitHub token',
    },
    {
        // `password: value` / `password=value`, value optionally quoted, at least 4 characters
        pattern: /password\s*[:=]\s*["']?[^"'\s]{4,}["']?/i,
        type: 'password',
        description: 'Hardcoded password',
    },
];
/**
 * Sort rank for each known severity label; a lower rank sorts first.
 */
const SEVERITY_RANK = Object.freeze({ critical: 0, high: 1, medium: 2, low: 3 });
/**
 * Run `regex` against `text`, always scanning from the start of the string.
 *
 * Regexes with the `g` or `y` flag remember `lastIndex` between calls, so
 * reusing one shared regex object across inputs gives call-dependent results.
 * Resetting `lastIndex` before and after the match keeps every call
 * independent. Regexes without those flags are left untouched.
 *
 * @param {RegExp} regex - Pattern to run.
 * @param {string} text - Text to search.
 * @returns {RegExpExecArray|null} The first match, or null when there is none.
 */
function matchFromStart(regex, text) {
    const isStateful = regex.global || regex.sticky;
    if (isStateful) {
        regex.lastIndex = 0;
    }
    const match = regex.exec(text);
    if (isStateful) {
        regex.lastIndex = 0;
    }
    return match;
}
/**
 * Threat Detection Service
 *
 * Matches input text against a list of threat patterns (the built-in prompt
 * injection list unless custom patterns are supplied), checks it for PII and
 * keeps running timing statistics for `detect()` calls.
 */
export class ThreatDetectionService {
    /** Pattern records (`pattern`, `type`, `severity`, `description`, `baseConfidence`) used for matching. */
    patterns;
    /** Number of completed `detect()` calls. */
    detectionCount = 0;
    /** Sum of the wall-clock time spent in `detect()`, in milliseconds. */
    totalDetectionTimeMs = 0;
    /**
     * @param {Array<object>} [customPatterns] - Pattern records to use instead
     *   of the built-in list; the built-in list is used when this is null or
     *   undefined.
     */
    constructor(customPatterns) {
        this.patterns = customPatterns ?? PROMPT_INJECTION_PATTERNS;
    }
    /**
     * Detect threats in input text
     * Target: <10ms latency
     *
     * Threat patterns are matched against the normalized input; the PII check
     * and the hash use the raw input.
     *
     * @param {string} input - Text to analyse.
     * @returns {{safe: boolean, threats: Array, detectionTimeMs: number, piiFound: boolean, inputHash: string}}
     *   `safe` is true when no pattern matched; `threats` holds at most one
     *   entry per threat type, ordered by severity and then confidence.
     */
    detect(input) {
        const startTime = performance.now();
        // Normalize input
        const normalizedInput = this.normalizeInput(input);
        // Run every pattern exactly once. The number of hits is the
        // "multiple indicators" signal used by calculateConfidence, so it is
        // computed here instead of re-testing all patterns for each hit.
        const hits = [];
        for (const pattern of this.patterns) {
            const match = matchFromStart(pattern.pattern, normalizedInput);
            if (match) {
                hits.push({ pattern, match });
            }
        }
        const threats = hits.map(({ pattern, match }) => {
            // Calculate confidence with context
            const confidence = this.calculateConfidence(pattern, match, normalizedInput, hits.length);
            return createThreat({
                type: pattern.type,
                severity: this.adjustSeverity(pattern.severity, confidence),
                confidence,
                pattern: pattern.pattern.source,
                description: pattern.description,
                location: {
                    start: match.index,
                    end: match.index + match[0].length,
                },
            });
        });
        // PII detection
        const piiFound = this.detectPII(input);
        const detectionTimeMs = performance.now() - startTime;
        this.detectionCount++;
        this.totalDetectionTimeMs += detectionTimeMs;
        return {
            safe: threats.length === 0,
            threats: this.deduplicateThreats(threats),
            detectionTimeMs,
            piiFound,
            inputHash: this.hashInput(input),
        };
    }
    /**
     * Quick scan - pattern matching only
     * Target: <5ms latency
     *
     * Returns as soon as a pattern with severity `critical` matches;
     * otherwise reports the highest `baseConfidence` among matching patterns.
     *
     * @param {string} input - Text to analyse.
     * @returns {{threat: boolean, confidence: number}}
     */
    quickScan(input) {
        const normalizedInput = this.normalizeInput(input);
        let maxConfidence = 0;
        let threatFound = false;
        for (const pattern of this.patterns) {
            if (matchFromStart(pattern.pattern, normalizedInput) === null) {
                continue;
            }
            threatFound = true;
            maxConfidence = Math.max(maxConfidence, pattern.baseConfidence);
            // Early exit on critical threats
            if (pattern.severity === 'critical') {
                return { threat: true, confidence: maxConfidence };
            }
        }
        return { threat: threatFound, confidence: maxConfidence };
    }
    /**
     * Detect PII in text
     *
     * @param {string} input - Text to check (used as given, not normalized).
     * @returns {boolean} True when any PII pattern matches.
     */
    detectPII(input) {
        // matchFromStart keeps the result independent of earlier calls even
        // when the shared PII regexes carry the `g` flag.
        return PII_PATTERNS.some(({ pattern }) => matchFromStart(pattern, input) !== null);
    }
    /**
     * Get detection statistics
     *
     * @returns {{detectionCount: number, avgDetectionTimeMs: number}}
     *   The average is 0 until `detect()` has been called at least once.
     */
    getStats() {
        return {
            detectionCount: this.detectionCount,
            avgDetectionTimeMs: this.detectionCount > 0
                ? this.totalDetectionTimeMs / this.detectionCount
                : 0,
        };
    }
    /**
     * Normalize input for consistent detection
     *
     * @param {string} input - Raw text.
     * @returns {string} NFKC-normalized text without zero-width characters,
     *   with whitespace runs collapsed to single spaces and trimmed.
     */
    normalizeInput(input) {
        return input
            // Normalize unicode
            .normalize('NFKC')
            // Remove zero-width characters
            .replace(/[\u200B-\u200D\uFEFF]/g, '')
            // Normalize whitespace
            .replace(/\s+/g, ' ')
            .trim();
    }
    /**
     * Calculate confidence with contextual factors
     *
     * @param {object} pattern - Pattern record; only `baseConfidence` is read.
     * @param {{index: number}} match - Match result; only `index` is read.
     * @param {string} input - Normalized input the match was found in.
     * @param {number} [threatIndicatorCount] - How many patterns match
     *   `input`. When omitted it is computed by testing every pattern.
     * @returns {number} Confidence rounded to two decimals.
     */
    calculateConfidence(pattern, match, input, threatIndicatorCount) {
        let confidence = pattern.baseConfidence;
        const indicatorCount = threatIndicatorCount
            ?? this.patterns.filter((p) => matchFromStart(p.pattern, input) !== null).length;
        // Boost confidence if multiple threat indicators
        if (indicatorCount > 1) {
            confidence = Math.min(confidence + 0.05 * (indicatorCount - 1), 0.99);
        }
        // Reduce confidence for very short inputs (less context)
        if (input.length < 50) {
            confidence *= 0.9;
        }
        // Boost confidence if at start of input (more likely intentional)
        if (match.index < 20) {
            confidence = Math.min(confidence + 0.05, 0.99);
        }
        return Math.round(confidence * 100) / 100;
    }
    /**
     * Adjust severity based on confidence
     *
     * @param {string} baseSeverity - Severity declared on the pattern.
     * @param {number} confidence - Computed confidence.
     * @returns {string} `critical` drops to `high` below 0.5 and `high` drops
     *   to `medium` below 0.4; anything else is returned unchanged.
     */
    adjustSeverity(baseSeverity, confidence) {
        if (confidence < 0.5 && baseSeverity === 'critical') {
            return 'high';
        }
        if (confidence < 0.4 && baseSeverity === 'high') {
            return 'medium';
        }
        return baseSeverity;
    }
    /**
     * Deduplicate threats by type
     *
     * Keeps the highest-confidence threat per `type` (the earliest one wins a
     * tie) and orders the result by severity, then by descending confidence.
     *
     * @param {Array<{type: string, severity: string, confidence: number}>} threats
     * @returns {Array} New array; the input array is not modified.
     */
    deduplicateThreats(threats) {
        const seen = new Map();
        for (const threat of threats) {
            const existing = seen.get(threat.type);
            if (!existing || threat.confidence > existing.confidence) {
                seen.set(threat.type, threat);
            }
        }
        // Severities outside the known set sort after `low` rather than
        // producing a NaN comparison.
        const unknownRank = Object.keys(SEVERITY_RANK).length;
        const rankOf = (severity) => SEVERITY_RANK[severity] ?? unknownRank;
        return Array.from(seen.values())
            .sort((a, b) => {
            // Sort by severity first, then confidence
            const severityDiff = rankOf(a.severity) - rankOf(b.severity);
            return severityDiff !== 0 ? severityDiff : b.confidence - a.confidence;
        });
    }
    /**
     * Hash input for caching/deduplication
     *
     * @param {string} input - Raw input text.
     * @returns {string} First 16 hex characters of the SHA-256 digest.
     */
    hashInput(input) {
        return createHash('sha256').update(input).digest('hex').slice(0, 16);
    }
}
/**
 * Factory wrapper around the ThreatDetectionService constructor.
 *
 * @param {Array<object>} [customPatterns] - Forwarded unchanged to the constructor.
 * @returns {ThreatDetectionService} A newly constructed service instance.
 */
export function createThreatDetectionService(customPatterns) {
    const service = new ThreatDetectionService(customPatterns);
    return service;
}
//# sourceMappingURL=threat-detection-service.js.map