tasq/node_modules/@claude-flow/hooks/dist/llm/llm-hooks.js

/**
* V3 LLM Hooks System
*
* Provides pre/post operation hooks for all LLM calls with:
* - Request caching with memory persistence
* - Provider-specific optimizations
* - Cost tracking and optimization
* - Performance metrics
* - Pattern learning
*
* @module @claude-flow/hooks/llm/llm-hooks
*/
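/*
 * Illustrative wiring (a sketch, not part of this module's API): the hooks
 * below are plain async functions, so a host runner can call them around a
 * provider request. `invokeProvider` and the exact `context` shape here are
 * assumptions for the example, not exports of this file.
 *
 *   const context = { correlationId: 'req-123' };
 *   const pre = await preLLMCallHook({ provider, model, request }, context);
 *   if (!pre.continue) return pre.cachedResponse;
 *   try {
 *     const response = await invokeProvider(pre.payload.request);
 *     await postLLMCallHook({ ...pre.payload, response }, context);
 *   } catch (error) {
 *     await errorLLMCallHook(pre.payload, error, context);
 *     throw error;
 *   }
 */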
import { createHash } from 'node:crypto';
import { reasoningBank } from '../reasoningbank/index.js';
const responseCache = new Map();
const DEFAULT_CACHE_TTL = 3600000; // 1 hour
const MAX_CACHE_SIZE = 1000;
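/**
 * Builds a deterministic cache key from the provider, model, and the request
 * fields that affect the completion (messages, temperature, maxTokens).
 */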
function generateCacheKey(provider, model, request) {
    const normalized = {
        provider,
        model,
        messages: request.messages,
        temperature: request.temperature,
        maxTokens: request.maxTokens,
    };
    // Hash rather than truncate: slicing a base64 blob to its first 64 chars
    // would make requests that share a long prefix collide on the same entry.
    return createHash('sha256').update(JSON.stringify(normalized)).digest('hex');
}
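/**
 * Returns the cache entry for `key`, evicting it first if it has outlived
 * DEFAULT_CACHE_TTL. Increments the entry's hit counter on a hit.
 */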
function getCached(key) {
    const entry = responseCache.get(key);
    if (!entry)
        return undefined;
    if (Date.now() - entry.timestamp > DEFAULT_CACHE_TTL) {
        responseCache.delete(key);
        return undefined;
    }
    entry.hits++;
    return entry;
}
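/**
 * Stores a response under `key`, evicting the oldest entry by timestamp when
 * the cache is at MAX_CACHE_SIZE.
 */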
function setCache(key, response) {
    // Enforce max size
    if (responseCache.size >= MAX_CACHE_SIZE) {
        const oldest = Array.from(responseCache.entries())
            .sort((a, b) => a[1].timestamp - b[1].timestamp)[0];
        if (oldest)
            responseCache.delete(oldest[0]);
    }
    responseCache.set(key, {
        response,
        timestamp: Date.now(),
        hits: 0,
        key,
    });
}
const providerOptimizations = {
    anthropic: {
        preferredTemperature: 0.7,
        systemPromptOptimizations: [
            'Be concise and direct',
            'Use structured output when appropriate',
        ],
        costReductionStrategies: [
            'Use claude-3-haiku for simple tasks',
            'Batch similar requests',
        ],
    },
    openai: {
        preferredTemperature: 0.8,
        systemPromptOptimizations: [
            'Respond in a structured format',
        ],
        costReductionStrategies: [
            'Use gpt-4o-mini for simple tasks',
            'Enable response caching',
        ],
    },
    google: {
        preferredTemperature: 0.7,
        costReductionStrategies: [
            'Use gemini-1.5-flash for simple tasks',
        ],
    },
    ollama: {
        preferredTemperature: 0.7,
        costReductionStrategies: [
            'Free - no cost optimization needed',
        ],
    },
};
function loadProviderOptimizations(provider) {
    return providerOptimizations[provider] || {};
}
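/**
 * Applies provider defaults to a request. Currently this only fills in the
 * preferred temperature when the caller left it unset.
 */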
function applyRequestOptimizations(request, optimizations) {
    // Fill in the provider's preferred temperature when the caller left it
    // unset. Return the original object otherwise, so callers can detect a
    // modification via reference comparison.
    if (request.temperature === undefined && optimizations.preferredTemperature !== undefined) {
        return { ...request, temperature: optimizations.preferredTemperature };
    }
    return request;
}
// ===== PRE-LLM HOOK =====
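/**
 * Pre-call hook: short-circuits with a cached response when one is fresh,
 * otherwise applies provider optimizations and records request metrics plus
 * a memory entry keyed by the call's correlationId.
 */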
export async function preLLMCallHook(payload, context) {
    const { provider, model, request } = payload;
    const sideEffects = [];
    // Check cache
    const cacheKey = generateCacheKey(provider, model, request);
    const cached = getCached(cacheKey);
    if (cached) {
        sideEffects.push({
            type: 'metric',
            action: 'increment',
            data: { name: 'llm.cache.hits', provider, model },
        });
        return {
            continue: false, // Skip LLM call
            modified: true,
            cachedResponse: cached.response,
            payload: {
                ...payload,
                metrics: {
                    ...payload.metrics,
                    requestStart: Date.now(),
                    cacheHit: true,
                },
            },
            sideEffects,
        };
    }
    // Load and apply optimizations
    const optimizations = loadProviderOptimizations(provider);
    const optimizedRequest = applyRequestOptimizations(request, optimizations);
    // Track request
    sideEffects.push({
        type: 'metric',
        action: 'increment',
        data: { name: `llm.calls.${provider}.${model}` },
    }, {
        type: 'memory',
        action: 'store',
        data: {
            key: `llm:request:${context.correlationId}`,
            value: {
                provider,
                model,
                request: optimizedRequest,
                timestamp: Date.now(),
            },
            ttl: 3600,
        },
    });
    return {
        continue: true,
        modified: optimizedRequest !== request,
        payload: {
            ...payload,
            request: optimizedRequest,
            metrics: {
                ...payload.metrics,
                requestStart: Date.now(),
                cacheHit: false,
            },
        },
        sideEffects,
    };
}
// ===== POST-LLM HOOK =====
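/**
 * Post-call hook: caches the response, records latency/token/cost metrics,
 * learns a pattern from substantial responses, and stores a truncated copy
 * of the response in memory for 24 hours.
 */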
export async function postLLMCallHook(payload, context) {
    const { provider, model, request, response, metrics } = payload;
    const sideEffects = [];
    if (!response) {
        return { continue: true, modified: false };
    }
    const latency = metrics?.requestStart
        ? Date.now() - metrics.requestStart
        : undefined;
    // Cache response
    const cacheKey = generateCacheKey(provider, model, request);
    setCache(cacheKey, response);
    // Track metrics, skipping values that are unavailable for this call
    if (latency !== undefined) {
        sideEffects.push({
            type: 'metric',
            action: 'record',
            data: {
                name: `llm.latency.${provider}`,
                value: latency,
            },
        });
    }
    if (response.usage?.totalTokens !== undefined) {
        sideEffects.push({
            type: 'metric',
            action: 'record',
            data: {
                name: `llm.tokens.${provider}`,
                value: response.usage.totalTokens,
            },
        });
    }
    if (response.cost) {
        sideEffects.push({
            type: 'metric',
            action: 'record',
            data: {
                name: `llm.cost.${provider}`,
                value: response.cost.totalCost,
            },
        });
    }
    // Learn patterns from successful responses
    if (response.content && response.content.length > 100) {
        const pattern = extractPatternFromResponse(request, response);
        if (pattern) {
            sideEffects.push({
                type: 'pattern',
                action: 'learn',
                data: {
                    strategy: pattern.strategy,
                    domain: pattern.domain,
                    quality: pattern.quality,
                },
            });
            // Store in reasoning bank
            try {
                await reasoningBank.storePattern(pattern.strategy, pattern.domain);
            }
            catch {
                // Ignore storage errors
            }
        }
    }
    // Store response in memory
    sideEffects.push({
        type: 'memory',
        action: 'store',
        data: {
            key: `llm:response:${context.correlationId}`,
            value: {
                provider,
                model,
                response: {
                    content: response.content?.slice(0, 500), // Truncate; content may be absent
                    usage: response.usage,
                    cost: response.cost,
                    latency,
                },
                timestamp: Date.now(),
            },
            ttl: 86400, // 24 hours
        },
    });
    return {
        continue: true,
        modified: false,
        payload: {
            ...payload,
            metrics: {
                requestStart: metrics?.requestStart ?? Date.now(),
                responseEnd: Date.now(),
                latency,
                cacheHit: metrics?.cacheHit,
                tokenEstimate: metrics?.tokenEstimate,
                costEstimate: metrics?.costEstimate,
            },
        },
        sideEffects,
    };
}
// ===== ERROR HOOK =====
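/**
 * Error hook: records error metrics and a log entry, and stores the failure
 * in memory for 24 hours so later analysis can learn from it.
 */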
export async function errorLLMCallHook(payload, error, context) {
    const { provider, model, metrics } = payload;
    const sideEffects = [];
    const latency = metrics?.requestStart
        ? Date.now() - metrics.requestStart
        : undefined;
    // Track error metrics
    sideEffects.push({
        type: 'metric',
        action: 'increment',
        data: {
            name: `llm.errors.${provider}`,
            errorType: error.name,
        },
    }, {
        type: 'log',
        action: 'error',
        data: {
            message: `LLM call failed: ${error.message}`,
            provider,
            model,
            latency,
            errorType: error.name,
        },
    });
    // Store error for learning
    sideEffects.push({
        type: 'memory',
        action: 'store',
        data: {
            key: `llm:error:${context.correlationId}`,
            value: {
                provider,
                model,
                error: {
                    name: error.name,
                    message: error.message,
                },
                timestamp: Date.now(),
            },
            ttl: 86400,
        },
    });
    return {
        continue: true,
        modified: false,
        sideEffects,
    };
}
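/**
 * Heuristic pattern extraction: infers a domain from keywords in the last
 * user message, takes the response prefix as the strategy, and scores quality
 * from length and structure (code blocks, lists). Returns null when there is
 * no user message to classify.
 */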
function extractPatternFromResponse(request, response) {
    // Simple heuristic pattern extraction
    const lastUserMessage = [...request.messages]
        .reverse()
        .find((m) => m.role === 'user');
    if (!lastUserMessage)
        return null;
    // Detect domain from content
    const content = lastUserMessage.content.toLowerCase();
    let domain = 'general';
    if (content.includes('code') || content.includes('function') || content.includes('implement')) {
        domain = 'code';
    }
    else if (content.includes('security') || content.includes('vulnerability')) {
        domain = 'security';
    }
    else if (content.includes('test') || content.includes('spec')) {
        domain = 'testing';
    }
    else if (content.includes('architecture') || content.includes('design')) {
        domain = 'architecture';
    }
    else if (content.includes('performance') || content.includes('optimize')) {
        domain = 'performance';
    }
    // Extract strategy from response
    const responseContent = response.content;
    const strategy = responseContent.length > 500
        ? responseContent.slice(0, 500)
        : responseContent;
    // Quality based on response length and structure
    const hasCodeBlocks = responseContent.includes('```');
    const hasLists = responseContent.includes('\n- ') || responseContent.includes('\n1.');
    const quality = Math.min(0.9, 0.5 + (hasCodeBlocks ? 0.2 : 0) + (hasLists ? 0.1 : 0) + (responseContent.length > 1000 ? 0.1 : 0));
    return {
        strategy: `[${domain}] ${strategy.slice(0, 200)}...`,
        domain,
        quality,
    };
}
// ===== CACHE MANAGEMENT =====
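/** Empties the in-memory response cache. */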
export function clearLLMCache() {
    responseCache.clear();
}
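/**
 * Summarizes cache state: total size, cumulative hits, and the ten
 * most-hit entries with their ages in milliseconds.
 */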
export function getLLMCacheStats() {
    let totalHits = 0;
    const entries = [];
    for (const [key, entry] of responseCache) {
        totalHits += entry.hits;
        entries.push({
            key,
            hits: entry.hits,
            age: Date.now() - entry.timestamp,
        });
    }
    // Sort by hit count so the slice below really is the top 10
    entries.sort((a, b) => b.hits - a.hits);
    return {
        size: responseCache.size,
        totalHits,
        entries: entries.slice(0, 10), // Top 10
    };
}
// ===== EXPORTS =====
export const llmHooks = {
    preLLMCall: preLLMCallHook,
    postLLMCall: postLLMCallHook,
    errorLLMCall: errorLLMCallHook,
    clearCache: clearLLMCache,
    getCacheStats: getLLMCacheStats,
};
export default llmHooks;
//# sourceMappingURL=llm-hooks.js.map