/**
 * V3 LLM Hooks System
 *
 * Provides pre/post operation hooks for all LLM calls with:
 * - Request caching with memory persistence
 * - Provider-specific optimizations
 * - Cost tracking and optimization
 * - Performance metrics
 * - Pattern learning
 *
 * @module @claude-flow/hooks/llm/llm-hooks
 */
import { createHash } from 'node:crypto';

import { reasoningBank } from '../reasoningbank/index.js';

const responseCache = new Map();
const DEFAULT_CACHE_TTL = 3600000; // 1 hour
const MAX_CACHE_SIZE = 1000;

function generateCacheKey(provider, model, request) {
  const normalized = {
    provider,
    model,
    messages: request.messages,
    temperature: request.temperature,
    maxTokens: request.maxTokens,
  };
  // Hash the full normalized request; a truncated encoding of the raw JSON
  // would collide for requests that share a long common prefix.
  return createHash('sha256').update(JSON.stringify(normalized)).digest('hex');
}

function getCached(key) {
  const entry = responseCache.get(key);
  if (!entry) return undefined;
  if (Date.now() - entry.timestamp > DEFAULT_CACHE_TTL) {
    responseCache.delete(key);
    return undefined;
  }
  entry.hits++;
  return entry;
}

function setCache(key, response) {
  // Enforce max size by evicting the oldest entry
  if (responseCache.size >= MAX_CACHE_SIZE) {
    const oldest = Array.from(responseCache.entries())
      .sort((a, b) => a[1].timestamp - b[1].timestamp)[0];
    if (oldest) responseCache.delete(oldest[0]);
  }
  responseCache.set(key, {
    response,
    timestamp: Date.now(),
    hits: 0,
    key,
  });
}
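
// Cache behavior sketch: getCached evicts expired entries lazily (on read)
// and counts hits, so getLLMCacheStats() below reflects actual reuse:
//
//   setCache(k, response);
//   getCached(k)?.response  // within 1 hour -> response, entry.hits === 1
//   // after DEFAULT_CACHE_TTL has elapsed -> undefined, entry evicted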

const providerOptimizations = {
  anthropic: {
    preferredTemperature: 0.7,
    systemPromptOptimizations: [
      'Be concise and direct',
      'Use structured output when appropriate',
    ],
    costReductionStrategies: [
      'Use claude-3-haiku for simple tasks',
      'Batch similar requests',
    ],
  },
  openai: {
    preferredTemperature: 0.8,
    systemPromptOptimizations: [
      'Respond in a structured format',
    ],
    costReductionStrategies: [
      'Use gpt-4o-mini for simple tasks',
      'Enable response caching',
    ],
  },
  google: {
    preferredTemperature: 0.7,
    costReductionStrategies: [
      'Use gemini-1.5-flash for simple tasks',
    ],
  },
  ollama: {
    preferredTemperature: 0.7,
    costReductionStrategies: [
      'Free - no cost optimization needed',
    ],
  },
};

function loadProviderOptimizations(provider) {
  return providerOptimizations[provider] || {};
}

function applyRequestOptimizations(request, optimizations) {
  // Apply the provider's preferred temperature only when the caller has not
  // set one; otherwise return the original object untouched so callers can
  // detect modification by identity.
  if (request.temperature === undefined && optimizations.preferredTemperature !== undefined) {
    return { ...request, temperature: optimizations.preferredTemperature };
  }
  return request;
}
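
// A minimal sketch of the optimization pass (values in the table above are
// illustrative defaults, not provider requirements): a request without an
// explicit temperature picks up the provider's preferred one, while an
// explicit temperature -- including 0 -- is respected:
//
//   applyRequestOptimizations({ messages }, providerOptimizations.anthropic)
//     // -> { messages, temperature: 0.7 }
//   applyRequestOptimizations({ messages, temperature: 0 }, providerOptimizations.openai)
//     // -> returned as-is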

// ===== PRE-LLM HOOK =====

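/**
 * Pre-call hook: short-circuits on a cache hit, otherwise applies
 * provider-specific request optimizations and records request metrics.
 *
 * The payload shape below is inferred from how fields are used in this
 * module, not from a published contract:
 *
 * @param {{ provider: string, model: string, request: object, metrics?: object }} payload
 * @param {{ correlationId: string }} context
 * @returns {Promise<object>} hook result; `continue: false` plus
 *   `cachedResponse` signals the caller to skip the LLM call entirely
 */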
export async function preLLMCallHook(payload, context) {
  const { provider, model, request } = payload;
  const sideEffects = [];

  // Check cache
  const cacheKey = generateCacheKey(provider, model, request);
  const cached = getCached(cacheKey);
  if (cached) {
    sideEffects.push({
      type: 'metric',
      action: 'increment',
      data: { name: 'llm.cache.hits', provider, model },
    });
    return {
      continue: false, // Skip the LLM call; serve the cached response
      modified: true,
      cachedResponse: cached.response,
      payload: {
        ...payload,
        metrics: {
          ...payload.metrics,
          requestStart: Date.now(),
          cacheHit: true,
        },
      },
      sideEffects,
    };
  }

  // Load and apply provider-specific optimizations
  const optimizations = loadProviderOptimizations(provider);
  const optimizedRequest = applyRequestOptimizations(request, optimizations);

  // Track the outgoing request
  sideEffects.push({
    type: 'metric',
    action: 'increment',
    data: { name: `llm.calls.${provider}.${model}` },
  }, {
    type: 'memory',
    action: 'store',
    data: {
      key: `llm:request:${context.correlationId}`,
      value: {
        provider,
        model,
        request: optimizedRequest,
        timestamp: Date.now(),
      },
      ttl: 3600,
    },
  });

  return {
    continue: true,
    modified: optimizedRequest !== request,
    payload: {
      ...payload,
      request: optimizedRequest,
      metrics: {
        ...payload.metrics,
        requestStart: Date.now(),
        cacheHit: false,
      },
    },
    sideEffects,
  };
}

// ===== POST-LLM HOOK =====

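/**
 * Post-call hook: caches the response, records latency/token/cost metrics,
 * learns reasoning patterns from substantial responses, and stores a
 * truncated copy of the response in memory.
 *
 * Payload shape is inferred from usage in this module:
 *
 * @param {{ provider: string, model: string, request: object, response?: object, metrics?: object }} payload
 * @param {{ correlationId: string }} context
 * @returns {Promise<object>} hook result with updated timing metrics
 */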
export async function postLLMCallHook(payload, context) {
  const { provider, model, request, response, metrics } = payload;
  const sideEffects = [];

  if (!response) {
    return { continue: true, modified: false };
  }

  const latency = metrics?.requestStart
    ? Date.now() - metrics.requestStart
    : undefined;

  // Cache response
  const cacheKey = generateCacheKey(provider, model, request);
  setCache(cacheKey, response);

  // Track metrics, skipping values we could not measure
  if (latency !== undefined) {
    sideEffects.push({
      type: 'metric',
      action: 'record',
      data: {
        name: `llm.latency.${provider}`,
        value: latency,
      },
    });
  }
  if (response.usage?.totalTokens !== undefined) {
    sideEffects.push({
      type: 'metric',
      action: 'record',
      data: {
        name: `llm.tokens.${provider}`,
        value: response.usage.totalTokens,
      },
    });
  }
  if (response.cost) {
    sideEffects.push({
      type: 'metric',
      action: 'record',
      data: {
        name: `llm.cost.${provider}`,
        value: response.cost.totalCost,
      },
    });
  }

  // Learn patterns from successful responses
  if (response.content && response.content.length > 100) {
    const pattern = extractPatternFromResponse(request, response);
    if (pattern) {
      sideEffects.push({
        type: 'pattern',
        action: 'learn',
        data: {
          strategy: pattern.strategy,
          domain: pattern.domain,
          quality: pattern.quality,
        },
      });
      // Store in reasoning bank; storage failures must not break the hook
      try {
        await reasoningBank.storePattern(pattern.strategy, pattern.domain);
      }
      catch {
        // Ignore storage errors
      }
    }
  }

  // Store a truncated copy of the response in memory
  sideEffects.push({
    type: 'memory',
    action: 'store',
    data: {
      key: `llm:response:${context.correlationId}`,
      value: {
        provider,
        model,
        response: {
          content: response.content?.slice(0, 500), // Truncate; content may be absent
          usage: response.usage,
          cost: response.cost,
          latency,
        },
        timestamp: Date.now(),
      },
      ttl: 86400, // 24 hours
    },
  });

  return {
    continue: true,
    modified: false,
    payload: {
      ...payload,
      metrics: {
        requestStart: metrics?.requestStart ?? Date.now(),
        responseEnd: Date.now(),
        latency,
        cacheHit: metrics?.cacheHit,
        tokenEstimate: metrics?.tokenEstimate,
        costEstimate: metrics?.costEstimate,
      },
    },
    sideEffects,
  };
}

// ===== ERROR HOOK =====

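/**
 * Error hook: records error metrics and a structured error log entry, and
 * stores the failure in memory for later learning. Always allows the
 * pipeline to continue; retry/fallback policy is left to the caller.
 *
 * @param {{ provider: string, model: string, metrics?: object }} payload
 * @param {Error} error
 * @param {{ correlationId: string }} context
 */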
export async function errorLLMCallHook(payload, error, context) {
  const { provider, model, metrics } = payload;
  const sideEffects = [];

  const latency = metrics?.requestStart
    ? Date.now() - metrics.requestStart
    : undefined;

  // Track error metrics
  sideEffects.push({
    type: 'metric',
    action: 'increment',
    data: {
      name: `llm.errors.${provider}`,
      errorType: error.name,
    },
  }, {
    type: 'log',
    action: 'error',
    data: {
      message: `LLM call failed: ${error.message}`,
      provider,
      model,
      latency,
      errorType: error.name,
    },
  });

  // Store error for learning
  sideEffects.push({
    type: 'memory',
    action: 'store',
    data: {
      key: `llm:error:${context.correlationId}`,
      value: {
        provider,
        model,
        error: {
          name: error.name,
          message: error.message,
        },
        timestamp: Date.now(),
      },
      ttl: 86400,
    },
  });

  return {
    continue: true,
    modified: false,
    sideEffects,
  };
}

function extractPatternFromResponse(request, response) {
  // Simple heuristic pattern extraction
  const lastUserMessage = [...request.messages]
    .reverse()
    .find((m) => m.role === 'user');
  if (!lastUserMessage) return null;

  // Detect domain from content
  const content = lastUserMessage.content.toLowerCase();
  let domain = 'general';
  if (content.includes('code') || content.includes('function') || content.includes('implement')) {
    domain = 'code';
  } else if (content.includes('security') || content.includes('vulnerability')) {
    domain = 'security';
  } else if (content.includes('test') || content.includes('spec')) {
    domain = 'testing';
  } else if (content.includes('architecture') || content.includes('design')) {
    domain = 'architecture';
  } else if (content.includes('performance') || content.includes('optimize')) {
    domain = 'performance';
  }

  // The strategy is a truncated prefix of the response
  const responseContent = response.content;
  const strategy = responseContent.slice(0, 200);

  // Quality heuristic: structure (code blocks, lists) and length raise the
  // score, capped at 0.9
  const hasCodeBlocks = responseContent.includes('```');
  const hasLists = responseContent.includes('\n- ') || responseContent.includes('\n1.');
  const quality = Math.min(
    0.9,
    0.5
      + (hasCodeBlocks ? 0.2 : 0)
      + (hasLists ? 0.1 : 0)
      + (responseContent.length > 1000 ? 0.1 : 0),
  );

  return {
    strategy: `[${domain}] ${strategy}...`,
    domain,
    quality,
  };
}
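
// Worked example of the quality heuristic: a 1,200-character response
// containing a fenced code block and a bulleted list scores
// min(0.9, 0.5 + 0.2 + 0.1 + 0.1) = 0.9, the cap.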

// ===== CACHE MANAGEMENT =====

export function clearLLMCache() {
  responseCache.clear();
}

export function getLLMCacheStats() {
  let totalHits = 0;
  const entries = [];
  for (const [key, entry] of responseCache) {
    totalHits += entry.hits;
    entries.push({
      key,
      hits: entry.hits,
      age: Date.now() - entry.timestamp,
    });
  }
  return {
    size: responseCache.size,
    totalHits,
    // Top 10 entries by hit count
    entries: entries.sort((a, b) => b.hits - a.hits).slice(0, 10),
  };
}

// ===== EXPORTS =====

export const llmHooks = {
  preLLMCall: preLLMCallHook,
  postLLMCall: postLLMCallHook,
  errorLLMCall: errorLLMCallHook,
  clearCache: clearLLMCache,
  getCacheStats: getLLMCacheStats,
};

export default llmHooks;
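
// Illustrative wiring sketch (not part of this module's public API):
// a hook runner would call preLLMCall before dispatching to a provider,
// short-circuit on a cache hit, and feed the result through postLLMCall
// (or errorLLMCall on failure). `callProvider` is a hypothetical dispatcher:
//
//   const pre = await llmHooks.preLLMCall({ provider, model, request, metrics: {} }, context);
//   if (!pre.continue) return pre.cachedResponse;
//   try {
//     const response = await callProvider(pre.payload.request);
//     await llmHooks.postLLMCall({ ...pre.payload, response }, context);
//     return response;
//   } catch (error) {
//     await llmHooks.errorLLMCall(pre.payload, error, context);
//     throw error;
//   }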

//# sourceMappingURL=llm-hooks.js.map