tasq/node_modules/agentic-flow/dist/utils/modelOptimizer.js

415 lines
17 KiB
JavaScript

/**
* Smart Model Optimizer - Automatically selects the best model for each agent and task
* Balances performance vs cost based on agent requirements
*/
import { logger } from './logger.js';
/**
* Model database with performance characteristics.
*
* Keyed by a short model id. The insertion order of the entries is the order in which
* ModelOptimizer.optimize() scores them, so it also decides the outcome of score ties.
*
* Fields read by the code in this file:
* - provider, model, modelName: copied into the returned recommendation.
* - cost_per_1m_input, cost_per_1m_output: price per one million tokens, used by
*   ModelOptimizer.estimateCost() and printed with a "$" prefix by displayRecommendation().
* - quality_score, speed_score, cost_score: weighted together by optimize() and printed
*   as "<score>/100". In this table a higher cost_score means a cheaper model
*   (free models carry 100, the most expensive entry carries 20).
* - tier: 'flagship' | 'cost-effective' | 'balanced' | 'budget' | 'local'; optimize() uses it
*   for complexity matching and for the 'privacy' priority.
* - supports_tools: entries with `false` are dropped when criteria.requiresTools is set.
* - bestFor: lowercase agent names; a match earns the model a scoring bonus.
*
* strengths and weaknesses are descriptive labels; nothing in this file reads them.
*/
const MODEL_DATABASE = {
// Tier 1: Flagship Models
'claude-sonnet-4-5': {
provider: 'anthropic',
model: 'claude-sonnet-4-5-20250929',
modelName: 'Claude Sonnet 4.5',
cost_per_1m_input: 3.00,
cost_per_1m_output: 15.00,
quality_score: 95,
speed_score: 85,
cost_score: 20,
tier: 'flagship',
supports_tools: true,
strengths: ['reasoning', 'coding', 'analysis', 'production'],
weaknesses: ['cost'],
bestFor: ['coder', 'reviewer', 'architecture', 'planner', 'production-validator']
},
'gpt-4o': {
provider: 'openrouter',
model: 'openai/gpt-4o',
modelName: 'GPT-4o',
cost_per_1m_input: 2.50,
cost_per_1m_output: 10.00,
quality_score: 88,
speed_score: 90,
cost_score: 30,
tier: 'flagship',
supports_tools: true,
strengths: ['multimodal', 'speed', 'general-purpose', 'vision'],
weaknesses: ['cost'],
bestFor: ['researcher', 'analyst', 'multimodal-tasks']
},
'gemini-2-5-pro': {
provider: 'openrouter',
model: 'google/gemini-2.5-pro',
modelName: 'Gemini 2.5 Pro',
cost_per_1m_input: 1.25,
cost_per_1m_output: 5.00,
quality_score: 90,
speed_score: 75,
cost_score: 50,
tier: 'flagship',
supports_tools: true,
strengths: ['reasoning', 'large-context', 'math', 'analysis'],
weaknesses: ['speed'],
bestFor: ['planner', 'architecture', 'researcher', 'code-analyzer']
},
// Tier 2: Cost-Effective Champions
'deepseek-r1': {
provider: 'openrouter',
model: 'deepseek/deepseek-r1-0528:free',
modelName: 'DeepSeek R1',
cost_per_1m_input: 0.00,
cost_per_1m_output: 0.00,
quality_score: 90,
speed_score: 80,
cost_score: 100,
tier: 'cost-effective',
supports_tools: false, // DeepSeek R1 does NOT support tool/function calling
strengths: ['reasoning', 'coding', 'math', 'value', 'free'],
weaknesses: ['newer-model', 'no-tool-use'],
bestFor: ['coder', 'pseudocode', 'specification', 'refinement', 'tester']
},
'deepseek-chat-v3': {
provider: 'openrouter',
model: 'deepseek/deepseek-chat-v3.1:free',
modelName: 'DeepSeek Chat V3.1',
cost_per_1m_input: 0.00,
cost_per_1m_output: 0.00,
quality_score: 82,
speed_score: 90,
cost_score: 100,
tier: 'cost-effective',
supports_tools: true,
strengths: ['cost', 'speed', 'coding', 'development', 'free'],
weaknesses: ['complex-reasoning'],
bestFor: ['coder', 'reviewer', 'tester', 'backend-dev', 'cicd-engineer']
},
// Tier 3: Balanced Performance
'gemini-2-5-flash': {
provider: 'openrouter',
model: 'google/gemini-2.5-flash',
modelName: 'Gemini 2.5 Flash',
cost_per_1m_input: 0.075,
cost_per_1m_output: 0.30,
quality_score: 78,
speed_score: 98,
cost_score: 98,
tier: 'balanced',
supports_tools: true,
strengths: ['speed', 'cost', 'interactive'],
weaknesses: ['quality'],
bestFor: ['researcher', 'planner', 'smart-agent']
},
'llama-3-3-8b': {
provider: 'openrouter',
model: 'meta-llama/llama-3.3-8b-instruct:free',
modelName: 'Llama 3.3 8B',
cost_per_1m_input: 0.00,
cost_per_1m_output: 0.00,
quality_score: 72,
speed_score: 95,
cost_score: 100,
tier: 'balanced',
supports_tools: true,
strengths: ['open-source', 'versatile', 'coding', 'free', 'fast'],
weaknesses: ['smaller-model'],
bestFor: ['coder', 'reviewer', 'base-template-generator', 'tester']
},
'qwen-2-5-72b': {
provider: 'openrouter',
model: 'qwen/qwen-2.5-72b-instruct',
modelName: 'Qwen 2.5 72B',
cost_per_1m_input: 0.35,
cost_per_1m_output: 0.40,
quality_score: 81,
speed_score: 85,
cost_score: 90,
tier: 'balanced',
supports_tools: true,
strengths: ['multilingual', 'coding', 'reasoning'],
weaknesses: ['english-optimized'],
bestFor: ['researcher', 'coder', 'multilingual-tasks']
},
// Tier 4: Budget Options
'llama-3-1-8b': {
provider: 'openrouter',
model: 'meta-llama/llama-3.1-8b-instruct',
modelName: 'Llama 3.1 8B',
cost_per_1m_input: 0.06,
cost_per_1m_output: 0.06,
quality_score: 65,
speed_score: 95,
cost_score: 99,
tier: 'budget',
supports_tools: true,
strengths: ['ultra-low-cost', 'speed'],
weaknesses: ['quality', 'complex-tasks'],
bestFor: ['simple-tasks', 'testing']
},
// Tier 5: Local/Privacy
'onnx-phi-4': {
provider: 'onnx',
model: 'phi-4-mini',
modelName: 'ONNX Phi-4 Mini',
cost_per_1m_input: 0.00,
cost_per_1m_output: 0.00,
quality_score: 58,
speed_score: 30,
cost_score: 100,
tier: 'local',
supports_tools: false,
strengths: ['privacy', 'offline', 'zero-cost'],
weaknesses: ['quality', 'speed'],
bestFor: ['privacy-tasks', 'offline-tasks']
}
};
/**
 * Quality floor and complexity profile for each known agent type.
 *
 * Keys are lowercase agent names; the 'default' entry is the fallback used for any
 * agent without an entry of its own. Each value has the shape
 * { minQuality, complexity, needsReasoning }.
 */
const AGENT_REQUIREMENTS = (() => {
    // Every call returns a new record, so no two agents share one object.
    const profile = (minQuality, complexity, needsReasoning) => ({ minQuality, complexity, needsReasoning });
    const expert = () => profile(90, 'expert', true);
    const complexAt = (minQuality) => profile(minQuality, 'complex', true);
    const moderateAt = (minQuality, needsReasoning) => profile(minQuality, 'moderate', needsReasoning);
    return {
        // High-quality code generation
        'coder': complexAt(85),
        'sparc-coder': complexAt(85),
        'backend-dev': complexAt(80),
        // Architecture and design
        'architecture': expert(),
        'system-architect': expert(),
        'planner': complexAt(85),
        // Code review and analysis
        'reviewer': complexAt(85),
        'code-analyzer': complexAt(80),
        'production-validator': expert(),
        // Testing
        'tester': moderateAt(75, false),
        'tdd-london-swarm': complexAt(80),
        // Research and analysis
        'researcher': moderateAt(75, true),
        'analyst': complexAt(80),
        // SPARC phases
        'specification': complexAt(85),
        'pseudocode': complexAt(80),
        'refinement': complexAt(85),
        // DevOps and automation
        'cicd-engineer': moderateAt(75, false),
        'smart-agent': moderateAt(70, false),
        // Documentation
        'api-docs': moderateAt(70, false),
        'base-template-generator': profile(70, 'simple', false),
        // Default for unknown agents
        'default': moderateAt(75, true)
    };
})();
export class ModelOptimizer {
    /**
     * Optimize model selection based on agent, task, and priorities.
     *
     * Every candidate in MODEL_DATABASE is scored, the scores are adjusted for the
     * agent's requirements, the task complexity and an optional cost cap, and the
     * best-ranked model is returned together with a human-readable explanation.
     *
     * @param {object} criteria - Selection criteria.
     * @param {string} criteria.agent - Agent name; matched case-insensitively against
     *   AGENT_REQUIREMENTS and each model's `bestFor` list. Unknown or missing agents
     *   use the 'default' requirements.
     * @param {string} criteria.task - Task description, used to infer complexity and
     *   to estimate cost. A missing task is treated as an empty description.
     * @param {string} [criteria.taskComplexity] - 'simple' | 'moderate' | 'complex' |
     *   'expert'. Inferred from the task when omitted.
     * @param {string} [criteria.priority] - 'quality' | 'cost' | 'speed' | 'privacy' |
     *   'balanced'. Anything else (including omitted) is scored as 'balanced'.
     * @param {boolean} [criteria.requiresTools] - Drop models whose `supports_tools`
     *   is `false`.
     * @param {number} [criteria.maxCostPerTask] - Estimated-cost cap per task; models
     *   above it are heavily penalized. A cap of 0 is honored (only free models avoid
     *   the penalty).
     * @returns {object} Recommendation with provider, model, modelName, prices, the
     *   three raw scores, overall_score, tier and reasoning.
     * @throws {Error} If no model is left after filtering.
     */
    static optimize(criteria) {
        logger.info('Optimizing model selection', criteria);
        // Normalize once; the key is reused for the requirements lookup and the bestFor bonus.
        const agentKey = typeof criteria.agent === 'string' ? criteria.agent.toLowerCase() : '';
        // Own-property check so names such as "constructor" cannot resolve to inherited members.
        const agentReqs = Object.prototype.hasOwnProperty.call(AGENT_REQUIREMENTS, agentKey)
            ? AGENT_REQUIREMENTS[agentKey]
            : AGENT_REQUIREMENTS['default'];
        // Determine task complexity from task description if not provided
        const taskComplexity = criteria.taskComplexity || this.inferComplexity(criteria.task);
        // Set default priority to balanced if not specified
        const priority = criteria.priority || 'balanced';
        // Filter models that support tools if required
        let availableModels = Object.entries(MODEL_DATABASE);
        if (criteria.requiresTools) {
            availableModels = availableModels.filter(([, model]) => model.supports_tools !== false);
            logger.info(`Filtered to ${availableModels.length} models with tool support`);
        }
        if (availableModels.length === 0) {
            throw new Error('No models available for the given criteria');
        }
        // A cap of 0 is a real cap ("free models only"), not "no cap".
        const costCap = criteria.maxCostPerTask;
        const hasCostCap = costCap === 0 || Boolean(costCap);
        const preferLocal = priority === 'privacy';
        // Score all models
        const scoredModels = availableModels.map(([key, model]) => {
            let overall_score = this.baseScore(model, priority);
            // Apply agent-specific bonuses
            if (model.bestFor.includes(agentKey)) {
                overall_score += 10;
            }
            // Under the privacy priority a local model is chosen for where it runs, not for
            // its raw quality, so the quality and complexity penalties must not undo that.
            const isPrivacyLocal = preferLocal && model.tier === 'local';
            if (!isPrivacyLocal) {
                // Penalize models below the agent's quality threshold
                if (model.quality_score < agentReqs.minQuality) {
                    overall_score *= 0.5;
                }
                // Apply complexity matching
                if (taskComplexity === 'expert' && model.tier !== 'flagship') {
                    overall_score *= 0.7;
                }
                else if (taskComplexity === 'simple' && model.tier === 'flagship') {
                    overall_score *= 0.8; // Don't waste flagship models on simple tasks
                }
            }
            // Heavy penalty for exceeding the budget
            if (hasCostCap && this.estimateCost(model, criteria.task) > costCap) {
                overall_score *= 0.3;
            }
            return {
                key,
                ...model,
                overall_score
            };
        });
        // Sort by overall score; with the privacy priority, local models always rank first
        // so a bonus on a cloud model cannot outrank them.
        scoredModels.sort((a, b) => {
            if (preferLocal) {
                const aLocal = a.tier === 'local';
                const bLocal = b.tier === 'local';
                if (aLocal !== bLocal) {
                    return aLocal ? -1 : 1;
                }
            }
            return b.overall_score - a.overall_score;
        });
        // Get top recommendation
        const top = scoredModels[0];
        // Generate reasoning
        const reasoning = this.generateReasoning(top, criteria, agentReqs, taskComplexity, priority);
        const recommendation = {
            provider: top.provider,
            model: top.model,
            modelName: top.modelName,
            cost_per_1m_input: top.cost_per_1m_input,
            cost_per_1m_output: top.cost_per_1m_output,
            quality_score: top.quality_score,
            speed_score: top.speed_score,
            cost_score: top.cost_score,
            overall_score: top.overall_score,
            tier: top.tier,
            reasoning
        };
        logger.info('Model optimization complete', {
            selected: recommendation.modelName,
            score: recommendation.overall_score
        });
        return recommendation;
    }
    /**
     * Priority-weighted base score of a model, before any bonus or penalty.
     *
     * @param {object} model - Entry with quality_score, speed_score, cost_score and tier.
     * @param {string} priority - Unrecognized values are weighted as 'balanced'.
     * @returns {number} Weighted score.
     */
    static baseScore(model, priority) {
        switch (priority) {
            case 'quality':
                return model.quality_score * 0.7 + model.speed_score * 0.2 + model.cost_score * 0.1;
            case 'cost':
                return model.cost_score * 0.7 + model.quality_score * 0.2 + model.speed_score * 0.1;
            case 'speed':
                return model.speed_score * 0.7 + model.quality_score * 0.2 + model.cost_score * 0.1;
            case 'privacy':
                // Heavily favor local models for privacy
                return model.tier === 'local' ? 100 : model.cost_score * 0.5 + model.quality_score * 0.5;
            case 'balanced':
            default:
                return model.quality_score * 0.4 + model.cost_score * 0.4 + model.speed_score * 0.2;
        }
    }
    /**
     * Infer task complexity from task description.
     *
     * Keyword groups are checked from most to least demanding (expert, then complex,
     * then simple) using case-insensitive substring matching; the first group with a
     * hit wins.
     *
     * @param {string} task - Task description; a non-string is treated as empty.
     * @returns {string} 'expert', 'complex', 'simple', or 'moderate' when nothing matches.
     */
    static inferComplexity(task) {
        const lowerTask = typeof task === 'string' ? task.toLowerCase() : '';
        const mentionsAny = (keywords) => keywords.some((keyword) => lowerTask.includes(keyword));
        // Expert-level indicators
        if (mentionsAny(['architecture', 'design system', 'production', 'enterprise', 'scale', 'distributed'])) {
            return 'expert';
        }
        // Complex indicators
        if (mentionsAny(['implement', 'create', 'build', 'develop', 'integrate', 'api', 'database'])) {
            return 'complex';
        }
        // Simple indicators
        if (mentionsAny(['simple', 'basic', 'hello world', 'example', 'template'])) {
            return 'simple';
        }
        // Default to moderate
        return 'moderate';
    }
    /**
     * Estimate cost for a task (rough approximation).
     *
     * Input tokens are approximated as one token per four characters of the task
     * description; the output is assumed to be 1,000 tokens.
     *
     * @param {object} model - Entry with cost_per_1m_input and cost_per_1m_output.
     * @param {string} task - Task description; a non-string counts as zero input tokens.
     * @returns {number} Estimated cost in the unit of the model's per-million prices.
     */
    static estimateCost(model, task) {
        const taskLength = typeof task === 'string' ? task.length : 0;
        const inputTokens = Math.ceil(taskLength / 4);
        const outputTokens = 1000; // Assume 1K token output
        const inputCost = (inputTokens / 1000000) * model.cost_per_1m_input;
        const outputCost = (outputTokens / 1000000) * model.cost_per_1m_output;
        return inputCost + outputCost;
    }
    /**
     * Generate human-readable reasoning for model selection.
     *
     * @param {object} model - Selected model entry including overall_score.
     * @param {object} criteria - Original selection criteria (agent and task are read).
     * @param {object} agentReqs - Requirements of the agent; kept for signature
     *   compatibility and not read here.
     * @param {string} taskComplexity - Complexity used during scoring.
     * @param {string} priority - Priority used during scoring; unrecognized values are
     *   described as balanced, mirroring how they are scored.
     * @returns {string} Reasons joined with '. '.
     */
    static generateReasoning(model, criteria, agentReqs, taskComplexity, priority) {
        const reasons = [];
        // Priority-based reasoning
        switch (priority) {
            case 'quality':
                reasons.push(`Selected for highest quality (${model.quality_score}/100)`);
                break;
            case 'cost':
                reasons.push(`Selected for best cost efficiency (${model.cost_score}/100)`);
                break;
            case 'speed':
                reasons.push(`Selected for fastest response (${model.speed_score}/100)`);
                break;
            case 'privacy':
                if (model.tier === 'local') {
                    reasons.push('Selected for 100% privacy (runs locally)');
                }
                else {
                    reasons.push('Best available option for privacy concerns');
                }
                break;
            case 'balanced':
            default:
                reasons.push(`Balanced selection (overall: ${Math.round(model.overall_score)}/100)`);
                break;
        }
        // Agent-specific reasoning
        const agentKey = typeof criteria.agent === 'string' ? criteria.agent.toLowerCase() : '';
        if (model.bestFor.includes(agentKey)) {
            reasons.push(`Optimized for ${criteria.agent} agent tasks`);
        }
        // Complexity matching
        if (taskComplexity === 'expert' && model.tier === 'flagship') {
            reasons.push('Flagship model for expert-level complexity');
        }
        else if (taskComplexity === 'simple' && model.tier !== 'flagship') {
            reasons.push('Cost-effective for simple tasks');
        }
        // Cost information
        const estCost = this.estimateCost(model, criteria.task);
        reasons.push(`Estimated cost: $${estCost.toFixed(6)} per task`);
        // Tier information
        reasons.push(`Tier: ${model.tier}`);
        return reasons.join('. ');
    }
    /**
     * Get all available models with their characteristics.
     *
     * @returns {object} The MODEL_DATABASE object itself (not a copy).
     */
    static getAvailableModels() {
        return MODEL_DATABASE;
    }
    /**
     * Display optimization recommendations in console.
     *
     * @param {object} recommendation - Object returned by optimize().
     * @returns {void}
     */
    static displayRecommendation(recommendation) {
        const rule = '═'.repeat(60);
        const inputPrice = recommendation.cost_per_1m_input.toFixed(2);
        const outputPrice = recommendation.cost_per_1m_output.toFixed(2);
        console.log('\n🎯 Optimized Model Selection');
        console.log(rule);
        console.log(`Model: ${recommendation.modelName}`);
        console.log(`Provider: ${recommendation.provider}`);
        console.log(`Tier: ${recommendation.tier}`);
        console.log('');
        console.log('Scores:');
        console.log(` Quality: ${recommendation.quality_score}/100`);
        console.log(` Speed: ${recommendation.speed_score}/100`);
        console.log(` Cost: ${recommendation.cost_score}/100`);
        console.log(` Overall: ${Math.round(recommendation.overall_score)}/100`);
        console.log('');
        console.log(`Cost: $${inputPrice}/1M input, $${outputPrice}/1M output`);
        console.log('');
        console.log('Reasoning:');
        console.log(` ${recommendation.reasoning}`);
        console.log(rule);
        console.log('');
    }
}
//# sourceMappingURL=modelOptimizer.js.map