/**
 * Smart Model Optimizer - Automatically selects the best model for each agent and task.
 * Balances performance vs. cost based on agent requirements.
 */
import { logger } from './logger.js';
/**
 * Model database with performance characteristics.
 *
 * Keys are short internal model ids. Each entry has:
 *  - provider / model / modelName: copied verbatim into the recommendation
 *    returned by ModelOptimizer.optimize (model is presumably the provider's
 *    own model identifier — verify against the provider clients).
 *  - cost_per_1m_input / cost_per_1m_output: price in dollars per one million
 *    input / output tokens (this is how estimateCost and displayRecommendation
 *    use them).
 *  - quality_score / speed_score / cost_score: scores displayed as "N/100".
 *    For cost_score, higher means cheaper (free models carry 100).
 *  - tier: 'flagship' | 'cost-effective' | 'balanced' | 'budget' | 'local'.
 *  - supports_tools: false excludes the model when the caller sets
 *    criteria.requiresTools.
 *  - strengths / weaknesses: descriptive tags; not read by the optimizer code
 *    in this file.
 *  - bestFor: agent names (lower-case) that earn this model a score bonus.
 */
const MODEL_DATABASE = {
  // Tier 1: Flagship Models
  'claude-sonnet-4-5': {
    provider: 'anthropic',
    model: 'claude-sonnet-4-5-20250929',
    modelName: 'Claude Sonnet 4.5',
    cost_per_1m_input: 3.00,
    cost_per_1m_output: 15.00,
    quality_score: 95,
    speed_score: 85,
    cost_score: 20,
    tier: 'flagship',
    supports_tools: true,
    strengths: ['reasoning', 'coding', 'analysis', 'production'],
    weaknesses: ['cost'],
    bestFor: ['coder', 'reviewer', 'architecture', 'planner', 'production-validator']
  },
  'gpt-4o': {
    provider: 'openrouter',
    model: 'openai/gpt-4o',
    modelName: 'GPT-4o',
    cost_per_1m_input: 2.50,
    cost_per_1m_output: 10.00,
    quality_score: 88,
    speed_score: 90,
    cost_score: 30,
    tier: 'flagship',
    supports_tools: true,
    strengths: ['multimodal', 'speed', 'general-purpose', 'vision'],
    weaknesses: ['cost'],
    bestFor: ['researcher', 'analyst', 'multimodal-tasks']
  },
  'gemini-2-5-pro': {
    provider: 'openrouter',
    model: 'google/gemini-2.5-pro',
    modelName: 'Gemini 2.5 Pro',
    cost_per_1m_input: 1.25,
    cost_per_1m_output: 5.00,
    quality_score: 90,
    speed_score: 75,
    cost_score: 50,
    tier: 'flagship',
    supports_tools: true,
    strengths: ['reasoning', 'large-context', 'math', 'analysis'],
    weaknesses: ['speed'],
    bestFor: ['planner', 'architecture', 'researcher', 'code-analyzer']
  },
  // Tier 2: Cost-Effective Champions
  'deepseek-r1': {
    provider: 'openrouter',
    model: 'deepseek/deepseek-r1-0528:free',
    modelName: 'DeepSeek R1',
    cost_per_1m_input: 0.00,
    cost_per_1m_output: 0.00,
    quality_score: 90,
    speed_score: 80,
    cost_score: 100,
    tier: 'cost-effective',
    supports_tools: false, // DeepSeek R1 does NOT support tool/function calling
    strengths: ['reasoning', 'coding', 'math', 'value', 'free'],
    weaknesses: ['newer-model', 'no-tool-use'],
    bestFor: ['coder', 'pseudocode', 'specification', 'refinement', 'tester']
  },
  'deepseek-chat-v3': {
    provider: 'openrouter',
    model: 'deepseek/deepseek-chat-v3.1:free',
    modelName: 'DeepSeek Chat V3.1',
    cost_per_1m_input: 0.00,
    cost_per_1m_output: 0.00,
    quality_score: 82,
    speed_score: 90,
    cost_score: 100,
    tier: 'cost-effective',
    supports_tools: true,
    strengths: ['cost', 'speed', 'coding', 'development', 'free'],
    weaknesses: ['complex-reasoning'],
    bestFor: ['coder', 'reviewer', 'tester', 'backend-dev', 'cicd-engineer']
  },
  // Tier 3: Balanced Performance
  'gemini-2-5-flash': {
    provider: 'openrouter',
    model: 'google/gemini-2.5-flash',
    modelName: 'Gemini 2.5 Flash',
    cost_per_1m_input: 0.075,
    cost_per_1m_output: 0.30,
    quality_score: 78,
    speed_score: 98,
    cost_score: 98,
    tier: 'balanced',
    supports_tools: true,
    strengths: ['speed', 'cost', 'interactive'],
    weaknesses: ['quality'],
    bestFor: ['researcher', 'planner', 'smart-agent']
  },
  'llama-3-3-8b': {
    provider: 'openrouter',
    model: 'meta-llama/llama-3.3-8b-instruct:free',
    modelName: 'Llama 3.3 8B',
    cost_per_1m_input: 0.00,
    cost_per_1m_output: 0.00,
    quality_score: 72,
    speed_score: 95,
    cost_score: 100,
    tier: 'balanced',
    supports_tools: true,
    strengths: ['open-source', 'versatile', 'coding', 'free', 'fast'],
    weaknesses: ['smaller-model'],
    bestFor: ['coder', 'reviewer', 'base-template-generator', 'tester']
  },
  'qwen-2-5-72b': {
    provider: 'openrouter',
    model: 'qwen/qwen-2.5-72b-instruct',
    modelName: 'Qwen 2.5 72B',
    cost_per_1m_input: 0.35,
    cost_per_1m_output: 0.40,
    quality_score: 81,
    speed_score: 85,
    cost_score: 90,
    tier: 'balanced',
    supports_tools: true,
    strengths: ['multilingual', 'coding', 'reasoning'],
    weaknesses: ['english-optimized'],
    bestFor: ['researcher', 'coder', 'multilingual-tasks']
  },
  // Tier 4: Budget Options
  'llama-3-1-8b': {
    provider: 'openrouter',
    model: 'meta-llama/llama-3.1-8b-instruct',
    modelName: 'Llama 3.1 8B',
    cost_per_1m_input: 0.06,
    cost_per_1m_output: 0.06,
    quality_score: 65,
    speed_score: 95,
    cost_score: 99,
    tier: 'budget',
    supports_tools: true,
    strengths: ['ultra-low-cost', 'speed'],
    weaknesses: ['quality', 'complex-tasks'],
    bestFor: ['simple-tasks', 'testing']
  },
  // Tier 5: Local/Privacy
  'onnx-phi-4': {
    provider: 'onnx',
    model: 'phi-4-mini',
    modelName: 'ONNX Phi-4 Mini',
    cost_per_1m_input: 0.00,
    cost_per_1m_output: 0.00,
    quality_score: 58,
    speed_score: 30,
    cost_score: 100,
    tier: 'local',
    supports_tools: false,
    strengths: ['privacy', 'offline', 'zero-cost'],
    weaknesses: ['quality', 'speed'],
    bestFor: ['privacy-tasks', 'offline-tasks']
  }
};

/**
 * Agent complexity and quality requirements, keyed by lower-case agent name.
 *
 *  - minQuality: models whose quality_score is below this value are penalized
 *    by ModelOptimizer.optimize.
 *  - complexity / needsReasoning: descriptive metadata; not read by the
 *    optimizer code in this file.
 *
 * The 'default' entry is the fallback for agents that are not listed.
 */
const AGENT_REQUIREMENTS = {
  // High-quality code generation
  'coder': { minQuality: 85, complexity: 'complex', needsReasoning: true },
  'sparc-coder': { minQuality: 85, complexity: 'complex', needsReasoning: true },
  'backend-dev': { minQuality: 80, complexity: 'complex', needsReasoning: true },
  // Architecture and design
  'architecture': { minQuality: 90, complexity: 'expert', needsReasoning: true },
  'system-architect': { minQuality: 90, complexity: 'expert', needsReasoning: true },
  'planner': { minQuality: 85, complexity: 'complex', needsReasoning: true },
  // Code review and analysis
  'reviewer': { minQuality: 85, complexity: 'complex', needsReasoning: true },
  'code-analyzer': { minQuality: 80, complexity: 'complex', needsReasoning: true },
  'production-validator': { minQuality: 90, complexity: 'expert', needsReasoning: true },
  // Testing
  'tester': { minQuality: 75, complexity: 'moderate', needsReasoning: false },
  'tdd-london-swarm': { minQuality: 80, complexity: 'complex', needsReasoning: true },
  // Research and analysis
  'researcher': { minQuality: 75, complexity: 'moderate', needsReasoning: true },
  'analyst': { minQuality: 80, complexity: 'complex', needsReasoning: true },
  // SPARC phases
  'specification': { minQuality: 85, complexity: 'complex', needsReasoning: true },
  'pseudocode': { minQuality: 80, complexity: 'complex', needsReasoning: true },
  'refinement': { minQuality: 85, complexity: 'complex', needsReasoning: true },
  // DevOps and automation
  'cicd-engineer': { minQuality: 75, complexity: 'moderate', needsReasoning: false },
  'smart-agent': { minQuality: 70, complexity: 'moderate', needsReasoning: false },
  // Documentation
  'api-docs': { minQuality: 70, complexity: 'moderate', needsReasoning: false },
  'base-template-generator': { minQuality: 70, complexity: 'simple', needsReasoning: false },
  // Default for unknown agents
  'default': { minQuality: 75, complexity: 'moderate', needsReasoning: true }
};

export class ModelOptimizer {
  /**
   * Optimize model selection based on agent, task, and priorities.
   *
   * Every candidate in MODEL_DATABASE gets a priority-weighted base score,
   * which is then adjusted by: a +10 bonus when the model lists the agent in
   * bestFor, a 0.5x penalty when its quality is below the agent's minQuality,
   * a tier/complexity multiplier, and a 0.3x penalty when the estimated cost
   * exceeds criteria.maxCostPerTask. The highest-scoring model is returned.
   *
   * @param {object} criteria
   * @param {string} [criteria.agent] Agent name (case-insensitive). Unknown or
   *   missing agents use the 'default' requirements.
   * @param {string} [criteria.task] Task description; used to infer complexity
   *   and to estimate cost.
   * @param {string} [criteria.taskComplexity] 'simple' | 'moderate' | 'complex'
   *   | 'expert'. Inferred from the task text when omitted.
   * @param {string} [criteria.priority] 'quality' | 'cost' | 'speed' |
   *   'privacy' | 'balanced'. Anything else is scored as 'balanced'.
   * @param {boolean} [criteria.requiresTools] Drop models with
   *   supports_tools === false.
   * @param {number} [criteria.maxCostPerTask] Budget per task in dollars;
   *   0 is honored and means "free models only".
   * @returns {object} Recommendation: provider, model, modelName, pricing,
   *   scores, overall_score, tier and a human-readable reasoning string.
   * @throws {Error} When no model is left after filtering.
   */
  static optimize(criteria) {
    logger.info('Optimizing model selection', criteria);

    // Normalize the agent once; a missing agent simply falls through to the
    // default requirements instead of throwing on `.toLowerCase()`.
    const agentKey = criteria.agent == null ? '' : String(criteria.agent).toLowerCase();

    // Own-property lookup so names such as "constructor" do not resolve to
    // inherited Object.prototype members.
    const hasAgentReqs = Object.prototype.hasOwnProperty.call(AGENT_REQUIREMENTS, agentKey);
    const agentReqs = hasAgentReqs ? AGENT_REQUIREMENTS[agentKey] : AGENT_REQUIREMENTS['default'];

    // Determine task complexity from the task description if not provided
    const taskComplexity = criteria.taskComplexity || this.inferComplexity(criteria.task);

    // Default priority is balanced
    const priority = criteria.priority || 'balanced';

    // Keep only models that support tools if required
    let availableModels = Object.entries(MODEL_DATABASE);
    if (criteria.requiresTools) {
      availableModels = availableModels.filter(([, model]) => model.supports_tools !== false);
      logger.info(`Filtered to ${availableModels.length} models with tool support`);
    }
    if (availableModels.length === 0) {
      throw new Error('No models match the requested criteria');
    }

    // A cap of 0 is a valid budget; a plain truthiness test would ignore it.
    const hasCostCap = criteria.maxCostPerTask === 0 || Boolean(criteria.maxCostPerTask);

    // Score all models
    const scoredModels = availableModels.map(([key, model]) => {
      let overall_score;
      switch (priority) {
        case 'quality':
          overall_score = model.quality_score * 0.7 + model.speed_score * 0.2 + model.cost_score * 0.1;
          break;
        case 'cost':
          overall_score = model.cost_score * 0.7 + model.quality_score * 0.2 + model.speed_score * 0.1;
          break;
        case 'speed':
          overall_score = model.speed_score * 0.7 + model.quality_score * 0.2 + model.cost_score * 0.1;
          break;
        case 'privacy':
          // Heavily favor local models for privacy.
          // NOTE(review): the 100 given to local models is still subject to
          // the bonus and penalties below, so a remote model can end up
          // ranked above the local one — confirm whether that is intended.
          overall_score = model.tier === 'local' ? 100 : model.cost_score * 0.5 + model.quality_score * 0.5;
          break;
        case 'balanced':
        default:
          overall_score = model.quality_score * 0.4 + model.cost_score * 0.4 + model.speed_score * 0.2;
          break;
      }

      // Agent-specific bonus
      if (model.bestFor.includes(agentKey)) {
        overall_score += 10;
      }

      // Penalize models below the agent's quality threshold
      if (model.quality_score < agentReqs.minQuality) {
        overall_score *= 0.5;
      }

      // Complexity matching
      if (taskComplexity === 'expert' && model.tier !== 'flagship') {
        overall_score *= 0.7;
      } else if (taskComplexity === 'simple' && model.tier === 'flagship') {
        // Don't waste flagship models on simple tasks.
        // NOTE(review): the original comment said "unless quality priority",
        // but the penalty is applied for every priority.
        overall_score *= 0.8;
      }

      // Heavy penalty for exceeding the budget, if one was given
      if (hasCostCap) {
        const estimatedCost = this.estimateCost(model, criteria.task);
        if (estimatedCost > criteria.maxCostPerTask) {
          overall_score *= 0.3;
        }
      }

      return { key, ...model, overall_score };
    });

    // Highest score first
    scoredModels.sort((a, b) => b.overall_score - a.overall_score);
    const top = scoredModels[0];

    const reasoning = this.generateReasoning(top, criteria, agentReqs, taskComplexity, priority);
    const recommendation = {
      provider: top.provider,
      model: top.model,
      modelName: top.modelName,
      cost_per_1m_input: top.cost_per_1m_input,
      cost_per_1m_output: top.cost_per_1m_output,
      quality_score: top.quality_score,
      speed_score: top.speed_score,
      cost_score: top.cost_score,
      overall_score: top.overall_score,
      tier: top.tier,
      reasoning
    };

    logger.info('Model optimization complete', {
      selected: recommendation.modelName,
      score: recommendation.overall_score
    });
    return recommendation;
  }

  /**
   * Infer task complexity from the task description using case-insensitive
   * substring matching. Expert keywords are checked first, then complex, then
   * simple; the first group that matches wins.
   *
   * @param {string} [task] Task description. A missing task is 'moderate'.
   * @returns {'expert'|'complex'|'simple'|'moderate'}
   */
  static inferComplexity(task) {
    const lowerTask = task == null ? '' : String(task).toLowerCase();
    const mentions = (keyword) => lowerTask.includes(keyword);

    // Expert-level indicators
    const expertKeywords = ['architecture', 'design system', 'production', 'enterprise', 'scale', 'distributed'];
    if (expertKeywords.some(mentions)) {
      return 'expert';
    }

    // Complex indicators
    const complexKeywords = ['implement', 'create', 'build', 'develop', 'integrate', 'api', 'database'];
    if (complexKeywords.some(mentions)) {
      return 'complex';
    }

    // Simple indicators
    const simpleKeywords = ['simple', 'basic', 'hello world', 'example', 'template'];
    if (simpleKeywords.some(mentions)) {
      return 'simple';
    }

    // Default to moderate
    return 'moderate';
  }

  /**
   * Estimate cost for a task (rough approximation): input tokens are taken as
   * task length / 4 (rounded up) and the output is assumed to be 1K tokens.
   *
   * @param {{cost_per_1m_input: number, cost_per_1m_output: number}} model
   * @param {string} [task] Task description; a missing task counts as empty.
   * @returns {number} Estimated cost in dollars.
   */
  static estimateCost(model, task) {
    const taskLength = task == null ? 0 : task.length;
    const inputTokens = Math.ceil(taskLength / 4);
    const outputTokens = 1000; // Assume 1K token output
    const inputCost = (inputTokens / 1000000) * model.cost_per_1m_input;
    const outputCost = (outputTokens / 1000000) * model.cost_per_1m_output;
    return inputCost + outputCost;
  }

  /**
   * Generate human-readable reasoning for a model selection.
   *
   * @param {object} model Scored model entry (database fields plus overall_score).
   * @param {object} criteria The criteria passed to optimize.
   * @param {object} agentReqs Agent requirements (accepted but not used here).
   * @param {string} taskComplexity Resolved task complexity.
   * @param {string} priority Resolved priority.
   * @returns {string} Sentences joined with '. '.
   */
  static generateReasoning(model, criteria, agentReqs, taskComplexity, priority) {
    const reasons = [];

    // Priority-based reasoning
    switch (priority) {
      case 'quality':
        reasons.push(`Selected for highest quality (${model.quality_score}/100)`);
        break;
      case 'cost':
        reasons.push(`Selected for best cost efficiency (${model.cost_score}/100)`);
        break;
      case 'speed':
        reasons.push(`Selected for fastest response (${model.speed_score}/100)`);
        break;
      case 'privacy':
        if (model.tier === 'local') {
          reasons.push('Selected for 100% privacy (runs locally)');
        } else {
          reasons.push('Best available option for privacy concerns');
        }
        break;
      case 'balanced':
      default:
        // optimize() scores unknown priorities as balanced, so explain them
        // the same way instead of omitting the sentence.
        reasons.push(`Balanced selection (overall: ${Math.round(model.overall_score)}/100)`);
        break;
    }

    // Agent-specific reasoning
    const agentKey = criteria.agent == null ? '' : String(criteria.agent).toLowerCase();
    if (model.bestFor.includes(agentKey)) {
      reasons.push(`Optimized for ${criteria.agent} agent tasks`);
    }

    // Complexity matching
    if (taskComplexity === 'expert' && model.tier === 'flagship') {
      reasons.push('Flagship model for expert-level complexity');
    } else if (taskComplexity === 'simple' && model.tier !== 'flagship') {
      reasons.push('Cost-effective for simple tasks');
    }

    // Cost information
    const estCost = this.estimateCost(model, criteria.task);
    reasons.push(`Estimated cost: $${estCost.toFixed(6)} per task`);

    // Tier information
    reasons.push(`Tier: ${model.tier}`);

    return reasons.join('. ');
  }

  /**
   * Get all available models with their characteristics.
   *
   * @returns {object} The module's MODEL_DATABASE object itself (not a copy).
   */
  static getAvailableModels() {
    return MODEL_DATABASE;
  }

  /**
   * Display an optimization recommendation on the console.
   *
   * @param {object} recommendation A recommendation as returned by optimize.
   * @returns {void}
   */
  static displayRecommendation(recommendation) {
    const rule = '═'.repeat(60);
    const inputPrice = recommendation.cost_per_1m_input.toFixed(2);
    const outputPrice = recommendation.cost_per_1m_output.toFixed(2);

    console.log('\n🎯 Optimized Model Selection');
    console.log(rule);
    console.log(`Model: ${recommendation.modelName}`);
    console.log(`Provider: ${recommendation.provider}`);
    console.log(`Tier: ${recommendation.tier}`);
    console.log('');
    console.log('Scores:');
    console.log(` Quality: ${recommendation.quality_score}/100`);
    console.log(` Speed: ${recommendation.speed_score}/100`);
    console.log(` Cost: ${recommendation.cost_score}/100`);
    console.log(` Overall: ${Math.round(recommendation.overall_score)}/100`);
    console.log('');
    console.log(`Cost: $${inputPrice}/1M input, $${outputPrice}/1M output`);
    console.log('');
    console.log('Reasoning:');
    console.log(` ${recommendation.reasoning}`);
    console.log(rule);
    console.log('');
  }
}

//# sourceMappingURL=modelOptimizer.js.map