#!/usr/bin/env node
/**
* RuvLLM CLI - Self-learning LLM orchestration
*
* Usage:
* ruvllm query "What is machine learning?"
* ruvllm generate "Write a haiku about AI"
* ruvllm memory add "Important context"
* ruvllm memory search "context"
* ruvllm stats
* ruvllm benchmark
*/
const { RuvLLM, SimdOps, version, hasSimdSupport } = require('../dist/cjs/index.js');
// Raw CLI arguments, excluding the node binary and the script path.
const args = process.argv.slice(2);
// NOTE(review): this module-level `command` appears unused — main() re-derives
// the command via parseArgs(args); confirm before removing.
const command = args[0];
// Parse CLI arguments
/**
 * Parse raw CLI arguments into a command, flags, and positional arguments.
 *
 * Supported token forms:
 *   --key value   -> flags.key = "value" (next token, unless it starts with "--")
 *   --key=value   -> flags.key = "value" (new: inline form)
 *   --key         -> flags.key = true    (boolean flag)
 * The first non-flag token becomes `command`; any further non-flag tokens are
 * collected into `positional`.
 *
 * @param {string[]} args - argv slice (node binary and script path removed)
 * @returns {{command?: string, flags: Object<string, string|boolean>, positional: string[]}}
 */
function parseArgs(args) {
  const result = { flags: {}, positional: [] };
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg.startsWith('--')) {
      const body = arg.slice(2);
      const eq = body.indexOf('=');
      if (eq !== -1) {
        // Inline `--key=value` form.
        result.flags[body.slice(0, eq)] = body.slice(eq + 1);
      } else {
        const nextArg = args[i + 1];
        if (nextArg && !nextArg.startsWith('--')) {
          result.flags[body] = nextArg;
          i++; // Consume the value token.
        } else {
          result.flags[body] = true; // Bare boolean flag.
        }
      }
    } else if (!result.command) {
      result.command = arg;
    } else {
      result.positional.push(arg);
    }
  }
  return result;
}
// Format output
/**
 * Serialize a value as pretty-printed JSON (2-space indent) for terminal output.
 * @param {*} obj - any JSON-serializable value
 * @returns {string} indented JSON text
 */
function formatJson(obj) {
  const INDENT = 2;
  return JSON.stringify(obj, null, INDENT);
}
/**
 * Render a flat key/value object as an aligned two-column text table.
 * Keys are left-padded to the width of the longest key.
 * @param {Object<string, *>} data - rows to render
 * @returns {string} newline-joined table rows
 */
function formatTable(data) {
  const entries = Object.entries(data);
  const width = entries.reduce((max, [key]) => Math.max(max, key.length), 0);
  const rows = entries.map(([key, value]) => ` ${key.padEnd(width)} : ${value}`);
  return rows.join('\n');
}
// Commands
/**
 * Run the `query` command: route the text through the engine and print the
 * response (JSON with --json, otherwise text plus a model/latency footer).
 *
 * @param {object} llm - engine exposing query(text, config)
 * @param {string} text - the query text
 * @param {object} flags - parsed CLI flags (string or boolean values)
 */
async function runQuery(llm, text, flags) {
  // Build the generation config from CLI flags; flag values arrive as strings.
  const config = {};
  if (flags.temperature) config.temperature = parseFloat(flags.temperature);
  if (flags['max-tokens']) config.maxTokens = Number.parseInt(flags['max-tokens'], 10);
  if (flags['top-p']) config.topP = parseFloat(flags['top-p']);
  if (flags['top-k']) config.topK = Number.parseInt(flags['top-k'], 10);
  const response = llm.query(text, config);
  if (flags.json) {
    console.log(formatJson(response));
  } else {
    console.log('\n' + response.text);
    console.log(`\n--- Model: ${response.model} | Confidence: ${(response.confidence * 100).toFixed(1)}% | Latency: ${response.latencyMs.toFixed(2)}ms ---`);
  }
}
/**
 * Run the `generate` command: produce text from a prompt and print it raw.
 *
 * @param {object} llm - engine exposing generate(prompt, config)
 * @param {string} prompt - generation prompt
 * @param {object} flags - parsed CLI flags (string or boolean values)
 */
async function runGenerate(llm, prompt, flags) {
  // Translate CLI flags into a generation config; flag values are strings.
  const config = {};
  if (flags.temperature) config.temperature = parseFloat(flags.temperature);
  if (flags['max-tokens']) config.maxTokens = Number.parseInt(flags['max-tokens'], 10);
  if (flags['top-p']) config.topP = parseFloat(flags['top-p']);
  const text = llm.generate(prompt, config);
  console.log(text);
}
/**
 * Run `memory add`: store content (with optional JSON metadata from the
 * --metadata flag) and print the ID assigned by the engine.
 *
 * @param {object} llm - engine exposing addMemory(content, metadata)
 * @param {string} content - text to store
 * @param {object} flags - parsed CLI flags; flags.metadata is a JSON string
 */
async function runMemoryAdd(llm, content, flags) {
  let metadata;
  if (flags.metadata) {
    metadata = JSON.parse(flags.metadata);
  }
  const id = llm.addMemory(content, metadata);
  console.log(`Added memory with ID: ${id}`);
}
/**
 * Run `memory search`: fetch the top-k most similar memories and print them
 * (JSON with --json, otherwise a ranked list with score/ID and a 100-char
 * content preview).
 *
 * @param {object} llm - engine exposing searchMemory(query, k)
 * @param {string} query - search text
 * @param {object} flags - parsed CLI flags; flags.k overrides the result count
 */
async function runMemorySearch(llm, query, flags) {
  // Number of results to return (default 10).
  const k = flags.k ? Number.parseInt(flags.k, 10) : 10;
  const results = llm.searchMemory(query, k);
  if (flags.json) {
    console.log(formatJson(results));
  } else {
    if (results.length === 0) {
      console.log('No results found.');
      return;
    }
    results.forEach((r, i) => {
      console.log(`\n[${i + 1}] Score: ${r.score.toFixed(4)} | ID: ${r.id}`);
      // Truncate long content to keep terminal output readable.
      console.log(` ${r.content.slice(0, 100)}${r.content.length > 100 ? '...' : ''}`);
    });
  }
}
/**
 * Run the `stats` command: print engine statistics, either as JSON (--json)
 * or as an aligned table of derived, human-readable values.
 *
 * @param {object} llm - engine exposing stats()
 * @param {object} flags - parsed CLI flags
 */
async function runStats(llm, flags) {
  const stats = llm.stats();
  if (flags.json) {
    console.log(formatJson(stats));
    return;
  }
  console.log('\nRuvLLM Statistics:');
  const rows = {
    'Total Queries': stats.totalQueries,
    'Memory Nodes': stats.memoryNodes,
    'Patterns Learned': stats.patternsLearned,
    'Avg Latency': `${stats.avgLatencyMs.toFixed(2)}ms`,
    'Cache Hit Rate': `${(stats.cacheHitRate * 100).toFixed(1)}%`,
    'Router Accuracy': `${(stats.routerAccuracy * 100).toFixed(1)}%`,
  };
  console.log(formatTable(rows));
}
/**
 * Run the `route` command: show which model/configuration the router would
 * pick for the given text, as JSON (--json) or an aligned table.
 *
 * @param {object} llm - engine exposing route(text)
 * @param {string} text - query text to route
 * @param {object} flags - parsed CLI flags
 */
async function runRoute(llm, text, flags) {
  const decision = llm.route(text);
  if (flags.json) {
    console.log(formatJson(decision));
    return;
  }
  console.log('\nRouting Decision:');
  const rows = {
    'Model': decision.model,
    'Context Size': decision.contextSize,
    'Temperature': decision.temperature.toFixed(2),
    'Top-P': decision.topP.toFixed(2),
    'Confidence': `${(decision.confidence * 100).toFixed(1)}%`,
  };
  console.log(formatTable(rows));
}
/**
 * Run the `embed` command: print the embedding for `text` — full vector as
 * JSON (--json), otherwise a summary (dimension count, first 10 values, L2 norm).
 *
 * @param {object} llm - engine exposing embed(text)
 * @param {string} text - text to embed
 * @param {object} flags - parsed CLI flags
 */
async function runEmbed(llm, text, flags) {
  const embedding = llm.embed(text);
  if (flags.json) {
    console.log(formatJson({ embedding, dimensions: embedding.length }));
    return;
  }
  // Human-readable summary: head of the vector plus its Euclidean norm.
  const head = embedding
    .slice(0, 10)
    .map((value) => value.toFixed(4))
    .join(', ');
  let sumSquares = 0;
  for (const value of embedding) {
    sumSquares += value * value;
  }
  console.log(`Embedding (${embedding.length} dimensions):`);
  console.log(` First 10: [${head}...]`);
  console.log(` Norm: ${Math.sqrt(sumSquares).toFixed(4)}`);
}
/**
 * Run the `similarity` command: print how similar two texts are, as JSON
 * (--json) or a percentage.
 *
 * @param {object} llm - engine exposing similarity(text1, text2)
 * @param {string} text1 - first text
 * @param {string} text2 - second text
 * @param {object} flags - parsed CLI flags
 */
async function runSimilarity(llm, text1, text2, flags) {
  const score = llm.similarity(text1, text2);
  if (flags.json) {
    console.log(formatJson({ text1, text2, similarity: score }));
    return;
  }
  const percent = (score * 100).toFixed(2);
  console.log(`Similarity: ${percent}%`);
}
/**
 * Run the `benchmark` command: time the SIMD primitives (dot product, cosine
 * similarity, L2 distance, softmax) over random vectors and print ops/sec.
 *
 * @param {object} flags - parsed CLI flags; --dims (default 768) and
 *   --iterations (default 1000) size the workload
 */
async function runBenchmark(flags) {
  const simd = new SimdOps();
  const dims = flags.dims ? Number.parseInt(flags.dims, 10) : 768;
  const iterations = flags.iterations ? Number.parseInt(flags.iterations, 10) : 1000;
  // Random test vectors shared by every benchmarked operation.
  const a = Array.from({ length: dims }, () => Math.random());
  const b = Array.from({ length: dims }, () => Math.random());
  console.log(`\nBenchmark: ${dims} dimensions, ${iterations} iterations`);
  console.log(`SIMD: ${simd.isNative() ? 'Native' : 'JavaScript fallback'}`);
  console.log(`Capabilities: ${simd.capabilities().join(', ')}`);
  console.log('');
  // Time `iterations` calls of `op` and print elapsed ms plus throughput.
  // (Factored out of four duplicated timing loops.)
  const bench = (label, op) => {
    const start = Date.now();
    for (let i = 0; i < iterations; i++) {
      op();
    }
    const elapsed = Date.now() - start;
    console.log(`${label}: ${elapsed}ms (${(iterations / elapsed * 1000).toFixed(0)} ops/sec)`);
  };
  bench('Dot Product', () => simd.dotProduct(a, b));
  bench('Cosine Similarity', () => simd.cosineSimilarity(a, b));
  bench('L2 Distance', () => simd.l2Distance(a, b));
  bench('Softmax', () => simd.softmax(a));
}
/**
 * Run the `info` command: report version, native/SIMD status, and platform
 * details, as JSON (--json) or an aligned table.
 *
 * @param {object} flags - parsed CLI flags
 */
async function runInfo(flags) {
  const llm = new RuvLLM();
  const info = {
    version: version(),
    native: llm.isNativeLoaded(),
    simd: hasSimdSupport(),
    capabilities: llm.simdCapabilities(),
    platform: process.platform,
    arch: process.arch,
    nodeVersion: process.version,
  };
  if (flags.json) {
    console.log(formatJson(info));
    return;
  }
  console.log('\nRuvLLM Info:');
  const rows = {
    'Version': info.version,
    'Native Module': info.native ? 'Loaded' : 'Fallback (JS)',
    'SIMD Support': info.simd ? 'Yes' : 'No',
    'Capabilities': info.capabilities.join(', '),
    'Platform': `${info.platform}-${info.arch}`,
    'Node.js': info.nodeVersion,
  };
  console.log(formatTable(rows));
}
/**
 * Print CLI usage, commands, options, and examples to stdout.
 * The template literal below is user-facing output — keep its text in sync
 * with the commands dispatched in main().
 */
function printHelp() {
console.log(`
RuvLLM - Self-learning LLM Orchestration
Usage: ruvllm <command> [options]
Commands:
query <text> Query the LLM with automatic routing
generate <prompt> Generate text with SIMD inference
route <text> Get routing decision for query
memory add <content> Add content to memory
memory search <query> Search memory for similar content
embed <text> Get embedding for text
similarity <t1> <t2> Compute similarity between texts
stats Show engine statistics
benchmark Run SIMD performance benchmark
info Show system information
help Show this help message
Options:
--json Output as JSON
--temperature <float> Sampling temperature (0.0-2.0)
--max-tokens <int> Maximum tokens to generate
--top-p <float> Nucleus sampling (0.0-1.0)
--top-k <int> Top-k sampling
--k <int> Number of results for search
--metadata <json> Metadata for memory add
--dims <int> Dimensions for benchmark (default: 768)
--iterations <int> Iterations for benchmark (default: 1000)
Examples:
ruvllm query "What is machine learning?"
ruvllm generate "Write a poem about AI" --temperature 0.9
ruvllm memory add "Important context" --metadata '{"type":"note"}'
ruvllm memory search "context" --k 5
ruvllm similarity "hello world" "hi there"
ruvllm benchmark --dims 1024 --iterations 5000
Learn more: https://github.com/ruvnet/ruvector
`);
}
// Main
/**
 * CLI entry point: parse argv, dispatch to the matching command handler, and
 * exit non-zero on unknown commands, missing arguments, or handler errors.
 * Use --verbose to include the stack trace on error.
 */
async function main() {
  const parsed = parseArgs(args);
  const { command, positional, flags } = parsed;
  if (!command || command === 'help' || flags.help) {
    printHelp();
    return;
  }
  // Create the engine for commands that need it (benchmark ignores it;
  // info constructs its own instance).
  const llm = new RuvLLM({
    embeddingDim: flags.dim ? Number.parseInt(flags.dim, 10) : 768,
    learningEnabled: !flags['no-learning'],
  });
  // Print an error and exit when a required positional argument is missing.
  const requireArg = (value, message) => {
    if (!value) {
      console.error(`Error: ${message}`);
      process.exit(1);
    }
  };
  try {
    switch (command) {
      case 'query':
        requireArg(positional[0], 'query text required');
        await runQuery(llm, positional[0], flags);
        break;
      case 'generate':
        requireArg(positional[0], 'prompt required');
        await runGenerate(llm, positional[0], flags);
        break;
      case 'route':
        requireArg(positional[0], 'text required');
        await runRoute(llm, positional[0], flags);
        break;
      case 'memory': {
        // Braces scope `subcommand` to this case (fixes no-case-declarations).
        const subcommand = positional[0];
        if (subcommand === 'add') {
          requireArg(positional[1], 'content required');
          await runMemoryAdd(llm, positional[1], flags);
        } else if (subcommand === 'search') {
          requireArg(positional[1], 'query required');
          await runMemorySearch(llm, positional[1], flags);
        } else {
          console.error('Error: unknown memory subcommand. Use "add" or "search"');
          process.exit(1);
        }
        break;
      }
      case 'embed':
        requireArg(positional[0], 'text required');
        await runEmbed(llm, positional[0], flags);
        break;
      case 'similarity':
        if (!positional[0] || !positional[1]) {
          console.error('Error: two texts required');
          process.exit(1);
        }
        await runSimilarity(llm, positional[0], positional[1], flags);
        break;
      case 'stats':
        await runStats(llm, flags);
        break;
      case 'benchmark':
        await runBenchmark(flags);
        break;
      case 'info':
        await runInfo(flags);
        break;
      default:
        console.error(`Unknown command: ${command}`);
        console.error('Run "ruvllm help" for usage information.');
        process.exit(1);
    }
  } catch (error) {
    console.error('Error:', error.message);
    if (flags.verbose) {
      console.error(error.stack);
    }
    process.exit(1);
  }
}
// Top-level launch: surface any unhandled rejection from main() and exit
// non-zero so shell scripts can detect failure.
main().catch(err => {
console.error('Fatal error:', err);
process.exit(1);
});