/**
 * Continue Gate - Long-Running Agent Control
 *
 * Evaluates whether a long-running agent should continue its next step.
 * Prevents runaway loops, budget exhaustion, and coherence degradation.
 *
 * Problem:
 * Current gates are tool-centric (PreToolUse, PreCommand, PreEdit).
 * Long-run loops are often internally generated — the agent keeps going
 * without a single obviously bad tool call. There is no gate for "should
 * this agent continue at all?"
 *
 * ContinueGate provides step-level evaluation with:
 * - Hard limits on consecutive steps without checkpoints
 * - Budget acceleration detection via linear regression
 * - Coherence threshold enforcement
 * - Uncertainty threshold enforcement
 * - Rework ratio tracking
 * - Automatic checkpoint intervals
 * - Cooldown between evaluations
 *
 * Decision types:
 * - continue: Agent may proceed to next step
 * - checkpoint: Agent must save state before continuing
 * - throttle: Agent should slow down or wait
 * - pause: Agent should stop and await human review
 * - stop: Agent must halt immediately
 *
 * @module @claude-flow/guidance/continue-gate
 */

// ============================================================================
// Default Configuration
// ============================================================================

// Frozen so the shared defaults can never be mutated; instances only ever
// spread this object into their own config copy.
const DEFAULT_CONFIG = Object.freeze({
  maxConsecutiveSteps: 100,
  maxBudgetSlopePerStep: 0.02,
  minCoherenceForContinue: 0.4,
  maxUncertaintyForContinue: 0.8,
  maxReworkRatio: 0.3,
  checkpointIntervalSteps: 25,
  cooldownMs: 5000,
});

// Maximum number of evaluation records retained by evaluateWithHistory().
const MAX_HISTORY_SIZE = 10000;

// Number of recent steps to use for budget slope calculation
const SLOPE_WINDOW_SIZE = 10;

// ============================================================================
// ContinueGate
// ============================================================================

/**
 * Gate that evaluates whether a long-running agent should continue.
 *
 * Prevents runaway execution by checking:
 * - Step limits
 * - Budget exhaustion and acceleration
 * - Coherence degradation
 * - Uncertainty thresholds
 * - Rework ratios
 * - Checkpoint intervals
 *
 * Maintains history of evaluations and provides aggregate statistics.
 */
export class ContinueGate {
  /** Effective configuration: defaults overlaid with constructor overrides. */
  config;

  /** Records appended by evaluateWithHistory(), oldest first. */
  history = [];

  /** Date.now() of the last full evaluation made via evaluateWithHistory(). */
  lastEvaluationTime = 0;

  /** Sliding window of { step, tokens } points used for the slope regression. */
  tokenHistory = [];

  /**
   * @param config - Partial configuration; unspecified values use defaults
   */
  constructor(config = {}) {
    this.config = { ...DEFAULT_CONFIG, ...config };
  }

  /**
   * Evaluate whether the agent should continue.
   *
   * Applies decision logic in priority order:
   * 1. Coherence below threshold → stop
   * 2. Step limit exceeded → stop
   * 3. Budget exhausted → stop
   * 4. High rework ratio → pause
   * 5. High uncertainty → pause
   * 6. Budget acceleration → throttle
   * 7. Checkpoint interval reached → checkpoint
   * 8. Otherwise → continue
   *
   * Side effect: every call appends the context's step/token pair to the
   * slope window (capped at SLOPE_WINDOW_SIZE points). Nothing is written
   * to the evaluation history; use evaluateWithHistory() for that.
   *
   * @param context - Current step context. Fields read: stepNumber,
   *   totalTokensUsed, reworkCount, lastCheckpointStep, coherenceScore,
   *   uncertaintyScore, and budgetRemaining.{tokens, toolCalls, timeMs}
   * @returns Decision with reasons and metrics
   */
  evaluate(context) {
    const reasons = [];

    // Track token usage for slope calculation
    this.tokenHistory.push({
      step: context.stepNumber,
      tokens: context.totalTokensUsed,
    });
    if (this.tokenHistory.length > SLOPE_WINDOW_SIZE) {
      this.tokenHistory.shift();
    }

    // Compute metrics
    const stepsSinceCheckpoint = context.stepNumber - context.lastCheckpointStep;
    const budgetSlope = this.computeBudgetSlope();
    // Guard against division by zero on the very first step.
    const reworkRatio =
      context.stepNumber > 0 ? context.reworkCount / context.stepNumber : 0;
    const stepsUntilCheckpoint =
      this.config.checkpointIntervalSteps - stepsSinceCheckpoint;
    const metrics = {
      budgetSlope,
      reworkRatio,
      stepsUntilCheckpoint,
      coherenceLevel: this.getCoherenceLevel(context.coherenceScore),
      uncertaintyLevel: this.getUncertaintyLevel(context.uncertaintyScore),
    };

    // Decision logic (priority order)

    // 1. Coherence below threshold → stop
    if (context.coherenceScore < this.config.minCoherenceForContinue) {
      reasons.push(`Coherence below threshold (${context.coherenceScore.toFixed(2)} < ${this.config.minCoherenceForContinue})`);
      return this.createDecision('stop', reasons, metrics, 'Halt execution and review coherence issues');
    }

    // 2. Step limit exceeded → stop
    // Only fires when the step limit is reached AND a full checkpoint
    // interval has elapsed since the last checkpoint.
    if (
      context.stepNumber >= this.config.maxConsecutiveSteps &&
      stepsSinceCheckpoint >= this.config.checkpointIntervalSteps
    ) {
      reasons.push(`Step limit exceeded (${context.stepNumber} >= ${this.config.maxConsecutiveSteps}) without recent checkpoint`);
      return this.createDecision('stop', reasons, metrics, 'Create checkpoint and review progress');
    }

    // 3. Budget exhausted → stop
    const exhausted = this.listExhaustedBudgets(context.budgetRemaining);
    if (exhausted.length > 0) {
      reasons.push(`Budget exhausted: ${exhausted.join(', ')}`);
      return this.createDecision('stop', reasons, metrics, 'Increase budget or simplify task scope');
    }

    // 4. High rework ratio → pause
    if (reworkRatio > this.config.maxReworkRatio) {
      reasons.push(`Rework ratio too high (${(reworkRatio * 100).toFixed(1)}% > ${(this.config.maxReworkRatio * 100).toFixed(1)}%)`);
      return this.createDecision('pause', reasons, metrics, 'Review recent work for repeated errors or unclear objectives');
    }

    // 5. High uncertainty → pause
    if (context.uncertaintyScore > this.config.maxUncertaintyForContinue) {
      reasons.push(`Uncertainty too high (${context.uncertaintyScore.toFixed(2)} > ${this.config.maxUncertaintyForContinue})`);
      return this.createDecision('pause', reasons, metrics, 'Resolve uncertain beliefs or gather more evidence before continuing');
    }

    // 6. Budget acceleration → throttle
    if (budgetSlope > this.config.maxBudgetSlopePerStep) {
      reasons.push(`Budget acceleration detected (slope: ${budgetSlope.toFixed(4)} > ${this.config.maxBudgetSlopePerStep})`);
      return this.createDecision('throttle', reasons, metrics, 'Slow down execution or optimize token usage');
    }

    // 7. Checkpoint interval reached → checkpoint
    if (stepsUntilCheckpoint <= 0) {
      reasons.push(`Checkpoint interval reached (${stepsSinceCheckpoint} >= ${this.config.checkpointIntervalSteps})`);
      return this.createDecision('checkpoint', reasons, metrics, 'Save current state before continuing');
    }

    // 8. Otherwise → continue
    reasons.push('All checks passed');
    return this.createDecision('continue', reasons, metrics);
  }

  /**
   * Evaluate and record the decision in history.
   *
   * If called within `cooldownMs` of the last full evaluation, the full
   * evaluation is skipped to limit overhead. The hard-stop conditions
   * (coherence below threshold, any budget exhausted) are still checked
   * during cooldown, so callers cannot bypass them by timing their calls;
   * if neither applies a 'continue' decision is returned.
   *
   * Decisions produced during cooldown are NOT recorded in history and do
   * not update the token window. Their numeric metrics (budgetSlope,
   * reworkRatio, stepsUntilCheckpoint) are placeholders set to 0; the
   * coherence/uncertainty levels reflect the supplied context.
   *
   * @param context - Current step context
   * @returns Decision with reasons and metrics
   */
  evaluateWithHistory(context) {
    const now = Date.now();

    // Cooldown check — but always evaluate critical stop conditions
    // to prevent agents from timing steps to bypass safety checks
    if (now - this.lastEvaluationTime < this.config.cooldownMs) {
      const cooldownMetrics = this.createCooldownMetrics(context);

      // Even during cooldown, check hard-stop conditions
      if (context.coherenceScore < this.config.minCoherenceForContinue) {
        return this.createDecision('stop', ['Coherence below threshold (checked during cooldown)'], cooldownMetrics, 'Halt execution and review coherence issues');
      }
      if (this.listExhaustedBudgets(context.budgetRemaining).length > 0) {
        return this.createDecision('stop', ['Budget exhausted (checked during cooldown)'], cooldownMetrics, 'Increase budget or simplify task scope');
      }

      // Non-critical checks can be skipped during cooldown.
      // Built as a literal (no recommendedAction key) to keep the result
      // shape callers have always received on this path.
      return {
        decision: 'continue',
        reasons: ['Cooldown period active; skipping full evaluation'],
        metrics: cooldownMetrics,
      };
    }

    this.lastEvaluationTime = now;
    const decision = this.evaluate(context);

    // Record in history
    this.history.push({
      step: context.stepNumber,
      decision,
      timestamp: now,
      tokensUsed: context.totalTokensUsed,
    });

    // Evict oldest if exceeding max size
    if (this.history.length > MAX_HISTORY_SIZE) {
      this.history.shift();
    }
    return decision;
  }

  /**
   * Get the full evaluation history.
   *
   * Returns up to MAX_HISTORY_SIZE most recent evaluations. Each entry is
   * a defensive copy (including its reasons array and metrics object), so
   * mutating the result cannot alter the gate's internal records.
   *
   * @returns Array of { step, decision, timestamp } records ordered oldest
   *   to newest
   */
  getHistory() {
    return this.history.map(({ step, decision, timestamp }) => ({
      step,
      decision: {
        ...decision,
        reasons: [...decision.reasons],
        metrics: { ...decision.metrics },
      },
      timestamp,
    }));
  }

  /**
   * Get aggregate statistics across all recorded evaluations.
   *
   * @returns Statistics including total evaluations, decision counts, and
   *   average budget slope (0 when nothing has been recorded)
   */
  getStats() {
    const decisions = {
      continue: 0,
      checkpoint: 0,
      throttle: 0,
      pause: 0,
      stop: 0,
    };
    let totalSlope = 0;
    for (const record of this.history) {
      decisions[record.decision.decision]++;
      totalSlope += record.decision.metrics.budgetSlope;
    }
    const totalEvaluations = this.history.length;
    return {
      totalEvaluations,
      decisions,
      averageBudgetSlope: totalEvaluations > 0 ? totalSlope / totalEvaluations : 0,
    };
  }

  /**
   * Reset all internal state.
   *
   * Clears history, token tracking, and last evaluation time.
   */
  reset() {
    this.history.length = 0;
    this.tokenHistory.length = 0;
    this.lastEvaluationTime = 0;
  }

  /**
   * Get the current configuration.
   *
   * @returns A shallow copy of the effective configuration
   */
  getConfig() {
    return { ...this.config };
  }

  // =========================================================================
  // Private Helpers
  // =========================================================================

  /**
   * Compute the budget slope using least-squares linear regression of the
   * recorded token values against step numbers over the last
   * SLOPE_WINDOW_SIZE points.
   *
   * The recorded token value is `context.totalTokensUsed`, so the slope is
   * in tokens per step. A slope above `maxBudgetSlopePerStep` makes
   * evaluate() return 'throttle'.
   *
   * @returns The slope coefficient, or 0 when fewer than two points exist
   *   or all points share the same step number
   */
  computeBudgetSlope() {
    const n = this.tokenHistory.length;
    if (n < 2) {
      // Need at least 2 points for regression
      return 0;
    }

    let sumX = 0;
    let sumY = 0;
    let sumXY = 0;
    let sumXX = 0;
    for (const { step: x, tokens: y } of this.tokenHistory) {
      sumX += x;
      sumY += y;
      sumXY += x * y;
      sumXX += x * x;
    }

    // Linear regression: slope = (n*sumXY - sumX*sumY) / (n*sumXX - sumX*sumX)
    const denominator = n * sumXX - sumX * sumX;
    if (denominator === 0) {
      // All x values are the same (e.g. evaluate() called repeatedly for
      // one step); the slope is undefined, so report no acceleration.
      return 0;
    }
    return (n * sumXY - sumX * sumY) / denominator;
  }

  /**
   * List which budgets are exhausted (remaining value <= 0).
   *
   * @param budgetRemaining - Object with `tokens`, `toolCalls`, `timeMs`
   * @returns Human-readable labels in fixed order: 'tokens', 'tool calls',
   *   'time'; empty when no budget is exhausted
   */
  listExhaustedBudgets(budgetRemaining) {
    const exhausted = [];
    if (budgetRemaining.tokens <= 0) exhausted.push('tokens');
    if (budgetRemaining.toolCalls <= 0) exhausted.push('tool calls');
    if (budgetRemaining.timeMs <= 0) exhausted.push('time');
    return exhausted;
  }

  /**
   * Build the metrics object reported while the cooldown is active.
   *
   * Numeric metrics are not computed during cooldown and are reported as
   * 0; the level fields are derived from the actual scores so a cooldown
   * decision never reports a fabricated health level.
   *
   * @param context - Current step context
   * @returns Metrics with placeholder numerics and real levels
   */
  createCooldownMetrics(context) {
    return {
      budgetSlope: 0,
      reworkRatio: 0,
      stepsUntilCheckpoint: 0,
      coherenceLevel: this.getCoherenceLevel(context.coherenceScore),
      uncertaintyLevel: this.getUncertaintyLevel(context.uncertaintyScore),
    };
  }

  /**
   * Map coherence score to a health level:
   * >= 0.7 'healthy', >= 0.4 'degraded', otherwise 'critical'.
   */
  getCoherenceLevel(score) {
    if (score >= 0.7) return 'healthy';
    if (score >= 0.4) return 'degraded';
    return 'critical';
  }

  /**
   * Map uncertainty score to a level:
   * <= 0.3 'low', <= 0.6 'moderate', <= 0.8 'high', otherwise 'extreme'.
   */
  getUncertaintyLevel(score) {
    if (score <= 0.3) return 'low';
    if (score <= 0.6) return 'moderate';
    if (score <= 0.8) return 'high';
    return 'extreme';
  }

  /**
   * Create a standardized decision object.
   *
   * @param decision - One of 'continue' | 'checkpoint' | 'throttle' | 'pause' | 'stop'
   * @param reasons - Human-readable explanations for the decision
   * @param metrics - Metrics computed for this evaluation
   * @param recommendedAction - Optional suggested follow-up; the key is
   *   always present and is `undefined` when omitted
   */
  createDecision(decision, reasons, metrics, recommendedAction) {
    return {
      decision,
      reasons,
      metrics,
      recommendedAction,
    };
  }
}

// ============================================================================
// Factory Function
// ============================================================================

/**
 * Create a ContinueGate instance with optional configuration.
 *
 * @param config - Partial configuration; unspecified values use defaults
 * @returns A fresh ContinueGate instance
 */
export function createContinueGate(config) {
  return new ContinueGate(config);
}
//# sourceMappingURL=continue-gate.js.map