// tasq/node_modules/@claude-flow/guidance/dist/continue-gate.js
//
// 353 lines
// 14 KiB
// JavaScript

/**
* Continue Gate - Long-Running Agent Control
*
* Evaluates whether a long-running agent should continue its next step.
* Prevents runaway loops, budget exhaustion, and coherence degradation.
*
* Problem:
* Current gates are tool-centric (PreToolUse, PreCommand, PreEdit).
* Long-run loops are often internally generated — the agent keeps going
* without a single obviously bad tool call. There is no gate for "should
* this agent continue at all?"
*
* ContinueGate provides step-level evaluation with:
* - Hard limits on consecutive steps without checkpoints
* - Budget acceleration detection via linear regression
* - Coherence threshold enforcement
* - Uncertainty threshold enforcement
* - Rework ratio tracking
* - Automatic checkpoint intervals
* - Cooldown between evaluations
*
* Decision types:
* - continue: Agent may proceed to next step
* - checkpoint: Agent must save state before continuing
* - throttle: Agent should slow down or wait
* - pause: Agent should stop and await human review
* - stop: Agent must halt immediately
*
* @module @claude-flow/guidance/continue-gate
*/
// ============================================================================
// Default Configuration
// ============================================================================
/**
 * Default thresholds applied by ContinueGate when the caller does not
 * override them. Frozen so the shared defaults cannot be mutated by accident;
 * consumers copy the values (via object spread) instead of editing them.
 */
const DEFAULT_CONFIG = Object.freeze({
    // Step count at or above which a missing recent checkpoint forces a stop.
    maxConsecutiveSteps: 100,
    // Budget slope above which the gate answers 'throttle'.
    maxBudgetSlopePerStep: 0.02,
    // Coherence scores strictly below this value force a stop.
    minCoherenceForContinue: 0.4,
    // Uncertainty scores strictly above this value force a pause.
    maxUncertaintyForContinue: 0.8,
    // Rework ratio (reworkCount / stepNumber) strictly above this forces a pause.
    maxReworkRatio: 0.3,
    // Number of steps after the last checkpoint at which a new one is requested.
    checkpointIntervalSteps: 25,
    // Minimum time between two full evaluations in evaluateWithHistory().
    cooldownMs: 5000,
});
// Upper bound on retained evaluation records; the oldest record is evicted first.
const MAX_HISTORY_SIZE = 10000;
// Number of recent steps to use for budget slope calculation.
const SLOPE_WINDOW_SIZE = 10;
// ============================================================================
// ContinueGate
// ============================================================================
/**
 * Gate that evaluates whether a long-running agent should continue.
 *
 * Prevents runaway execution by checking:
 * - Step limits
 * - Budget exhaustion and acceleration
 * - Coherence degradation
 * - Uncertainty thresholds
 * - Rework ratios
 * - Checkpoint intervals
 *
 * Maintains history of evaluations and provides aggregate statistics.
 */
export class ContinueGate {
    /** Effective configuration: defaults overlaid with the caller's overrides. */
    config;
    /** Records of full evaluations made via evaluateWithHistory(), oldest first. */
    history = [];
    /** Date.now() value of the last full evaluation via evaluateWithHistory(); 0 if none. */
    lastEvaluationTime = 0;
    /** Sliding window of { step, tokens } samples used for the slope regression. */
    tokenHistory = [];
    /**
     * @param config - Partial configuration; keys that are missing or
     *   explicitly `undefined` fall back to DEFAULT_CONFIG
     */
    constructor(config = {}) {
        // Drop explicitly-undefined overrides: spreading them verbatim would
        // erase the default and silently disable the corresponding check
        // (every comparison against `undefined` is false).
        const overrides = Object.fromEntries(Object.entries(config ?? {}).filter(([, value]) => value !== undefined));
        this.config = { ...DEFAULT_CONFIG, ...overrides };
    }
    /**
     * Evaluate whether the agent should continue.
     *
     * Applies decision logic in priority order:
     * 1. Coherence below threshold → stop
     * 2. Step limit exceeded → stop
     * 3. Budget exhausted → stop
     * 4. High rework ratio → pause
     * 5. High uncertainty → pause
     * 6. Budget acceleration → throttle
     * 7. Checkpoint interval reached → checkpoint
     * 8. Otherwise → continue
     *
     * Side effect: every call appends a sample to the token-usage window
     * used for the slope calculation.
     *
     * @param context - Current step context
     * @returns Decision with reasons and metrics
     */
    evaluate(context) {
        const reasons = [];
        // Track token usage for slope calculation (bounded sliding window)
        this.tokenHistory.push({
            step: context.stepNumber,
            tokens: context.totalTokensUsed,
        });
        if (this.tokenHistory.length > SLOPE_WINDOW_SIZE) {
            this.tokenHistory.shift();
        }
        // Compute metrics
        const budgetSlope = this.computeBudgetSlope();
        // Step 0 has no denominator; treat it as "no rework yet"
        const reworkRatio = context.stepNumber > 0
            ? context.reworkCount / context.stepNumber
            : 0;
        const stepsSinceCheckpoint = context.stepNumber - context.lastCheckpointStep;
        const stepsUntilCheckpoint = this.config.checkpointIntervalSteps - stepsSinceCheckpoint;
        const metrics = {
            budgetSlope,
            reworkRatio,
            stepsUntilCheckpoint,
            coherenceLevel: this.getCoherenceLevel(context.coherenceScore),
            uncertaintyLevel: this.getUncertaintyLevel(context.uncertaintyScore),
        };
        // Decision logic (priority order)
        // 1. Coherence below threshold → stop
        if (context.coherenceScore < this.config.minCoherenceForContinue) {
            reasons.push(`Coherence below threshold (${context.coherenceScore.toFixed(2)} < ${this.config.minCoherenceForContinue})`);
            return this.createDecision('stop', reasons, metrics, 'Halt execution and review coherence issues');
        }
        // 2. Step limit exceeded → stop
        if (this.isStepLimitExceeded(context)) {
            reasons.push(`Step limit exceeded (${context.stepNumber} >= ${this.config.maxConsecutiveSteps}) without recent checkpoint`);
            return this.createDecision('stop', reasons, metrics, 'Create checkpoint and review progress');
        }
        // 3. Budget exhausted → stop
        const exhausted = this.listExhaustedBudgets(context.budgetRemaining);
        if (exhausted.length > 0) {
            reasons.push(`Budget exhausted: ${exhausted.join(', ')}`);
            return this.createDecision('stop', reasons, metrics, 'Increase budget or simplify task scope');
        }
        // 4. High rework ratio → pause
        if (reworkRatio > this.config.maxReworkRatio) {
            reasons.push(`Rework ratio too high (${(reworkRatio * 100).toFixed(1)}% > ${(this.config.maxReworkRatio * 100).toFixed(1)}%)`);
            return this.createDecision('pause', reasons, metrics, 'Review recent work for repeated errors or unclear objectives');
        }
        // 5. High uncertainty → pause
        if (context.uncertaintyScore > this.config.maxUncertaintyForContinue) {
            reasons.push(`Uncertainty too high (${context.uncertaintyScore.toFixed(2)} > ${this.config.maxUncertaintyForContinue})`);
            return this.createDecision('pause', reasons, metrics, 'Resolve uncertain beliefs or gather more evidence before continuing');
        }
        // 6. Budget acceleration → throttle
        if (budgetSlope > this.config.maxBudgetSlopePerStep) {
            reasons.push(`Budget acceleration detected (slope: ${budgetSlope.toFixed(4)} > ${this.config.maxBudgetSlopePerStep})`);
            return this.createDecision('throttle', reasons, metrics, 'Slow down execution or optimize token usage');
        }
        // 7. Checkpoint interval reached → checkpoint
        if (stepsUntilCheckpoint <= 0) {
            reasons.push(`Checkpoint interval reached (${stepsSinceCheckpoint} >= ${this.config.checkpointIntervalSteps})`);
            return this.createDecision('checkpoint', reasons, metrics, 'Save current state before continuing');
        }
        // 8. Otherwise → continue
        reasons.push('All checks passed');
        return this.createDecision('continue', reasons, metrics);
    }
    /**
     * Evaluate and record the decision in history.
     *
     * If called within `cooldownMs` of the previous full evaluation, the
     * full evaluation is skipped to limit overhead. The three hard-stop
     * conditions (coherence, step limit, budget exhaustion) are still
     * checked, so calling rapidly cannot bypass them. Decisions produced
     * during cooldown are not recorded in history and do not update the
     * token-usage window; their numeric metrics (budgetSlope, reworkRatio,
     * stepsUntilCheckpoint) are 0 placeholders, not computed values.
     *
     * @param context - Current step context
     * @returns Decision with reasons and metrics
     */
    evaluateWithHistory(context) {
        const now = Date.now();
        // Cooldown check — but always evaluate critical stop conditions
        // to prevent agents from timing steps to bypass safety checks
        if (now - this.lastEvaluationTime < this.config.cooldownMs) {
            // Levels are derived from the actual scores so the reported
            // metrics never contradict the context; numeric metrics are
            // placeholders because nothing is computed during cooldown.
            const cooldownMetrics = {
                budgetSlope: 0,
                reworkRatio: 0,
                stepsUntilCheckpoint: 0,
                coherenceLevel: this.getCoherenceLevel(context.coherenceScore),
                uncertaintyLevel: this.getUncertaintyLevel(context.uncertaintyScore),
            };
            // Hard stops, in the same priority order as evaluate()
            if (context.coherenceScore < this.config.minCoherenceForContinue) {
                return this.createDecision('stop', ['Coherence below threshold (checked during cooldown)'], cooldownMetrics, 'Halt execution and review coherence issues');
            }
            if (this.isStepLimitExceeded(context)) {
                return this.createDecision('stop', ['Step limit exceeded without recent checkpoint (checked during cooldown)'], cooldownMetrics, 'Create checkpoint and review progress');
            }
            if (this.listExhaustedBudgets(context.budgetRemaining).length > 0) {
                return this.createDecision('stop', ['Budget exhausted (checked during cooldown)'], cooldownMetrics, 'Increase budget or simplify task scope');
            }
            // Non-critical checks can be skipped during cooldown
            return {
                decision: 'continue',
                reasons: ['Cooldown period active; skipping full evaluation'],
                metrics: cooldownMetrics,
            };
        }
        this.lastEvaluationTime = now;
        const decision = this.evaluate(context);
        // Record in history
        this.history.push({
            step: context.stepNumber,
            decision,
            timestamp: now,
            tokensUsed: context.totalTokensUsed,
        });
        // Evict oldest if exceeding max size
        if (this.history.length > MAX_HISTORY_SIZE) {
            this.history.shift();
        }
        return decision;
    }
    /**
     * Get the evaluation history.
     *
     * Returns up to MAX_HISTORY_SIZE most recent evaluations. Each entry
     * carries `step`, `decision` and `timestamp`. The decision, including
     * its `reasons` array and `metrics` object, is copied, so mutating the
     * result cannot alter the gate's internal records.
     *
     * @returns Array of evaluation records ordered oldest to newest
     */
    getHistory() {
        return this.history.map(({ step, decision, timestamp }) => ({
            step,
            decision: {
                ...decision,
                reasons: [...decision.reasons],
                metrics: { ...decision.metrics },
            },
            timestamp,
        }));
    }
    /**
     * Get aggregate statistics across all recorded evaluations.
     *
     * @returns Statistics including total evaluations, decision counts, and average budget slope
     */
    getStats() {
        const decisions = {
            continue: 0,
            checkpoint: 0,
            throttle: 0,
            pause: 0,
            stop: 0,
        };
        let totalSlope = 0;
        for (const record of this.history) {
            decisions[record.decision.decision]++;
            totalSlope += record.decision.metrics.budgetSlope;
        }
        const totalEvaluations = this.history.length;
        return {
            totalEvaluations,
            decisions,
            // Avoid 0/0 when nothing has been recorded yet
            averageBudgetSlope: totalEvaluations > 0 ? totalSlope / totalEvaluations : 0,
        };
    }
    /**
     * Reset all internal state.
     *
     * Clears history, token tracking, and last evaluation time.
     * Configuration is left untouched.
     */
    reset() {
        this.history.length = 0;
        this.tokenHistory.length = 0;
        this.lastEvaluationTime = 0;
    }
    /**
     * Get the current configuration.
     *
     * @returns A shallow copy, so edits by the caller do not affect the gate
     */
    getConfig() {
        return { ...this.config };
    }
    // =========================================================================
    // Private Helpers
    // =========================================================================
    /**
     * Hard-stop predicate for the step limit: true when the step number has
     * reached `maxConsecutiveSteps` AND at least `checkpointIntervalSteps`
     * steps have passed since the last checkpoint.
     */
    isStepLimitExceeded(context) {
        const stepsSinceCheckpoint = context.stepNumber - context.lastCheckpointStep;
        return context.stepNumber >= this.config.maxConsecutiveSteps &&
            stepsSinceCheckpoint >= this.config.checkpointIntervalSteps;
    }
    /**
     * List the human-readable names of every budget that is at or below zero.
     *
     * @param budgetRemaining - Object with `tokens`, `toolCalls` and `timeMs`
     * @returns Names in the fixed order 'tokens', 'tool calls', 'time'; empty when none is exhausted
     */
    listExhaustedBudgets(budgetRemaining) {
        const exhausted = [];
        if (budgetRemaining.tokens <= 0)
            exhausted.push('tokens');
        if (budgetRemaining.toolCalls <= 0)
            exhausted.push('tool calls');
        if (budgetRemaining.timeMs <= 0)
            exhausted.push('time');
        return exhausted;
    }
    /**
     * Compute the budget slope using least-squares linear regression of the
     * recorded `tokens` values against `step` over the sliding window.
     *
     * A positive slope indicates increasing token usage. A slope above the
     * configured threshold is reported as budget acceleration.
     *
     * NOTE(review): the result is expressed in units of `totalTokensUsed`
     * per step, so `maxBudgetSlopePerStep` must use the same scale — confirm
     * the default matches what callers pass as `totalTokensUsed`.
     *
     * @returns The slope, or 0 when fewer than two samples exist or all
     *   samples share the same step number
     */
    computeBudgetSlope() {
        const n = this.tokenHistory.length;
        if (n < 2) {
            // Need at least 2 points for regression
            return 0;
        }
        let sumX = 0;
        let sumY = 0;
        let sumXY = 0;
        let sumXX = 0;
        for (const { step: x, tokens: y } of this.tokenHistory) {
            sumX += x;
            sumY += y;
            sumXY += x * y;
            sumXX += x * x;
        }
        // Linear regression: slope = (n*sumXY - sumX*sumY) / (n*sumXX - sumX*sumX)
        const denominator = n * sumXX - sumX * sumX;
        if (denominator === 0) {
            // All x values are the same (e.g. repeated evaluation of one step)
            return 0;
        }
        return (n * sumXY - sumX * sumY) / denominator;
    }
    /**
     * Map coherence score to a health level:
     * >= 0.7 → 'healthy', >= 0.4 → 'degraded', otherwise 'critical'.
     */
    getCoherenceLevel(score) {
        if (score >= 0.7)
            return 'healthy';
        if (score >= 0.4)
            return 'degraded';
        return 'critical';
    }
    /**
     * Map uncertainty score to a level:
     * <= 0.3 → 'low', <= 0.6 → 'moderate', <= 0.8 → 'high', otherwise 'extreme'.
     */
    getUncertaintyLevel(score) {
        if (score <= 0.3)
            return 'low';
        if (score <= 0.6)
            return 'moderate';
        if (score <= 0.8)
            return 'high';
        return 'extreme';
    }
    /**
     * Create a standardized decision object.
     *
     * @param decision - One of 'continue', 'checkpoint', 'throttle', 'pause', 'stop'
     * @param reasons - Human-readable explanations for the decision
     * @param metrics - Metrics snapshot attached to the decision
     * @param recommendedAction - Optional suggested follow-up; undefined when omitted
     */
    createDecision(decision, reasons, metrics, recommendedAction) {
        return {
            decision,
            reasons,
            metrics,
            recommendedAction,
        };
    }
}
// ============================================================================
// Factory Function
// ============================================================================
/**
 * Factory helper that builds a new ContinueGate.
 *
 * @param config - Optional partial configuration, forwarded unchanged to the
 *   ContinueGate constructor; unspecified values use defaults
 * @returns A newly constructed ContinueGate instance
 */
export function createContinueGate(config) {
    const gate = new ContinueGate(config);
    return gate;
}
//# sourceMappingURL=continue-gate.js.map