375 lines
12 KiB
JavaScript
375 lines
12 KiB
JavaScript
/**
 * Run Ledger + Evaluators
 *
 * Logs every run as an event with a minimum schema, then runs evaluators
 * to assess compliance and quality.
 *
 * Objective evaluators:
 * 1. Tests pass
 * 2. Lint pass
 * 3. Forbidden dependency scan
 * 4. Forbidden command scan
 * 5. Required sections present in plan
 *
 * Subjective evaluators:
 * 1. Reviewer rating (pass/fail)
 * 2. Architecture compliance (pass/fail)
 *
 * @module @claude-flow/guidance/ledger
 */
|
|
import { randomUUID } from 'node:crypto';
|
|
// ============================================================================
// Built-in Evaluators
// ============================================================================
|
|
/**
 * Objective evaluator that grades a run by its recorded test results.
 *
 * A run passes only when tests were executed and no test failed. The score is
 * the share of passing tests among all counted tests (passed + failed +
 * skipped), or 0 when nothing was counted.
 */
export class TestsPassEvaluator {
  name = 'tests-pass';
  type = 'objective';

  /**
   * Evaluate the `testResults` recorded on a run event.
   *
   * @param {object} event - Run event; `event.testResults` must provide
   *   `ran`, `passed`, `failed` and `skipped`.
   * @returns {Promise<{name: string, passed: boolean, details: string, score: number}>}
   */
  async evaluate(event) {
    const {
      ran,
      passed: passCount,
      failed: failCount,
      skipped: skipCount,
    } = event.testResults;

    // A run that never executed its tests is an automatic failure.
    if (!ran) {
      return {
        name: this.name,
        passed: false,
        details: 'Tests were not run during this task',
        score: 0,
      };
    }

    const counted = passCount + failCount + skipCount;
    const allGreen = failCount === 0;

    let details;
    if (allGreen) {
      details = `All ${passCount} tests passed (${skipCount} skipped)`;
    } else {
      details = `${failCount} of ${counted} tests failed`;
    }

    // Skipped tests count against the score because they are in the denominator.
    let score = 0;
    if (counted > 0) {
      score = passCount / counted;
    }

    return { name: this.name, passed: allGreen, details, score };
  }
}
|
|
/**
 * Objective evaluator that scans the tools/commands used during a run for
 * forbidden patterns.
 *
 * Every entry of `event.toolsUsed` is tested against every configured regular
 * expression; each (entry, pattern) match is reported as one violation.
 */
export class ForbiddenCommandEvaluator {
  name = 'forbidden-command-scan';
  type = 'objective';
  forbiddenPatterns;

  /**
   * @param {RegExp[]} [forbiddenPatterns] - Patterns to reject. When omitted
   *   (or `null`), a built-in list is used covering `rm -rf /`, force-pushes
   *   to main/master, piping `curl` output into a shell, and `eval(`/`exec(`.
   */
  constructor(forbiddenPatterns) {
    this.forbiddenPatterns = forbiddenPatterns ?? [
      /\brm\s+-rf\s+\//,
      /\bgit\s+push\s+--force\s+origin\s+(?:main|master)\b/,
      /\bcurl\s+.*\|\s*(?:sh|bash)\b/,
      /\beval\s*\(/,
      /\bexec\s*\(/,
    ];
  }

  /**
   * Scan `event.toolsUsed` for forbidden command patterns.
   *
   * @param {object} event - Run event; `event.toolsUsed` is iterated and each
   *   entry is passed to `RegExp.prototype.test`.
   * @returns {Promise<{name: string, passed: boolean, details: string, score: number}>}
   *   `passed`/`score` are `true`/1 only when nothing matched.
   */
  async evaluate(event) {
    const violations = [];
    for (const tool of event.toolsUsed) {
      for (const pattern of this.forbiddenPatterns) {
        if (ForbiddenCommandEvaluator.#matches(pattern, tool)) {
          violations.push(`Forbidden command pattern: ${pattern.source} matched in "${tool}"`);
        }
      }
    }

    const clean = violations.length === 0;
    return {
      name: this.name,
      passed: clean,
      details: clean
        ? 'No forbidden commands detected'
        : `Found ${violations.length} forbidden command(s): ${violations.join('; ')}`,
      score: clean ? 1 : 0,
    };
  }

  /**
   * Stateless wrapper around `pattern.test(text)`.
   *
   * Regexes with the `g` or `y` flag remember `lastIndex` between calls, so
   * reusing one across different strings can skip real matches. Resetting
   * `lastIndex` before and after keeps each check independent.
   */
  static #matches(pattern, text) {
    const stateful = pattern.global || pattern.sticky;
    if (stateful) {
      pattern.lastIndex = 0;
    }
    const matched = pattern.test(text);
    if (stateful) {
      pattern.lastIndex = 0;
    }
    return matched;
  }
}
|
|
/**
 * Objective evaluator that flags runs touching npm package manifests.
 *
 * NOTE: this evaluator never fails a run. With no forbidden packages
 * configured it reports that fact; otherwise it only lists touched
 * `package.json` / `package-lock.json` files and recommends a manual review.
 * The configured package names themselves are not compared against anything.
 */
export class ForbiddenDependencyEvaluator {
  name = 'forbidden-dependency-scan';
  type = 'objective';
  forbiddenPackages;

  /**
   * @param {string[]} [forbiddenPackages] - Package names considered
   *   forbidden; defaults to an empty list when omitted or `null`.
   */
  constructor(forbiddenPackages) {
    this.forbiddenPackages = forbiddenPackages ?? [];
  }

  /**
   * Report whether package manifest files were modified by the run.
   *
   * @param {object} event - Run event; `event.filesTouched` is read only when
   *   at least one forbidden package is configured.
   * @returns {Promise<{name: string, passed: boolean, details: string, score: number}>}
   *   Always `passed: true` with `score: 1`.
   */
  async evaluate(event) {
    const verdict = { name: this.name, passed: true };

    if (this.forbiddenPackages.length === 0) {
      return { ...verdict, details: 'No forbidden dependencies configured', score: 1 };
    }

    // Only the manifest/lockfile paths are inspected, not their contents.
    const manifestSuffixes = ['package.json', 'package-lock.json'];
    const isManifest = (path) => manifestSuffixes.some((suffix) => path.endsWith(suffix));
    const touchedManifests = event.filesTouched.filter(isManifest);

    let details = 'No package files modified';
    if (touchedManifests.length > 0) {
      details = `Package files modified: ${touchedManifests.join(', ')} - manual review recommended`;
    }

    return { ...verdict, details, score: 1 };
  }
}
|
|
/**
 * Objective evaluator that compares the number of recorded violations on a
 * run against a configurable ceiling.
 */
export class ViolationRateEvaluator {
  name = 'violation-rate';
  type = 'objective';
  maxViolations;

  /**
   * @param {number} [maxViolations=0] - Highest violation count that still passes.
   */
  constructor(maxViolations = 0) {
    this.maxViolations = maxViolations;
  }

  /**
   * Check `event.violations.length` against the threshold.
   *
   * @param {object} event - Run event; only the length of `event.violations` is used.
   * @returns {Promise<{name: string, passed: boolean, details: string, score: number}>}
   *   The score falls linearly from 1 (no violations) to 0 at
   *   `maxViolations + 1` violations and is clamped at 0 beyond that.
   */
  async evaluate(event) {
    const limit = this.maxViolations;
    const count = event.violations.length;
    const passed = count <= limit;

    let details;
    if (passed) {
      details = `${count} violation(s) within threshold (max: ${limit})`;
    } else {
      details = `${count} violation(s) exceeds threshold (max: ${limit})`;
    }

    // The divisor is floored at 1 so a negative limit cannot flip or blow up the score.
    const divisor = Math.max(limit + 1, 1);
    const score = Math.max(0, 1 - count / divisor);

    return { name: this.name, passed, details, score };
  }
}
|
|
/**
 * Objective evaluator that judges a run by its rework ratio: reworked lines
 * divided by the total lines added plus removed.
 */
export class DiffQualityEvaluator {
  name = 'diff-quality';
  type = 'objective';
  maxReworkRatio;

  /**
   * @param {number} [maxReworkRatio=0.3] - Largest rework ratio that still passes.
   */
  constructor(maxReworkRatio = 0.3) {
    this.maxReworkRatio = maxReworkRatio;
  }

  /**
   * Compute the rework ratio for a run and compare it with the threshold.
   *
   * @param {object} event - Run event; reads `event.diffSummary.linesAdded`,
   *   `event.diffSummary.linesRemoved` and `event.reworkLines`.
   * @returns {Promise<{name: string, passed: boolean, details: string, score: number}>}
   *   The score is `1 - ratio`, clamped at 0. An empty diff passes with score 1.
   */
  async evaluate(event) {
    const { linesAdded, linesRemoved } = event.diffSummary;
    const changedLines = linesAdded + linesRemoved;

    // Nothing changed, so there is nothing to rework (and nothing to divide by).
    if (changedLines === 0) {
      return { name: this.name, passed: true, details: 'No diff produced', score: 1 };
    }

    const ratio = event.reworkLines / changedLines;
    const ratioPct = (ratio * 100).toFixed(1);
    const thresholdPct = (this.maxReworkRatio * 100).toFixed(0);

    return {
      name: this.name,
      passed: ratio <= this.maxReworkRatio,
      details: `Rework ratio: ${ratioPct}% (${event.reworkLines}/${changedLines} lines). Threshold: ${thresholdPct}%`,
      score: Math.max(0, 1 - ratio),
    };
  }
}
|
|
// ============================================================================
// Run Ledger
// ============================================================================
|
|
/**
 * In-memory ledger of run events plus the evaluators used to grade them.
 *
 * Events are kept oldest-first in `events`. When `maxEvents` is positive the
 * ledger evicts the oldest entries whenever the cap is exceeded, whether the
 * events arrive through `logEvent`, `finalizeEvent`, or `importEvents`.
 */
export class RunLedger {
  events = [];
  evaluators = [];
  maxEvents;

  /**
   * @param maxEvents - Maximum events to retain in memory (0 = unlimited).
   * When the limit is exceeded the oldest events are evicted.
   */
  constructor(maxEvents = 0) {
    this.maxEvents = maxEvents;
    // Register default evaluators
    this.evaluators = [
      new TestsPassEvaluator(),
      new ForbiddenCommandEvaluator(),
      new ForbiddenDependencyEvaluator(),
      new ViolationRateEvaluator(),
      new DiffQualityEvaluator(),
    ];
  }

  /**
   * Add a custom evaluator. It runs after all previously registered ones.
   *
   * @param evaluator - Object exposing `name` and an `evaluate(event)` method.
   */
  addEvaluator(evaluator) {
    this.evaluators.push(evaluator);
  }

  /**
   * Remove every evaluator whose `name` equals the given name.
   */
  removeEvaluator(name) {
    this.evaluators = this.evaluators.filter((e) => e.name !== name);
  }

  /**
   * Log a run event.
   *
   * The stored object is a shallow copy of `event` with a freshly generated
   * `eventId`; any `eventId` already present on the input is replaced.
   *
   * @returns The stored event (the copy, not the argument).
   */
  logEvent(event) {
    const fullEvent = {
      ...event,
      eventId: randomUUID(),
    };
    this.events.push(fullEvent);
    this.evictIfNeeded();
    return fullEvent;
  }

  /**
   * Create a new run event with defaults. The event is NOT stored; pass it to
   * `finalizeEvent` once the run is complete.
   */
  createEvent(taskId, intent, guidanceHash) {
    return {
      eventId: randomUUID(),
      taskId,
      guidanceHash,
      retrievedRuleIds: [],
      toolsUsed: [],
      filesTouched: [],
      diffSummary: { linesAdded: 0, linesRemoved: 0, filesChanged: 0 },
      testResults: { ran: false, passed: 0, failed: 0, skipped: 0 },
      violations: [],
      outcomeAccepted: null,
      reworkLines: 0,
      intent,
      timestamp: Date.now(),
      durationMs: 0,
    };
  }

  /**
   * Finalize and store an event.
   *
   * Sets `event.durationMs` to the time elapsed since `event.timestamp`
   * (mutating the argument) and appends that same object to the ledger.
   */
  finalizeEvent(event) {
    event.durationMs = Date.now() - event.timestamp;
    this.events.push(event);
    this.evictIfNeeded();
    return event;
  }

  /**
   * Evict oldest events when maxEvents is exceeded.
   *
   * Trims at least 10% of the cap in a batch to amortize the O(n) splice
   * cost, and never less than the current overflow, so the ledger is always
   * back at or under the cap afterwards (e.g. after a bulk import).
   */
  evictIfNeeded() {
    if (this.maxEvents > 0 && this.events.length > this.maxEvents) {
      const batch = Math.max(1, Math.floor(this.maxEvents * 0.1));
      const overflow = this.events.length - this.maxEvents;
      this.events.splice(0, Math.max(batch, overflow));
    }
  }

  /**
   * Run all evaluators against an event, one after another in registration
   * order.
   *
   * @returns Array of evaluator results, in the same order as the evaluators.
   */
  async evaluate(event) {
    const results = [];
    for (const evaluator of this.evaluators) {
      const result = await evaluator.evaluate(event);
      results.push(result);
    }
    return results;
  }

  /**
   * Get all events (shallow copy of the internal array).
   */
  getEvents() {
    return [...this.events];
  }

  /**
   * Get events by task ID
   */
  getEventsByTask(taskId) {
    return this.events.filter((e) => e.taskId === taskId);
  }

  /**
   * Get events whose `timestamp` lies within [startMs, endMs], inclusive.
   */
  getEventsInRange(startMs, endMs) {
    return this.events.filter((e) => e.timestamp >= startMs && e.timestamp <= endMs);
  }

  /**
   * Get the `count` most recent events, oldest first.
   *
   * @param count - Number of events wanted. Values below 1 (including 0,
   *   negatives and non-numbers) yield an empty array.
   */
  getRecentEvents(count) {
    const wanted = Math.floor(count);
    // slice(-0) is slice(0) and would return the whole ledger, so guard it.
    if (!(wanted >= 1)) {
      return [];
    }
    return this.events.slice(-wanted);
  }

  /**
   * Compute optimization metrics from events.
   *
   * @param events - Events to aggregate; defaults to everything in the ledger.
   * @returns `violationRate` (violations per 10 tasks), `selfCorrectionRate`
   *   (share of violations flagged `autoCorrected`; 1 when there are events
   *   but no violations, 0 when there are no events), `reworkLines` (average
   *   per event), `clarifyingQuestions` (always 0 for now) and `taskCount`.
   */
  computeMetrics(events) {
    const evts = events ?? this.events;
    if (evts.length === 0) {
      return {
        violationRate: 0,
        selfCorrectionRate: 0,
        reworkLines: 0,
        clarifyingQuestions: 0,
        taskCount: 0,
      };
    }

    let totalViolations = 0;
    let totalCorrected = 0;
    let totalRework = 0;
    for (const e of evts) {
      totalViolations += e.violations.length;
      totalCorrected += e.violations.filter((v) => v.autoCorrected).length;
      totalRework += e.reworkLines;
    }

    // Violations per 10 tasks
    const violationRate = (totalViolations / evts.length) * 10;
    // Self-correction rate: violations that were auto-corrected
    const selfCorrectionRate = totalViolations > 0 ? totalCorrected / totalViolations : 1;
    // Average rework lines
    const reworkLines = totalRework / evts.length;
    // Clarifying questions are tracked in metadata (placeholder for now)
    const clarifyingQuestions = 0;

    return {
      violationRate,
      selfCorrectionRate,
      reworkLines,
      clarifyingQuestions,
      taskCount: evts.length,
    };
  }

  /**
   * Rank violations by frequency and cost (rework lines).
   *
   * For each `ruleId`, `frequency` is the number of violations recorded and
   * `cost` is the average `reworkLines` of the events they occurred in.
   * Results are sorted by `frequency * cost`, highest first.
   *
   * @param windowEvents - Events to analyse; defaults to the whole ledger.
   */
  rankViolations(windowEvents) {
    const evts = windowEvents ?? this.events;
    const violationMap = new Map();
    for (const event of evts) {
      for (const violation of event.violations) {
        const existing = violationMap.get(violation.ruleId) ?? { frequency: 0, totalRework: 0 };
        existing.frequency++;
        // The event's whole rework is attributed to each of its violations.
        existing.totalRework += event.reworkLines;
        violationMap.set(violation.ruleId, existing);
      }
    }

    const rankings = [];
    for (const [ruleId, stats] of violationMap) {
      const cost = stats.totalRework / stats.frequency;
      rankings.push({
        ruleId,
        frequency: stats.frequency,
        cost,
        score: stats.frequency * cost,
      });
    }
    return rankings.sort((a, b) => b.score - a.score);
  }

  /**
   * Get event count
   */
  get eventCount() {
    return this.events.length;
  }

  /**
   * Export events for persistence (shallow copy of the internal array).
   */
  exportEvents() {
    return [...this.events];
  }

  /**
   * Import events from persistence.
   *
   * Events are appended as-is (no new `eventId` is assigned) and the
   * `maxEvents` cap is enforced afterwards, evicting the oldest entries.
   */
  importEvents(events) {
    // Push one by one: push(...events) passes every element as a call
    // argument and can overflow the stack for very large imports.
    for (const event of events) {
      this.events.push(event);
    }
    this.evictIfNeeded();
  }

  /**
   * Clear all events
   */
  clear() {
    this.events = [];
  }
}
|
|
/**
 * Factory for {@link RunLedger}.
 *
 * @param maxEvents - Maximum events to retain in memory (0 = unlimited).
 * @returns A new ledger constructed with the given retention cap.
 */
export function createLedger(maxEvents = 0) {
  const ledger = new RunLedger(maxEvents);
  return ledger;
}
|
|
//# sourceMappingURL=ledger.js.map
|