91 lines
3.9 KiB
JavaScript
91 lines
3.9 KiB
JavaScript
import test from 'node:test';
|
|
import assert from 'node:assert/strict';
|
|
import { TerminalAgent, TerminalToolRegistry } from '../lib/agent/terminal-agent.mjs';
|
|
|
|
/**
 * Builds a scripted stand-in for the provider object handed to the agent.
 *
 * Every call to `complete()` resolves with the next entry of `decisions`,
 * serialized as JSON in the `text` field. Once the script has been used up,
 * the last entry is replayed on every subsequent call.
 *
 * @param {object[]} decisions - Non-empty, ordered list of decisions to replay.
 * @returns {{ isConfigured: boolean, complete: () => Promise<{ text: string }> }}
 *   A provider stub with `isConfigured` set to `true`.
 * @throws {TypeError} If `decisions` is not a non-empty array.
 */
function providerWith(decisions) {
  // An empty script would make complete() read decisions[-1] and resolve with
  // `text: undefined`; fail at construction so the mistake is obvious.
  if (!Array.isArray(decisions) || decisions.length === 0) {
    throw new TypeError('providerWith requires a non-empty array of decisions');
  }

  let index = 0;

  return {
    isConfigured: true,
    async complete() {
      // Clamp to the final entry so extra calls keep returning the last decision.
      const position = Math.min(index, decisions.length - 1);
      index += 1;
      return { text: JSON.stringify(decisions[position]) };
    },
  };
}
|
|
|
|
// Scripts two tool calls followed by a final decision, then checks the returned
// answer, its confidence, and that the trace lists both tools with status 'ok'.
test('terminal agent performs bounded multi-step tool reasoning', async () => {
  const statusTool = {
    name: 'get_status',
    description: 'status',
    handler: async () => ({ status: 'degraded' }),
  };
  const memoryTool = {
    name: 'search_memory',
    description: 'memory',
    handler: async (toolArgs) => ({ query: toolArgs.query, hits: 2 }),
  };
  const registry = new TerminalToolRegistry([statusTool, memoryTool]);

  // Decisions are replayed in order by the scripted provider.
  const script = [
    { type: 'tool_call', tool: 'get_status', arguments: {}, rationale: 'Check freshness' },
    { type: 'tool_call', tool: 'search_memory', arguments: { query: 'Iran' }, rationale: 'Compare history' },
    {
      type: 'final',
      answer: 'Two historical events support the current signal.',
      confidence: 'medium',
      evidence: ['evt-1'],
      notify: false,
      priority: 'routine',
    },
  ];

  const agent = new TerminalAgent({
    registry,
    provider: providerWith(script),
    maxSteps: 4,
  });

  const result = await agent.run('What changed?', { chatId: 42 });

  assert.equal(result.answer, 'Two historical events support the current signal.');
  assert.equal(result.confidence, 'medium');

  const calledTools = result.trace.map((entry) => entry.tool);
  assert.deepEqual(calledTools, ['get_status', 'search_memory']);

  const allSucceeded = result.trace.every((entry) => entry.status === 'ok');
  assert.ok(allSucceeded);
});
|
|
|
|
// A tool flagged `mutating` must not run during agent.run(); the test expects a
// pending action instead, which only a confirm() from the same chat id (42)
// executes, and only once.
test('mutating tools require chat-bound confirmation', async () => {
  let runCount = 0;

  const sweepTool = {
    name: 'trigger_sweep',
    description: 'sweep',
    mutating: true,
    handler: async (_args, runtime) => {
      // The handler should only ever be reached with a confirmed runtime.
      assert.equal(runtime.confirmed, true);
      runCount += 1;
      return { accepted: true };
    },
  };
  const registry = new TerminalToolRegistry([sweepTool]);

  const script = [
    { type: 'tool_call', tool: 'trigger_sweep', arguments: {}, rationale: 'Fresh data needed' },
  ];
  const agent = new TerminalAgent({
    registry,
    provider: providerWith(script),
  });

  const proposal = await agent.run('Run a sweep', { chatId: 42 });
  assert.equal(runCount, 0);
  assert.equal(proposal.pendingAction.tool, 'trigger_sweep');

  // Resolves to the `ok` flag of a confirmation attempt made from `chatId`.
  const confirmFrom = async (chatId) => {
    const outcome = await agent.confirm(proposal.pendingAction.id, chatId);
    return outcome.ok;
  };

  // A different chat id is refused and nothing runs.
  assert.equal(await confirmFrom(99), false);
  assert.equal(runCount, 0);

  // The originating chat id is accepted and the handler runs once.
  assert.equal(await confirmFrom(42), true);
  assert.equal(runCount, 1);

  // Confirming the same action again is refused.
  assert.equal(await confirmFrom(42), false);
});
|
|
|
|
// With an empty registry, a scripted call to 'run_shell' is expected to show up
// in the trace as 'rejected' while the run still ends with the final answer.
test('unknown tools fail closed and remain in audit trace', async () => {
  const script = [
    { type: 'tool_call', tool: 'run_shell', arguments: { command: 'whoami' }, rationale: 'Not allowed' },
    {
      type: 'final',
      answer: 'That operation is not available.',
      confidence: 'high',
      evidence: [],
      notify: false,
      priority: 'routine',
    },
  ];

  const agent = new TerminalAgent({
    registry: new TerminalToolRegistry([]),
    provider: providerWith(script),
  });

  const result = await agent.run('Run shell', { chatId: 42 });
  assert.equal(result.answer, 'That operation is not available.');

  const expectedEntry = {
    tool: 'run_shell',
    status: 'rejected',
    durationMs: 0,
    rationale: 'Not allowed',
  };
  const firstEntry = result.trace[0];
  assert.deepEqual(firstEntry, expectedEntry);
});
|
|
|
|
// Runs two proactive analyses back to back with a 60000 ms cooldown configured:
// the first is expected to notify, the second to be suppressed as 'cooldown'.
test('proactive notifications observe cooldown', async () => {
  const escalation = {
    type: 'final',
    answer: 'Material escalation detected.',
    confidence: 'high',
    evidence: ['https://example.test'],
    notify: true,
    priority: 'flash',
  };

  const agent = new TerminalAgent({
    registry: new TerminalToolRegistry([]),
    provider: providerWith([escalation]),
    proactiveCooldownMs: 60000,
  });

  const initial = await agent.analyzeProactively('Evaluate');
  const repeat = await agent.analyzeProactively('Evaluate again');

  assert.equal(initial.notify, true);
  assert.equal(initial.priority, 'flash');

  assert.equal(repeat.notify, false);
  assert.equal(repeat.suppressed, 'cooldown');
});
|