feat: alert operators on stale data
All checks were successful
Codex Template Compliance / template-compliance (pull_request) Successful in 5s
Build / test-and-image (pull_request) Successful in 53s

This commit is contained in:
2026-05-17 13:58:32 +02:00
parent 8605d0baab
commit e574ad1c3d
6 changed files with 151 additions and 0 deletions

View File

@@ -6,6 +6,8 @@ PORT=3117
REFRESH_INTERVAL_MINUTES=15 REFRESH_INTERVAL_MINUTES=15
AUTO_OPEN_BROWSER=false AUTO_OPEN_BROWSER=false
STALE_DATA_MAX_AGE_MINUTES=60 STALE_DATA_MAX_AGE_MINUTES=60
STALE_ALERT_COOLDOWN_MINUTES=60
DASHBOARD_URL=
TERMINAL_ACTIONS_ENABLED=true TERMINAL_ACTIONS_ENABLED=true
SWEEP_TOKEN= SWEEP_TOKEN=
BRIEF_VERBOSITY=standard BRIEF_VERBOSITY=standard

View File

@@ -135,6 +135,8 @@ PORT=3117
REFRESH_INTERVAL_MINUTES=15 REFRESH_INTERVAL_MINUTES=15
AUTO_OPEN_BROWSER=false AUTO_OPEN_BROWSER=false
STALE_DATA_MAX_AGE_MINUTES=60 STALE_DATA_MAX_AGE_MINUTES=60
STALE_ALERT_COOLDOWN_MINUTES=60
DASHBOARD_URL=https://intelligence.example.internal
TERMINAL_ACTIONS_ENABLED=true TERMINAL_ACTIONS_ENABLED=true
SWEEP_TOKEN= SWEEP_TOKEN=
BRIEF_VERBOSITY=standard BRIEF_VERBOSITY=standard
@@ -188,6 +190,8 @@ LLM_MODEL=your-model
For Pangolin or another reverse proxy, forward HTTP traffic to `intelligence-terminal:3117` (or the `PORT` you set). Missing API keys do not crash sweeps; affected sources are reported as degraded in `/api/health`. For Pangolin or another reverse proxy, forward HTTP traffic to `intelligence-terminal:3117` (or the `PORT` you set). Missing API keys do not crash sweeps; affected sources are reported as degraded in `/api/health`.
When data remains stale past `STALE_DATA_MAX_AGE_MINUTES`, the server sends an operator alert through configured Telegram/Discord channels after failed or degraded sweep attempts. `STALE_ALERT_COOLDOWN_MINUTES` prevents repeated stale alerts from spamming every refresh interval. Set `DASHBOARD_URL` to the Pangolin/public URL you want included in those alerts.
The dashboard Terminal Actions panel can trigger `status`, `sweep`, and `brief` through `/api/action`. Leave `TERMINAL_ACTIONS_ENABLED=true` for a private home-server deployment. For an internet-exposed deployment, set `SWEEP_TOKEN` and pass it through trusted automation, or set `TERMINAL_ACTIONS_ENABLED=false` to disable browser-triggered actions. If you protect actions with `SWEEP_TOKEN`, the browser can send it from `localStorage.crucix_sweep_token`. The dashboard Terminal Actions panel can trigger `status`, `sweep`, and `brief` through `/api/action`. Leave `TERMINAL_ACTIONS_ENABLED=true` for a private home-server deployment. For an internet-exposed deployment, set `SWEEP_TOKEN` and pass it through trusted automation, or set `TERMINAL_ACTIONS_ENABLED=false` to disable browser-triggered actions. If you protect actions with `SWEEP_TOKEN`, the browser can send it from `localStorage.crucix_sweep_token`.
#### Build And Publish Your Gitea Image #### Build And Publish Your Gitea Image
@@ -525,6 +529,9 @@ All settings are in `.env` with sensible defaults:
|----------|---------|-------------| |----------|---------|-------------|
| `PORT` | `3117` | Dashboard server port | | `PORT` | `3117` | Dashboard server port |
| `REFRESH_INTERVAL_MINUTES` | `15` | Auto-refresh interval | | `REFRESH_INTERVAL_MINUTES` | `15` | Auto-refresh interval |
| `STALE_DATA_MAX_AGE_MINUTES` | `60` | Data age threshold for stale health state |
| `STALE_ALERT_COOLDOWN_MINUTES` | `60` | Minimum time between repeated operator stale-data alerts |
| `DASHBOARD_URL` | `http://localhost:<PORT>` | Dashboard URL included in operator stale-data alerts |
| `LLM_PROVIDER` | disabled | `anthropic`, `openai`, `gemini`, `codex`, `openrouter`, `minimax`, `mistral`, or `grok` | | `LLM_PROVIDER` | disabled | `anthropic`, `openai`, `gemini`, `codex`, `openrouter`, `minimax`, `mistral`, or `grok` |
| `LLM_API_KEY` | — | API key (not needed for codex) | | `LLM_API_KEY` | — | API key (not needed for codex) |
| `LLM_MODEL` | per-provider default | Override model selection | | `LLM_MODEL` | per-provider default | Override model selection |

View File

@@ -23,6 +23,8 @@ export default {
refreshIntervalMinutes: intEnv('REFRESH_INTERVAL_MINUTES', 15), refreshIntervalMinutes: intEnv('REFRESH_INTERVAL_MINUTES', 15),
autoOpenBrowser: boolEnv('AUTO_OPEN_BROWSER', false), autoOpenBrowser: boolEnv('AUTO_OPEN_BROWSER', false),
staleDataMaxAgeMinutes: intEnv('STALE_DATA_MAX_AGE_MINUTES', 60), staleDataMaxAgeMinutes: intEnv('STALE_DATA_MAX_AGE_MINUTES', 60),
staleAlertCooldownMinutes: intEnv('STALE_ALERT_COOLDOWN_MINUTES', 60),
dashboardUrl: process.env.DASHBOARD_URL || null,
sweepToken: process.env.SWEEP_TOKEN || null, sweepToken: process.env.SWEEP_TOKEN || null,
terminalActionsEnabled: boolEnv('TERMINAL_ACTIONS_ENABLED', true), terminalActionsEnabled: boolEnv('TERMINAL_ACTIONS_ENABLED', true),

52
lib/stale-alerts.mjs Normal file
View File

@@ -0,0 +1,52 @@
// Cooldown applied when the caller supplies none (or an unusable one): 1 hour.
const DEFAULT_COOLDOWN_MS = 60 * 60 * 1000;

/**
 * Decide whether an operator stale-data alert should be sent right now.
 *
 * The function mutates `state`: it clears `lastStaleAlertKey` when the data is
 * not stale, and records `lastStaleAlertKey` / `lastStaleAlertAt` whenever it
 * decides an alert should be sent.
 *
 * @param {object} health - Health snapshot; reads `stale`, `lastSuccessfulSweep`,
 *   `lastSweepError`, `sourcesFailed` and `sourcesDegraded`.
 * @param {object} [state] - Mutable bookkeeping object shared between calls.
 * @param {object} [opts]
 * @param {number} [opts.now] - Current time in ms; defaults to `Date.now()`.
 * @param {number} [opts.cooldownMs] - Minimum ms between alerts for the same
 *   condition; defaults to one hour. A NaN value falls back to the default.
 * @returns {{send: boolean, reason: string, key?: string}} Decision; `key` is
 *   present for the `cooldown` and `stale` outcomes.
 */
export function shouldSendStaleAlert(health, state = {}, opts = {}) {
  const now = opts.now ?? Date.now();

  // A NaN cooldown would make the `<` comparison below always false and
  // silently disable throttling, so fall back to the default instead.
  const requestedCooldownMs = Number(opts.cooldownMs ?? DEFAULT_COOLDOWN_MS);
  const cooldownMs = Number.isNaN(requestedCooldownMs) ? DEFAULT_COOLDOWN_MS : requestedCooldownMs;

  if (!health?.stale) {
    // Fresh data ends the current incident; the next stale state alerts immediately.
    state.lastStaleAlertKey = null;
    return { send: false, reason: 'not_stale' };
  }

  // The key fingerprints the stale condition. A changed condition (new error,
  // different failure counts) bypasses the cooldown and alerts again.
  const key = [
    health.lastSuccessfulSweep || 'never',
    health.lastSweepError || 'no-error',
    health.sourcesFailed || 0,
    health.sourcesDegraded || 0,
  ].join('|');

  const sameCondition = state.lastStaleAlertKey === key;
  const withinCooldown = now - (state.lastStaleAlertAt || 0) < cooldownMs;
  if (sameCondition && withinCooldown) {
    return { send: false, reason: 'cooldown', key };
  }

  state.lastStaleAlertKey = key;
  state.lastStaleAlertAt = now;
  return { send: true, reason: 'stale', key };
}
/**
 * Build the text of the operator stale-data alert.
 *
 * @param {object} health - Health snapshot; reads `status`, `dataAgeSeconds`,
 *   `lastSuccessfulSweep`, `lastSweep`, `lastSweepError`, the `sources*`
 *   counters and the optional `sourceHealth` list.
 * @param {object} [opts]
 * @param {string} [opts.dashboardUrl] - Link appended to the message;
 *   defaults to `http://localhost:3117`.
 * @param {string} [opts.context] - What triggered the check; defaults to
 *   `scheduled sweep`.
 * @returns {string} Newline-joined alert message.
 */
export function formatStaleAlert(health, opts = {}) {
  const link = opts.dashboardUrl || 'http://localhost:3117';
  const trigger = opts.context || 'scheduled sweep';

  let age = 'unknown';
  if (health.dataAgeSeconds != null) {
    age = Math.floor(health.dataAgeSeconds / 60);
  }

  // A source is listed when it reports a non-"ok" status or carries an error.
  const needsAttention = (src) => (src.status && src.status !== 'ok') || src.error;
  const toBullet = (src) => {
    const label = src.name || src.n || 'source';
    const condition = src.status || 'degraded';
    // Error text is capped at 100 characters to keep the alert compact.
    const detail = src.error ? ` (${String(src.error).slice(0, 100)})` : '';
    return `- ${label}: ${condition}${detail}`;
  };
  // At most six affected sources are shown.
  const bullets = (health.sourceHealth || []).filter(needsAttention).slice(0, 6).map(toBullet);

  const lines = [];
  lines.push('*CRUCIX STALE DATA ALERT*');
  lines.push('');
  lines.push(`Context: ${trigger}`);
  lines.push(`Status: ${health.status || 'unknown'}`);
  lines.push(`Data age: ${age} minutes`);
  lines.push(`Last successful sweep: ${health.lastSuccessfulSweep || 'never'}`);
  lines.push(`Last attempted sweep: ${health.lastSweep || 'never'}`);
  lines.push(`Last error: ${health.lastSweepError || 'none'}`);
  lines.push(
    `Sources: ${health.sourcesOk || 0} OK / ${health.sourcesDegraded || 0} degraded / ${health.sourcesFailed || 0} failed`,
  );
  lines.push('');
  lines.push('*Affected sources*');
  lines.push(bullets.length ? bullets.join('\n') : '- No per-source errors available');
  lines.push('');
  lines.push(`Dashboard: ${link}`);
  return lines.join('\n');
}

View File

@@ -18,6 +18,7 @@ import { TelegramAlerter } from './lib/alerts/telegram.mjs';
import { DiscordAlerter } from './lib/alerts/discord.mjs'; import { DiscordAlerter } from './lib/alerts/discord.mjs';
import { getFetchMetrics } from './apis/utils/fetch.mjs'; import { getFetchMetrics } from './apis/utils/fetch.mjs';
import { IntelligenceStore } from './lib/intelligence-store.mjs'; import { IntelligenceStore } from './lib/intelligence-store.mjs';
import { formatStaleAlert, shouldSendStaleAlert } from './lib/stale-alerts.mjs';
const __dirname = dirname(fileURLToPath(import.meta.url)); const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = __dirname; const ROOT = __dirname;
@@ -39,6 +40,7 @@ let sweepStartedAt = null; // Timestamp when current/last sweep started
let sweepInProgress = false; let sweepInProgress = false;
const startTime = Date.now(); const startTime = Date.now();
const sseClients = new Set(); const sseClients = new Set();
// Mutable bookkeeping handed to shouldSendStaleAlert() on every check so it can
// suppress repeat alerts (it reads/writes lastStaleAlertKey and lastStaleAlertAt).
const staleAlertState = {};
// === Delta/Memory === // === Delta/Memory ===
const memory = new MemoryManager(RUNS_DIR); const memory = new MemoryManager(RUNS_DIR);
@@ -411,6 +413,31 @@ function buildHealth() {
}; };
} }
/**
 * Send an operator alert through the configured channels when the current
 * health snapshot is stale and the alert is not suppressed by the cooldown.
 *
 * @param {string} [context='scheduled sweep'] - Label describing what
 *   triggered the check; included in the alert text.
 * @returns {Promise<boolean>} `true` when at least one channel accepted the
 *   alert, otherwise `false` (not stale, in cooldown, no channel configured,
 *   or every channel failed).
 */
async function notifyIfDataStale(context = 'scheduled sweep') {
  const health = buildHealth();
  const decision = shouldSendStaleAlert(health, staleAlertState, {
    cooldownMs: config.staleAlertCooldownMinutes * 60 * 1000,
  });
  if (!decision.send) return false;

  const dashboardUrl = config.dashboardUrl || `http://localhost:${config.port}`;
  const message = formatStaleAlert(health, { dashboardUrl, context });

  const sends = [];
  if (telegramAlerter.isConfigured) sends.push(telegramAlerter.sendMessage(message));
  if (discordAlerter.isConfigured) sends.push(discordAlerter.sendAlert(message));
  if (sends.length === 0) {
    // The decision above already started the cooldown, so this warning is
    // throttled the same way an alert would be.
    console.warn('[Crucix] Data is stale but no operator alert channel is configured');
    return false;
  }

  const results = await Promise.allSettled(sends);

  // Surface channel errors instead of dropping them silently.
  for (const result of results) {
    if (result.status === 'rejected') {
      console.error('[Crucix] Stale-data alert channel error:', result.reason?.message ?? result.reason);
    }
  }

  // A channel counts as successful when its promise fulfils with `true` or
  // with an object whose `ok` is `true`.
  const sent = results.some(r => r.status === 'fulfilled' && (r.value === true || r.value?.ok === true));
  if (sent) {
    console.warn('[Crucix] Operator stale-data alert sent');
  } else {
    // shouldSendStaleAlert() recorded this alert as sent before delivery was
    // attempted. Clear the key so the next check retries instead of being
    // suppressed by the cooldown for an alert nobody received.
    staleAlertState.lastStaleAlertKey = null;
    console.warn('[Crucix] Operator stale-data alert attempted but no channel accepted it');
  }
  return sent;
}
function buildBrief(data) { function buildBrief(data) {
const verbosity = config.telegram.briefVerbosity || 'standard'; const verbosity = config.telegram.briefVerbosity || 'standard';
const delta = memory.getLastDelta(); const delta = memory.getLastDelta();
@@ -553,6 +580,9 @@ async function runSweepCycle() {
broadcast({ type: 'sweep_error', error: err.message }); broadcast({ type: 'sweep_error', error: err.message });
} finally { } finally {
sweepInProgress = false; sweepInProgress = false;
await notifyIfDataStale(lastSweepError ? 'failed sweep' : 'completed sweep').catch(err => {
console.error('[Crucix] Stale-data operator alert failed:', err.message);
});
} }
} }

View File

@@ -1,6 +1,7 @@
import test from 'node:test'; import test from 'node:test';
import assert from 'node:assert/strict'; import assert from 'node:assert/strict';
import { safeFetch, safeFetchText, getFetchMetrics } from '../apis/utils/fetch.mjs'; import { safeFetch, safeFetchText, getFetchMetrics } from '../apis/utils/fetch.mjs';
import { formatStaleAlert, shouldSendStaleAlert } from '../lib/stale-alerts.mjs';
test('safeFetch reports HTML as degraded JSON response', async () => { test('safeFetch reports HTML as degraded JSON response', async () => {
const originalFetch = globalThis.fetch; const originalFetch = globalThis.fetch;
@@ -34,3 +35,60 @@ test('safeFetchText returns text and byte count', async () => {
globalThis.fetch = originalFetch; globalThis.fetch = originalFetch;
} }
}); });
// Fresh data must not alert, and must clear the key of any earlier incident.
test('stale alert is skipped for fresh health and resets active key', () => {
  const tracker = { lastStaleAlertKey: 'old', lastStaleAlertAt: 100 };
  const freshHealth = { stale: false };

  const { send, reason } = shouldSendStaleAlert(freshHealth, tracker, { now: 200 });

  assert.equal(send, false);
  assert.equal(reason, 'not_stale');
  assert.equal(tracker.lastStaleAlertKey, null);
});
// The same stale condition checked twice inside the cooldown alerts only once.
test('stale alert sends once and deduplicates during cooldown', () => {
  const tracker = {};
  const cooldownMs = 60_000;
  const staleHealth = {
    stale: true,
    lastSuccessfulSweep: '2026-05-17T08:00:00.000Z',
    lastSweepError: 'network timeout',
    sourcesFailed: 2,
    sourcesDegraded: 1,
  };

  const initial = shouldSendStaleAlert(staleHealth, tracker, { now: 1_000, cooldownMs });
  const repeat = shouldSendStaleAlert(staleHealth, tracker, { now: 2_000, cooldownMs });

  assert.equal(initial.send, true);
  assert.equal(repeat.send, false);
  assert.equal(repeat.reason, 'cooldown');
});
// Once the cooldown has elapsed, an unchanged stale condition alerts again.
test('stale alert repeats after cooldown', () => {
  const tracker = {};
  const staleHealth = { stale: true, lastSuccessfulSweep: 'a', lastSweepError: 'timeout', sourcesFailed: 1 };

  const initial = shouldSendStaleAlert(staleHealth, tracker, { now: 1_000, cooldownMs: 60_000 });
  assert.equal(initial.send, true);

  const afterCooldown = shouldSendStaleAlert(staleHealth, tracker, { now: 62_000, cooldownMs: 60_000 });
  assert.equal(afterCooldown.send, true);
});
// The rendered alert carries the title, data age, per-source detail and dashboard link.
test('stale alert message includes operator context and affected sources', () => {
  const staleHealth = {
    status: 'stale',
    stale: true,
    dataAgeSeconds: 7200,
    lastSuccessfulSweep: '2026-05-17T08:00:00.000Z',
    lastSweep: '2026-05-17T10:00:00.000Z',
    lastSweepError: 'GDELT timeout',
    sourcesOk: 20,
    sourcesDegraded: 3,
    sourcesFailed: 2,
    sourceHealth: [
      { name: 'GDELT', status: 'degraded', error: 'timeout' },
      { name: 'Reddit', status: 'no_credentials' },
    ],
  };
  const renderOptions = { dashboardUrl: 'https://terminal.example.test', context: 'failed sweep' };

  const rendered = formatStaleAlert(staleHealth, renderOptions);

  const expectedPatterns = [
    /CRUCIX STALE DATA ALERT/,
    /Data age: 120 minutes/,
    /GDELT: degraded \(timeout\)/,
    /Dashboard: https:\/\/terminal\.example\.test/,
  ];
  for (const pattern of expectedPatterns) {
    assert.match(rendered, pattern);
  }
});