feat: alert operators on stale data
This commit is contained in:
@@ -6,6 +6,8 @@ PORT=3117
|
|||||||
REFRESH_INTERVAL_MINUTES=15
|
REFRESH_INTERVAL_MINUTES=15
|
||||||
AUTO_OPEN_BROWSER=false
|
AUTO_OPEN_BROWSER=false
|
||||||
STALE_DATA_MAX_AGE_MINUTES=60
|
STALE_DATA_MAX_AGE_MINUTES=60
|
||||||
|
STALE_ALERT_COOLDOWN_MINUTES=60
|
||||||
|
DASHBOARD_URL=
|
||||||
TERMINAL_ACTIONS_ENABLED=true
|
TERMINAL_ACTIONS_ENABLED=true
|
||||||
SWEEP_TOKEN=
|
SWEEP_TOKEN=
|
||||||
BRIEF_VERBOSITY=standard
|
BRIEF_VERBOSITY=standard
|
||||||
|
|||||||
@@ -135,6 +135,8 @@ PORT=3117
|
|||||||
REFRESH_INTERVAL_MINUTES=15
|
REFRESH_INTERVAL_MINUTES=15
|
||||||
AUTO_OPEN_BROWSER=false
|
AUTO_OPEN_BROWSER=false
|
||||||
STALE_DATA_MAX_AGE_MINUTES=60
|
STALE_DATA_MAX_AGE_MINUTES=60
|
||||||
|
STALE_ALERT_COOLDOWN_MINUTES=60
|
||||||
|
DASHBOARD_URL=https://intelligence.example.internal
|
||||||
TERMINAL_ACTIONS_ENABLED=true
|
TERMINAL_ACTIONS_ENABLED=true
|
||||||
SWEEP_TOKEN=
|
SWEEP_TOKEN=
|
||||||
BRIEF_VERBOSITY=standard
|
BRIEF_VERBOSITY=standard
|
||||||
@@ -188,6 +190,8 @@ LLM_MODEL=your-model
|
|||||||
|
|
||||||
For Pangolin or another reverse proxy, forward HTTP traffic to `intelligence-terminal:3117` (or the `PORT` you set). Missing API keys do not crash sweeps; affected sources are reported as degraded in `/api/health`.
|
For Pangolin or another reverse proxy, forward HTTP traffic to `intelligence-terminal:3117` (or the `PORT` you set). Missing API keys do not crash sweeps; affected sources are reported as degraded in `/api/health`.
|
||||||
|
|
||||||
|
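When data remains stale past `STALE_DATA_MAX_AGE_MINUTES`, the server sends an operator alert through the configured Telegram/Discord channels after failed or degraded sweep attempts. `STALE_ALERT_COOLDOWN_MINUTES` is the minimum time before an identical stale alert is repeated, so an ongoing outage does not produce an alert on every refresh interval; if the failure details change (last successful sweep, last error, or the failed/degraded source counts), a new alert is sent immediately. Set `DASHBOARD_URL` to the Pangolin/public URL you want included in those alerts.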
|
||||||
|
|
||||||
The dashboard Terminal Actions panel can trigger `status`, `sweep`, and `brief` through `/api/action`. Leave `TERMINAL_ACTIONS_ENABLED=true` for a private home-server deployment. For an internet-exposed deployment, set `SWEEP_TOKEN` and pass it through trusted automation, or set `TERMINAL_ACTIONS_ENABLED=false` to disable browser-triggered actions. If you protect actions with `SWEEP_TOKEN`, the browser can send it from `localStorage.crucix_sweep_token`.
|
The dashboard Terminal Actions panel can trigger `status`, `sweep`, and `brief` through `/api/action`. Leave `TERMINAL_ACTIONS_ENABLED=true` for a private home-server deployment. For an internet-exposed deployment, set `SWEEP_TOKEN` and pass it through trusted automation, or set `TERMINAL_ACTIONS_ENABLED=false` to disable browser-triggered actions. If you protect actions with `SWEEP_TOKEN`, the browser can send it from `localStorage.crucix_sweep_token`.
|
||||||
|
|
||||||
#### Build And Publish Your Gitea Image
|
#### Build And Publish Your Gitea Image
|
||||||
@@ -525,6 +529,9 @@ All settings are in `.env` with sensible defaults:
|
|||||||
|----------|---------|-------------|
|
|----------|---------|-------------|
|
||||||
| `PORT` | `3117` | Dashboard server port |
|
| `PORT` | `3117` | Dashboard server port |
|
||||||
| `REFRESH_INTERVAL_MINUTES` | `15` | Auto-refresh interval |
|
| `REFRESH_INTERVAL_MINUTES` | `15` | Auto-refresh interval |
|
||||||
|
| `STALE_DATA_MAX_AGE_MINUTES` | `60` | Data age threshold for stale health state |
|
||||||
|
| `STALE_ALERT_COOLDOWN_MINUTES` | `60` | Minimum time between repeated operator stale-data alerts |
|
||||||
|
| `DASHBOARD_URL` | `http://localhost:<PORT>` | Dashboard URL included in operator alerts |
|
||||||
| `LLM_PROVIDER` | disabled | `anthropic`, `openai`, `gemini`, `codex`, `openrouter`, `minimax`, `mistral`, or `grok` |
|
| `LLM_PROVIDER` | disabled | `anthropic`, `openai`, `gemini`, `codex`, `openrouter`, `minimax`, `mistral`, or `grok` |
|
||||||
| `LLM_API_KEY` | — | API key (not needed for codex) |
|
| `LLM_API_KEY` | — | API key (not needed for codex) |
|
||||||
| `LLM_MODEL` | per-provider default | Override model selection |
|
| `LLM_MODEL` | per-provider default | Override model selection |
|
||||||
|
|||||||
@@ -23,6 +23,8 @@ export default {
|
|||||||
refreshIntervalMinutes: intEnv('REFRESH_INTERVAL_MINUTES', 15),
|
refreshIntervalMinutes: intEnv('REFRESH_INTERVAL_MINUTES', 15),
|
||||||
autoOpenBrowser: boolEnv('AUTO_OPEN_BROWSER', false),
|
autoOpenBrowser: boolEnv('AUTO_OPEN_BROWSER', false),
|
||||||
staleDataMaxAgeMinutes: intEnv('STALE_DATA_MAX_AGE_MINUTES', 60),
|
staleDataMaxAgeMinutes: intEnv('STALE_DATA_MAX_AGE_MINUTES', 60),
|
||||||
|
staleAlertCooldownMinutes: intEnv('STALE_ALERT_COOLDOWN_MINUTES', 60),
|
||||||
|
dashboardUrl: process.env.DASHBOARD_URL || null,
|
||||||
sweepToken: process.env.SWEEP_TOKEN || null,
|
sweepToken: process.env.SWEEP_TOKEN || null,
|
||||||
terminalActionsEnabled: boolEnv('TERMINAL_ACTIONS_ENABLED', true),
|
terminalActionsEnabled: boolEnv('TERMINAL_ACTIONS_ENABLED', true),
|
||||||
|
|
||||||
|
|||||||
52
lib/stale-alerts.mjs
Normal file
52
lib/stale-alerts.mjs
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
// Cooldown applied when the caller does not supply a usable `cooldownMs` (one hour).
const DEFAULT_COOLDOWN_MS = 60 * 60 * 1000;

/**
 * Decide whether an operator stale-data alert should be sent right now.
 *
 * Mutates `state`:
 * - when `health` is not stale, `state.lastStaleAlertKey` is reset to `null`;
 * - when the decision is to send, `state.lastStaleAlertKey` and
 *   `state.lastStaleAlertAt` are updated to the new key and `now`.
 *
 * An alert is suppressed only while the same key (last successful sweep,
 * last error, failed/degraded counts) was alerted less than `cooldownMs` ago.
 *
 * @param {object} health - Health snapshot; only `stale`, `lastSuccessfulSweep`,
 *   `lastSweepError`, `sourcesFailed` and `sourcesDegraded` are read.
 * @param {object} [state] - Mutable dedupe state shared between calls.
 * @param {object} [opts]
 * @param {number} [opts.now] - Current time in ms; defaults to `Date.now()`.
 * @param {number} [opts.cooldownMs] - Minimum ms between identical alerts;
 *   defaults to one hour when missing, NaN, or not a number.
 * @returns {{send: boolean, reason: string, key?: string}}
 */
export function shouldSendStaleAlert(health, state = {}, opts = {}) {
  const now = opts.now ?? Date.now();

  // `??` alone lets NaN through (e.g. `undefined * 60 * 1000`). A NaN cooldown
  // makes the `<` comparison below always false, which would silently disable
  // deduplication and alert on every call. 0 and Infinity remain valid.
  const requestedCooldown = opts.cooldownMs;
  const cooldownMs =
    typeof requestedCooldown === 'number' && !Number.isNaN(requestedCooldown)
      ? requestedCooldown
      : DEFAULT_COOLDOWN_MS;

  if (!health?.stale) {
    // Clearing the key means the next stale episode alerts immediately.
    state.lastStaleAlertKey = null;
    return { send: false, reason: 'not_stale' };
  }

  // Fingerprint of the current stale condition; any change bypasses the cooldown.
  const key = [
    health.lastSuccessfulSweep || 'never',
    health.lastSweepError || 'no-error',
    health.sourcesFailed || 0,
    health.sourcesDegraded || 0,
  ].join('|');

  if (state.lastStaleAlertKey === key && now - (state.lastStaleAlertAt || 0) < cooldownMs) {
    return { send: false, reason: 'cooldown', key };
  }

  state.lastStaleAlertKey = key;
  state.lastStaleAlertAt = now;
  return { send: true, reason: 'stale', key };
}
|
||||||
|
|
||||||
|
/**
 * Build the plain-text operator message for a stale-data alert.
 *
 * @param {object} health - Health snapshot. Reads `status`, `dataAgeSeconds`,
 *   `lastSuccessfulSweep`, `lastSweep`, `lastSweepError`, `sourcesOk`,
 *   `sourcesDegraded`, `sourcesFailed` and `sourceHealth`. A nullish value is
 *   treated as an empty snapshot so the alert path never throws.
 * @param {object} [opts]
 * @param {string} [opts.dashboardUrl] - Link appended to the message;
 *   defaults to `http://localhost:3117`.
 * @param {string} [opts.context] - What triggered the check; defaults to
 *   `scheduled sweep`.
 * @returns {string} Multi-line message, lines joined with `\n`.
 */
export function formatStaleAlert(health, opts = {}) {
  const report = health ?? {};
  const dashboardUrl = opts.dashboardUrl || 'http://localhost:3117';
  const context = opts.context || 'scheduled sweep';

  // Only append the unit when there is a real number to show; this avoids
  // "unknown minutes" / "NaN minutes" in the operator message.
  const ageSeconds = report.dataAgeSeconds == null ? NaN : Number(report.dataAgeSeconds);
  const ageLine = Number.isFinite(ageSeconds)
    ? `Data age: ${Math.floor(ageSeconds / 60)} minutes`
    : 'Data age: unknown';

  // List at most six problem sources, each error clipped to 100 characters,
  // to keep the message short. Non-array or null entries are ignored.
  const sources = Array.isArray(report.sourceHealth) ? report.sourceHealth : [];
  const affected = sources
    .filter((s) => s && ((s.status && s.status !== 'ok') || s.error))
    .slice(0, 6)
    .map((s) => {
      const label = s.name || s.n || 'source';
      const detail = s.error ? ` (${String(s.error).slice(0, 100)})` : '';
      return `- ${label}: ${s.status || 'degraded'}${detail}`;
    });

  return [
    '*CRUCIX STALE DATA ALERT*',
    '',
    `Context: ${context}`,
    `Status: ${report.status || 'unknown'}`,
    ageLine,
    `Last successful sweep: ${report.lastSuccessfulSweep || 'never'}`,
    `Last attempted sweep: ${report.lastSweep || 'never'}`,
    `Last error: ${report.lastSweepError || 'none'}`,
    `Sources: ${report.sourcesOk || 0} OK / ${report.sourcesDegraded || 0} degraded / ${report.sourcesFailed || 0} failed`,
    '',
    '*Affected sources*',
    affected.length ? affected.join('\n') : '- No per-source errors available',
    '',
    `Dashboard: ${dashboardUrl}`,
  ].join('\n');
}
|
||||||
30
server.mjs
30
server.mjs
@@ -18,6 +18,7 @@ import { TelegramAlerter } from './lib/alerts/telegram.mjs';
|
|||||||
import { DiscordAlerter } from './lib/alerts/discord.mjs';
|
import { DiscordAlerter } from './lib/alerts/discord.mjs';
|
||||||
import { getFetchMetrics } from './apis/utils/fetch.mjs';
|
import { getFetchMetrics } from './apis/utils/fetch.mjs';
|
||||||
import { IntelligenceStore } from './lib/intelligence-store.mjs';
|
import { IntelligenceStore } from './lib/intelligence-store.mjs';
|
||||||
|
import { formatStaleAlert, shouldSendStaleAlert } from './lib/stale-alerts.mjs';
|
||||||
|
|
||||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||||
const ROOT = __dirname;
|
const ROOT = __dirname;
|
||||||
@@ -39,6 +40,7 @@ let sweepStartedAt = null; // Timestamp when current/last sweep started
|
|||||||
let sweepInProgress = false;
|
let sweepInProgress = false;
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
const sseClients = new Set();
|
const sseClients = new Set();
|
||||||
|
const staleAlertState = {};
|
||||||
|
|
||||||
// === Delta/Memory ===
|
// === Delta/Memory ===
|
||||||
const memory = new MemoryManager(RUNS_DIR);
|
const memory = new MemoryManager(RUNS_DIR);
|
||||||
@@ -411,6 +413,31 @@ function buildHealth() {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Send an operator alert through the configured Telegram/Discord channels when
 * the current health snapshot is stale and the dedupe cooldown allows it.
 *
 * @param {string} [context] - Label describing what triggered the check;
 *   included verbatim in the alert text.
 * @returns {Promise<boolean>} `true` when at least one channel reported success.
 */
async function notifyIfDataStale(context = 'scheduled sweep') {
  const health = buildHealth();
  // This call records the alert in `staleAlertState` immediately, which also
  // guards against a second check racing in while delivery is in flight.
  const decision = shouldSendStaleAlert(health, staleAlertState, {
    cooldownMs: config.staleAlertCooldownMinutes * 60 * 1000,
  });
  if (!decision.send) return false;

  const dashboardUrl = config.dashboardUrl || `http://localhost:${config.port}`;
  const message = formatStaleAlert(health, { dashboardUrl, context });
  const sends = [];
  if (telegramAlerter.isConfigured) sends.push(telegramAlerter.sendMessage(message));
  if (discordAlerter.isConfigured) sends.push(discordAlerter.sendAlert(message));

  if (sends.length === 0) {
    // State stays claimed here so this warning is rate-limited by the cooldown too.
    console.warn('[Crucix] Data is stale but no operator alert channel is configured');
    return false;
  }

  const results = await Promise.allSettled(sends);

  // allSettled hides rejections; surface them so delivery problems are diagnosable.
  for (const result of results) {
    if (result.status === 'rejected') {
      console.warn('[Crucix] Stale-data alert channel error:', result.reason?.message ?? result.reason);
    }
  }

  // A channel counts as successful when it resolves to `true` or `{ ok: true }`
  // (NOTE(review): alerter return shapes are not visible here — confirm).
  const sent = results.some(r => r.status === 'fulfilled' && (r.value === true || r.value?.ok === true));
  if (sent) {
    console.warn('[Crucix] Operator stale-data alert sent');
    return true;
  }

  // Nothing was delivered: release the cooldown claimed above so the next check
  // retries instead of staying silent for the whole cooldown window. Only clear
  // the key if it is still the one this call set.
  if (staleAlertState.lastStaleAlertKey === decision.key) {
    staleAlertState.lastStaleAlertKey = null;
  }
  console.warn('[Crucix] Operator stale-data alert attempted but no channel accepted it');
  return false;
}
|
||||||
|
|
||||||
function buildBrief(data) {
|
function buildBrief(data) {
|
||||||
const verbosity = config.telegram.briefVerbosity || 'standard';
|
const verbosity = config.telegram.briefVerbosity || 'standard';
|
||||||
const delta = memory.getLastDelta();
|
const delta = memory.getLastDelta();
|
||||||
@@ -553,6 +580,9 @@ async function runSweepCycle() {
|
|||||||
broadcast({ type: 'sweep_error', error: err.message });
|
broadcast({ type: 'sweep_error', error: err.message });
|
||||||
} finally {
|
} finally {
|
||||||
sweepInProgress = false;
|
sweepInProgress = false;
|
||||||
|
await notifyIfDataStale(lastSweepError ? 'failed sweep' : 'completed sweep').catch(err => {
|
||||||
|
console.error('[Crucix] Stale-data operator alert failed:', err.message);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import test from 'node:test';
|
import test from 'node:test';
|
||||||
import assert from 'node:assert/strict';
|
import assert from 'node:assert/strict';
|
||||||
import { safeFetch, safeFetchText, getFetchMetrics } from '../apis/utils/fetch.mjs';
|
import { safeFetch, safeFetchText, getFetchMetrics } from '../apis/utils/fetch.mjs';
|
||||||
|
import { formatStaleAlert, shouldSendStaleAlert } from '../lib/stale-alerts.mjs';
|
||||||
|
|
||||||
test('safeFetch reports HTML as degraded JSON response', async () => {
|
test('safeFetch reports HTML as degraded JSON response', async () => {
|
||||||
const originalFetch = globalThis.fetch;
|
const originalFetch = globalThis.fetch;
|
||||||
@@ -34,3 +35,60 @@ test('safeFetchText returns text and byte count', async () => {
|
|||||||
globalThis.fetch = originalFetch;
|
globalThis.fetch = originalFetch;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test('stale alert is skipped for fresh health and resets active key', () => {
  // Start from state that looks like an alert already fired earlier.
  const alertState = { lastStaleAlertKey: 'old', lastStaleAlertAt: 100 };
  const freshHealth = { stale: false };

  const { send, reason } = shouldSendStaleAlert(freshHealth, alertState, { now: 200 });

  assert.equal(send, false);
  assert.equal(reason, 'not_stale');
  // Fresh data must clear the remembered key so the next stale episode alerts.
  assert.equal(alertState.lastStaleAlertKey, null);
});
|
||||||
|
|
||||||
|
test('stale alert sends once and deduplicates during cooldown', () => {
  const alertState = {};
  const staleHealth = {
    stale: true,
    lastSuccessfulSweep: '2026-05-17T08:00:00.000Z',
    lastSweepError: 'network timeout',
    sourcesFailed: 2,
    sourcesDegraded: 1,
  };

  // Two evaluations one second apart, both well inside the 60 s cooldown.
  const [first, second] = [1_000, 2_000].map(
    (now) => shouldSendStaleAlert(staleHealth, alertState, { now, cooldownMs: 60_000 }),
  );

  assert.equal(first.send, true);
  assert.equal(second.send, false);
  assert.equal(second.reason, 'cooldown');
});
|
||||||
|
|
||||||
|
test('stale alert repeats after cooldown', () => {
  const alertState = {};
  const staleHealth = { stale: true, lastSuccessfulSweep: 'a', lastSweepError: 'timeout', sourcesFailed: 1 };

  // Initial alert, then an identical condition 61 s later (cooldown is 60 s).
  const initial = shouldSendStaleAlert(staleHealth, alertState, { now: 1_000, cooldownMs: 60_000 });
  assert.equal(initial.send, true);

  const afterCooldown = shouldSendStaleAlert(staleHealth, alertState, { now: 62_000, cooldownMs: 60_000 });
  assert.equal(afterCooldown.send, true);
});
|
||||||
|
|
||||||
|
test('stale alert message includes operator context and affected sources', () => {
  // Two hours stale, with one errored source and one missing credentials.
  const staleHealth = {
    status: 'stale',
    stale: true,
    dataAgeSeconds: 7200,
    lastSuccessfulSweep: '2026-05-17T08:00:00.000Z',
    lastSweep: '2026-05-17T10:00:00.000Z',
    lastSweepError: 'GDELT timeout',
    sourcesOk: 20,
    sourcesDegraded: 3,
    sourcesFailed: 2,
    sourceHealth: [
      { name: 'GDELT', status: 'degraded', error: 'timeout' },
      { name: 'Reddit', status: 'no_credentials' },
    ],
  };
  const formatOptions = { dashboardUrl: 'https://terminal.example.test', context: 'failed sweep' };

  const message = formatStaleAlert(staleHealth, formatOptions);

  assert.match(message, /CRUCIX STALE DATA ALERT/);
  assert.match(message, /Data age: 120 minutes/);
  assert.match(message, /GDELT: degraded \(timeout\)/);
  assert.match(message, /Dashboard: https:\/\/terminal\.example\.test/);
});
|
||||||
|
|||||||
Reference in New Issue
Block a user