diff --git a/.env.example b/.env.example
index a862e47..b741b4f 100644
--- a/.env.example
+++ b/.env.example
@@ -6,6 +6,8 @@ PORT=3117
 REFRESH_INTERVAL_MINUTES=15
 AUTO_OPEN_BROWSER=false
 STALE_DATA_MAX_AGE_MINUTES=60
+STALE_ALERT_COOLDOWN_MINUTES=60
+DASHBOARD_URL=
 TERMINAL_ACTIONS_ENABLED=true
 SWEEP_TOKEN=
 BRIEF_VERBOSITY=standard
diff --git a/README.md b/README.md
index 74e69f3..44bb983 100644
--- a/README.md
+++ b/README.md
@@ -135,6 +135,8 @@ PORT=3117
 REFRESH_INTERVAL_MINUTES=15
 AUTO_OPEN_BROWSER=false
 STALE_DATA_MAX_AGE_MINUTES=60
+STALE_ALERT_COOLDOWN_MINUTES=60
+DASHBOARD_URL=https://intelligence.example.internal
 TERMINAL_ACTIONS_ENABLED=true
 SWEEP_TOKEN=
 BRIEF_VERBOSITY=standard
@@ -188,6 +190,8 @@ LLM_MODEL=your-model
 
 For Pangolin or another reverse proxy, forward HTTP traffic to `intelligence-terminal:3117` (or the `PORT` you set). Missing API keys do not crash sweeps; affected sources are reported as degraded in `/api/health`.
 
+When data remains stale past `STALE_DATA_MAX_AGE_MINUTES`, the server sends an operator alert through configured Telegram/Discord channels after failed or degraded sweep attempts. `STALE_ALERT_COOLDOWN_MINUTES` prevents repeated stale alerts from spamming every refresh interval. Set `DASHBOARD_URL` to the Pangolin/public URL you want included in those alerts. An alert that no channel accepts does not start the cooldown, so the next sweep retries it.
+
 The dashboard Terminal Actions panel can trigger `status`, `sweep`, and `brief` through `/api/action`. Leave `TERMINAL_ACTIONS_ENABLED=true` for a private home-server deployment. For an internet-exposed deployment, set `SWEEP_TOKEN` and pass it through trusted automation, or set `TERMINAL_ACTIONS_ENABLED=false` to disable browser-triggered actions. If you protect actions with `SWEEP_TOKEN`, the browser can send it from `localStorage.crucix_sweep_token`.
 
 #### Build And Publish Your Gitea Image
@@ -525,6 +529,9 @@ All settings are in `.env` with sensible defaults:
 |----------|---------|-------------|
 | `PORT` | `3117` | Dashboard server port |
 | `REFRESH_INTERVAL_MINUTES` | `15` | Auto-refresh interval |
+| `STALE_DATA_MAX_AGE_MINUTES` | `60` | Data age threshold for stale health state |
+| `STALE_ALERT_COOLDOWN_MINUTES` | `60` | Minimum time between repeated operator stale-data alerts |
+| `DASHBOARD_URL` | local URL | Dashboard URL included in operator alerts |
 | `LLM_PROVIDER` | disabled | `anthropic`, `openai`, `gemini`, `codex`, `openrouter`, `minimax`, `mistral`, or `grok` |
 | `LLM_API_KEY` | — | API key (not needed for codex) |
 | `LLM_MODEL` | per-provider default | Override model selection |
diff --git a/crucix.config.mjs b/crucix.config.mjs
index 19bbce2..c0604b0 100644
--- a/crucix.config.mjs
+++ b/crucix.config.mjs
@@ -23,6 +23,8 @@ export default {
   refreshIntervalMinutes: intEnv('REFRESH_INTERVAL_MINUTES', 15),
   autoOpenBrowser: boolEnv('AUTO_OPEN_BROWSER', false),
   staleDataMaxAgeMinutes: intEnv('STALE_DATA_MAX_AGE_MINUTES', 60),
+  staleAlertCooldownMinutes: intEnv('STALE_ALERT_COOLDOWN_MINUTES', 60),
+  dashboardUrl: process.env.DASHBOARD_URL || null,
   sweepToken: process.env.SWEEP_TOKEN || null,
   terminalActionsEnabled: boolEnv('TERMINAL_ACTIONS_ENABLED', true),
 
diff --git a/lib/stale-alerts.mjs b/lib/stale-alerts.mjs
new file mode 100644
index 0000000..02835ac
--- /dev/null
+++ b/lib/stale-alerts.mjs
@@ -0,0 +1,85 @@
+/** Cooldown used when the caller does not supply a usable `cooldownMs`. */
+const DEFAULT_COOLDOWN_MS = 60 * 60 * 1000;
+
+/**
+ * Decide whether an operator stale-data alert should be sent now.
+ *
+ * Mutates `state`: clears `lastStaleAlertKey` when the data is not stale, and
+ * records `lastStaleAlertKey` / `lastStaleAlertAt` whenever it answers
+ * `send: true`. A caller that fails to deliver the alert must restore the
+ * previous state itself if the next check should retry.
+ *
+ * @param {object} health - Health snapshot; reads `stale`, `lastSuccessfulSweep`,
+ *   `lastSweepError`, `sourcesFailed` and `sourcesDegraded`.
+ * @param {object} [state] - Mutable throttle state kept between calls.
+ * @param {object} [opts]
+ * @param {number} [opts.now] - Current time in ms; defaults to `Date.now()`.
+ * @param {number} [opts.cooldownMs] - Minimum gap between identical alerts.
+ * @returns {{ send: boolean, reason: string, key?: string }}
+ */
+export function shouldSendStaleAlert(health, state = {}, opts = {}) {
+  const now = opts.now ?? Date.now();
+  // A NaN or negative cooldown would make the comparison below always false
+  // and silently disable throttling, so fall back to the default instead.
+  const cooldownMs = Number.isFinite(opts.cooldownMs) && opts.cooldownMs >= 0
+    ? opts.cooldownMs
+    : DEFAULT_COOLDOWN_MS;
+  if (!health?.stale) {
+    state.lastStaleAlertKey = null;
+    return { send: false, reason: 'not_stale' };
+  }
+
+  // Identifies one stale episode; a changed key bypasses the cooldown.
+  const key = [
+    health.lastSuccessfulSweep || 'never',
+    health.lastSweepError || 'no-error',
+    health.sourcesFailed || 0,
+    health.sourcesDegraded || 0,
+  ].join('|');
+
+  if (state.lastStaleAlertKey === key && now - (state.lastStaleAlertAt || 0) < cooldownMs) {
+    return { send: false, reason: 'cooldown', key };
+  }
+
+  state.lastStaleAlertKey = key;
+  state.lastStaleAlertAt = now;
+  return { send: true, reason: 'stale', key };
+}
+
+/**
+ * Build the operator-facing stale-data alert text.
+ *
+ * @param {object} health - Health snapshot to summarise.
+ * @param {object} [opts]
+ * @param {string} [opts.dashboardUrl] - Link appended to the alert.
+ * @param {string} [opts.context] - What triggered the check.
+ * @returns {string} Multi-line alert message.
+ */
+export function formatStaleAlert(health, opts = {}) {
+  const dashboardUrl = opts.dashboardUrl || 'http://localhost:3117';
+  const context = opts.context || 'scheduled sweep';
+  const ageMinutes = health.dataAgeSeconds == null ? 'unknown' : Math.floor(health.dataAgeSeconds / 60);
+  // Tolerate a missing or non-array sourceHealth rather than throwing mid-alert.
+  const sources = Array.isArray(health.sourceHealth) ? health.sourceHealth : [];
+  const affected = sources
+    .filter(s => s && ((s.status && s.status !== 'ok') || s.error))
+    .slice(0, 6)
+    .map(s => `- ${s.name || s.n || 'source'}: ${s.status || 'degraded'}${s.error ? ` (${String(s.error).slice(0, 100)})` : ''}`);
+
+  return [
+    '*CRUCIX STALE DATA ALERT*',
+    '',
+    `Context: ${context}`,
+    `Status: ${health.status || 'unknown'}`,
+    `Data age: ${ageMinutes} minutes`,
+    `Last successful sweep: ${health.lastSuccessfulSweep || 'never'}`,
+    `Last attempted sweep: ${health.lastSweep || 'never'}`,
+    `Last error: ${health.lastSweepError || 'none'}`,
+    `Sources: ${health.sourcesOk || 0} OK / ${health.sourcesDegraded || 0} degraded / ${health.sourcesFailed || 0} failed`,
+    '',
+    '*Affected sources*',
+    affected.length ? affected.join('\n') : '- No per-source errors available',
+    '',
+    `Dashboard: ${dashboardUrl}`,
+  ].join('\n');
+}
diff --git a/server.mjs b/server.mjs
index 95949f0..58bc25e 100644
--- a/server.mjs
+++ b/server.mjs
@@ -18,6 +18,7 @@ import { TelegramAlerter } from './lib/alerts/telegram.mjs';
 import { DiscordAlerter } from './lib/alerts/discord.mjs';
 import { getFetchMetrics } from './apis/utils/fetch.mjs';
 import { IntelligenceStore } from './lib/intelligence-store.mjs';
+import { formatStaleAlert, shouldSendStaleAlert } from './lib/stale-alerts.mjs';
 
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const ROOT = __dirname;
@@ -39,6 +40,7 @@ let sweepStartedAt = null; // Timestamp when current/last sweep started
 let sweepInProgress = false;
 const startTime = Date.now();
 const sseClients = new Set();
+const staleAlertState = {};
 
 // === Delta/Memory ===
 const memory = new MemoryManager(RUNS_DIR);
@@ -411,6 +413,53 @@ function buildHealth() {
   };
 }
 
+/**
+ * Send an operator alert when buildHealth() reports stale data.
+ *
+ * @param {string} [context] - Label included in the alert text.
+ * @returns {Promise<boolean>} true when at least one channel accepted the alert.
+ */
+async function notifyIfDataStale(context = 'scheduled sweep') {
+  const health = buildHealth();
+  // shouldSendStaleAlert() records the alert as sent before delivery, so
+  // keep a copy to restore if no channel actually accepts the message.
+  const previousState = { ...staleAlertState };
+  const decision = shouldSendStaleAlert(health, staleAlertState, {
+    cooldownMs: config.staleAlertCooldownMinutes * 60 * 1000,
+  });
+  if (!decision.send) return false;
+
+  const dashboardUrl = config.dashboardUrl || `http://localhost:${config.port}`;
+  const message = formatStaleAlert(health, { dashboardUrl, context });
+  const sends = [];
+  if (telegramAlerter.isConfigured) sends.push(telegramAlerter.sendMessage(message));
+  if (discordAlerter.isConfigured) sends.push(discordAlerter.sendAlert(message));
+
+  if (sends.length === 0) {
+    console.warn('[Crucix] Data is stale but no operator alert channel is configured');
+    return false;
+  }
+
+  const results = await Promise.allSettled(sends);
+  for (const result of results) {
+    if (result.status === 'rejected') {
+      console.error('[Crucix] Stale-data alert channel error:', result.reason?.message || result.reason);
+    }
+  }
+  const sent = results.some(r => r.status === 'fulfilled' && (r.value === true || r.value?.ok === true));
+  if (sent) {
+    console.warn('[Crucix] Operator stale-data alert sent');
+    return true;
+  }
+
+  // Nothing was delivered: undo the throttle update so the next sweep retries
+  // instead of being suppressed for the whole cooldown window.
+  staleAlertState.lastStaleAlertKey = previousState.lastStaleAlertKey;
+  staleAlertState.lastStaleAlertAt = previousState.lastStaleAlertAt;
+  console.warn('[Crucix] Operator stale-data alert attempted but no channel accepted it');
+  return false;
+}
+
 function buildBrief(data) {
   const verbosity = config.telegram.briefVerbosity || 'standard';
   const delta = memory.getLastDelta();
@@ -553,6 +602,9 @@ async function runSweepCycle() {
     broadcast({ type: 'sweep_error', error: err.message });
   } finally {
     sweepInProgress = false;
+    await notifyIfDataStale(lastSweepError ? 'failed sweep' : 'completed sweep').catch(err => {
+      console.error('[Crucix] Stale-data operator alert failed:', err.message);
+    });
   }
 }
 
diff --git a/test/fetch-utils.test.mjs b/test/fetch-utils.test.mjs
index 2dcee45..09cf265 100644
--- a/test/fetch-utils.test.mjs
+++ b/test/fetch-utils.test.mjs
@@ -1,6 +1,7 @@
 import test from 'node:test';
 import assert from 'node:assert/strict';
 import { safeFetch, safeFetchText, getFetchMetrics } from '../apis/utils/fetch.mjs';
+import { formatStaleAlert, shouldSendStaleAlert } from '../lib/stale-alerts.mjs';
 
 test('safeFetch reports HTML as degraded JSON response', async () => {
   const originalFetch = globalThis.fetch;
@@ -34,3 +35,73 @@ test('safeFetchText returns text and byte count', async () => {
     globalThis.fetch = originalFetch;
   }
 });
+
+test('stale alert is skipped for fresh health and resets active key', () => {
+  const state = { lastStaleAlertKey: 'old', lastStaleAlertAt: 100 };
+  const decision = shouldSendStaleAlert({ stale: false }, state, { now: 200 });
+  assert.equal(decision.send, false);
+  assert.equal(decision.reason, 'not_stale');
+  assert.equal(state.lastStaleAlertKey, null);
+});
+
+test('stale alert sends once and deduplicates during cooldown', () => {
+  const state = {};
+  const health = {
+    stale: true,
+    lastSuccessfulSweep: '2026-05-17T08:00:00.000Z',
+    lastSweepError: 'network timeout',
+    sourcesFailed: 2,
+    sourcesDegraded: 1,
+  };
+
+  const first = shouldSendStaleAlert(health, state, { now: 1_000, cooldownMs: 60_000 });
+  const second = shouldSendStaleAlert(health, state, { now: 2_000, cooldownMs: 60_000 });
+
+  assert.equal(first.send, true);
+  assert.equal(second.send, false);
+  assert.equal(second.reason, 'cooldown');
+});
+
+test('stale alert repeats after cooldown', () => {
+  const state = {};
+  const health = { stale: true, lastSuccessfulSweep: 'a', lastSweepError: 'timeout', sourcesFailed: 1 };
+
+  assert.equal(shouldSendStaleAlert(health, state, { now: 1_000, cooldownMs: 60_000 }).send, true);
+  assert.equal(shouldSendStaleAlert(health, state, { now: 62_000, cooldownMs: 60_000 }).send, true);
+});
+
+test('stale alert message includes operator context and affected sources', () => {
+  const message = formatStaleAlert({
+    status: 'stale',
+    stale: true,
+    dataAgeSeconds: 7200,
+    lastSuccessfulSweep: '2026-05-17T08:00:00.000Z',
+    lastSweep: '2026-05-17T10:00:00.000Z',
+    lastSweepError: 'GDELT timeout',
+    sourcesOk: 20,
+    sourcesDegraded: 3,
+    sourcesFailed: 2,
+    sourceHealth: [
+      { name: 'GDELT', status: 'degraded', error: 'timeout' },
+      { name: 'Reddit', status: 'no_credentials' },
+    ],
+  }, { dashboardUrl: 'https://terminal.example.test', context: 'failed sweep' });
+
+  assert.match(message, /CRUCIX STALE DATA ALERT/);
+  assert.match(message, /Data age: 120 minutes/);
+  assert.match(message, /GDELT: degraded \(timeout\)/);
+  assert.match(message, /Dashboard: https:\/\/terminal\.example\.test/);
+});
+
+test('stale alert falls back to default cooldown for invalid cooldownMs', () => {
+  const state = {};
+  const health = { stale: true, lastSuccessfulSweep: 'a', lastSweepError: 'timeout', sourcesFailed: 1 };
+
+  assert.equal(shouldSendStaleAlert(health, state, { now: 1_000, cooldownMs: Number.NaN }).send, true);
+  assert.equal(shouldSendStaleAlert(health, state, { now: 2_000, cooldownMs: Number.NaN }).send, false);
+});
+
+test('stale alert message tolerates malformed sourceHealth', () => {
+  const message = formatStaleAlert({ stale: true, sourceHealth: { GDELT: 'degraded' } });
+  assert.match(message, /No per-source errors available/);
+});