From e574ad1c3deb2e37042d9214e65f366b30db9f3d Mon Sep 17 00:00:00 2001 From: MrSphay Date: Sun, 17 May 2026 13:58:32 +0200 Subject: [PATCH 1/2] feat: alert operators on stale data --- .env.example | 2 ++ README.md | 7 +++++ crucix.config.mjs | 2 ++ lib/stale-alerts.mjs | 52 +++++++++++++++++++++++++++++++++++ server.mjs | 30 ++++++++++++++++++++ test/fetch-utils.test.mjs | 58 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 151 insertions(+) create mode 100644 lib/stale-alerts.mjs diff --git a/.env.example b/.env.example index a862e47..b741b4f 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,8 @@ PORT=3117 REFRESH_INTERVAL_MINUTES=15 AUTO_OPEN_BROWSER=false STALE_DATA_MAX_AGE_MINUTES=60 +STALE_ALERT_COOLDOWN_MINUTES=60 +DASHBOARD_URL= TERMINAL_ACTIONS_ENABLED=true SWEEP_TOKEN= BRIEF_VERBOSITY=standard diff --git a/README.md b/README.md index 74e69f3..44bb983 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,8 @@ PORT=3117 REFRESH_INTERVAL_MINUTES=15 AUTO_OPEN_BROWSER=false STALE_DATA_MAX_AGE_MINUTES=60 +STALE_ALERT_COOLDOWN_MINUTES=60 +DASHBOARD_URL=https://intelligence.example.internal TERMINAL_ACTIONS_ENABLED=true SWEEP_TOKEN= BRIEF_VERBOSITY=standard @@ -188,6 +190,8 @@ LLM_MODEL=your-model For Pangolin or another reverse proxy, forward HTTP traffic to `intelligence-terminal:3117` (or the `PORT` you set). Missing API keys do not crash sweeps; affected sources are reported as degraded in `/api/health`. +When data remains stale past `STALE_DATA_MAX_AGE_MINUTES`, the server sends an operator alert through configured Telegram/Discord channels after failed or degraded sweep attempts. `STALE_ALERT_COOLDOWN_MINUTES` prevents repeated stale alerts from spamming every refresh interval. Set `DASHBOARD_URL` to the Pangolin/public URL you want included in those alerts. + The dashboard Terminal Actions panel can trigger `status`, `sweep`, and `brief` through `/api/action`. Leave `TERMINAL_ACTIONS_ENABLED=true` for a private home-server deployment. For an internet-exposed deployment, set `SWEEP_TOKEN` and pass it through trusted automation, or set `TERMINAL_ACTIONS_ENABLED=false` to disable browser-triggered actions. If you protect actions with `SWEEP_TOKEN`, the browser can send it from `localStorage.crucix_sweep_token`. #### Build And Publish Your Gitea Image @@ -525,6 +529,9 @@ All settings are in `.env` with sensible defaults: |----------|---------|-------------| | `PORT` | `3117` | Dashboard server port | | `REFRESH_INTERVAL_MINUTES` | `15` | Auto-refresh interval | +| `STALE_DATA_MAX_AGE_MINUTES` | `60` | Data age threshold for stale health state | +| `STALE_ALERT_COOLDOWN_MINUTES` | `60` | Minimum time between repeated operator stale-data alerts | +| `DASHBOARD_URL` | local URL | Dashboard URL included in operator alerts | | `LLM_PROVIDER` | disabled | `anthropic`, `openai`, `gemini`, `codex`, `openrouter`, `minimax`, `mistral`, or `grok` | | `LLM_API_KEY` | — | API key (not needed for codex) | | `LLM_MODEL` | per-provider default | Override model selection | diff --git a/crucix.config.mjs b/crucix.config.mjs index 19bbce2..c0604b0 100644 --- a/crucix.config.mjs +++ b/crucix.config.mjs @@ -23,6 +23,8 @@ export default { refreshIntervalMinutes: intEnv('REFRESH_INTERVAL_MINUTES', 15), autoOpenBrowser: boolEnv('AUTO_OPEN_BROWSER', false), staleDataMaxAgeMinutes: intEnv('STALE_DATA_MAX_AGE_MINUTES', 60), + staleAlertCooldownMinutes: intEnv('STALE_ALERT_COOLDOWN_MINUTES', 60), + dashboardUrl: process.env.DASHBOARD_URL || null, sweepToken: process.env.SWEEP_TOKEN || null, terminalActionsEnabled: boolEnv('TERMINAL_ACTIONS_ENABLED', true), diff --git a/lib/stale-alerts.mjs b/lib/stale-alerts.mjs new file mode 100644 index 0000000..02835ac --- /dev/null +++ b/lib/stale-alerts.mjs @@ -0,0 +1,52 @@ +const DEFAULT_COOLDOWN_MS = 60 * 60 * 1000; + +export function shouldSendStaleAlert(health, state = {}, opts = {}) { + const now = opts.now ?? Date.now(); + const cooldownMs = opts.cooldownMs ?? DEFAULT_COOLDOWN_MS; + if (!health?.stale) { + state.lastStaleAlertKey = null; + return { send: false, reason: 'not_stale' }; + } + + const key = [ + health.lastSuccessfulSweep || 'never', + health.lastSweepError || 'no-error', + health.sourcesFailed || 0, + health.sourcesDegraded || 0, + ].join('|'); + + if (state.lastStaleAlertKey === key && now - (state.lastStaleAlertAt || 0) < cooldownMs) { + return { send: false, reason: 'cooldown', key }; + } + + state.lastStaleAlertKey = key; + state.lastStaleAlertAt = now; + return { send: true, reason: 'stale', key }; +} + +export function formatStaleAlert(health, opts = {}) { + const dashboardUrl = opts.dashboardUrl || 'http://localhost:3117'; + const context = opts.context || 'scheduled sweep'; + const ageMinutes = health.dataAgeSeconds == null ? 'unknown' : Math.floor(health.dataAgeSeconds / 60); + const affected = (health.sourceHealth || []) + .filter(s => (s.status && s.status !== 'ok') || s.error) + .slice(0, 6) + .map(s => `- ${s.name || s.n || 'source'}: ${s.status || 'degraded'}${s.error ? ` (${String(s.error).slice(0, 100)})` : ''}`); + + return [ + '*CRUCIX STALE DATA ALERT*', + '', + `Context: ${context}`, + `Status: ${health.status || 'unknown'}`, + `Data age: ${ageMinutes} minutes`, + `Last successful sweep: ${health.lastSuccessfulSweep || 'never'}`, + `Last attempted sweep: ${health.lastSweep || 'never'}`, + `Last error: ${health.lastSweepError || 'none'}`, + `Sources: ${health.sourcesOk || 0} OK / ${health.sourcesDegraded || 0} degraded / ${health.sourcesFailed || 0} failed`, + '', + '*Affected sources*', + affected.length ? affected.join('\n') : '- No per-source errors available', + '', + `Dashboard: ${dashboardUrl}`, + ].join('\n'); +} diff --git a/server.mjs b/server.mjs index 95949f0..58bc25e 100644 --- a/server.mjs +++ b/server.mjs @@ -18,6 +18,7 @@ import { TelegramAlerter } from './lib/alerts/telegram.mjs'; import { DiscordAlerter } from './lib/alerts/discord.mjs'; import { getFetchMetrics } from './apis/utils/fetch.mjs'; import { IntelligenceStore } from './lib/intelligence-store.mjs'; +import { formatStaleAlert, shouldSendStaleAlert } from './lib/stale-alerts.mjs'; const __dirname = dirname(fileURLToPath(import.meta.url)); const ROOT = __dirname; @@ -39,6 +40,7 @@ let sweepStartedAt = null; // Timestamp when current/last sweep started let sweepInProgress = false; const startTime = Date.now(); const sseClients = new Set(); +const staleAlertState = {}; // === Delta/Memory === const memory = new MemoryManager(RUNS_DIR); @@ -411,6 +413,31 @@ function buildHealth() { }; } +async function notifyIfDataStale(context = 'scheduled sweep') { + const health = buildHealth(); + const decision = shouldSendStaleAlert(health, staleAlertState, { + cooldownMs: config.staleAlertCooldownMinutes * 60 * 1000, + }); + if (!decision.send) return false; + + const dashboardUrl = config.dashboardUrl || `http://localhost:${config.port}`; + const message = formatStaleAlert(health, { dashboardUrl, context }); + const sends = []; + if (telegramAlerter.isConfigured) sends.push(telegramAlerter.sendMessage(message)); + if (discordAlerter.isConfigured) sends.push(discordAlerter.sendAlert(message)); + + if (sends.length === 0) { + console.warn('[Crucix] Data is stale but no operator alert channel is configured'); + return false; + } + + const results = await Promise.allSettled(sends); + const sent = results.some(r => r.status === 'fulfilled' && (r.value === true || r.value?.ok === true)); + if (sent) console.warn('[Crucix] Operator stale-data alert sent'); + else console.warn('[Crucix] Operator stale-data alert attempted but no channel accepted it'); + return sent; +} + function buildBrief(data) { const verbosity = config.telegram.briefVerbosity || 'standard'; const delta = memory.getLastDelta(); @@ -553,6 +580,9 @@ async function runSweepCycle() { broadcast({ type: 'sweep_error', error: err.message }); } finally { sweepInProgress = false; + await notifyIfDataStale(lastSweepError ? 'failed sweep' : 'completed sweep').catch(err => { + console.error('[Crucix] Stale-data operator alert failed:', err.message); + }); } } diff --git a/test/fetch-utils.test.mjs b/test/fetch-utils.test.mjs index 2dcee45..09cf265 100644 --- a/test/fetch-utils.test.mjs +++ b/test/fetch-utils.test.mjs @@ -1,6 +1,7 @@ import test from 'node:test'; import assert from 'node:assert/strict'; import { safeFetch, safeFetchText, getFetchMetrics } from '../apis/utils/fetch.mjs'; +import { formatStaleAlert, shouldSendStaleAlert } from '../lib/stale-alerts.mjs'; test('safeFetch reports HTML as degraded JSON response', async () => { const originalFetch = globalThis.fetch; @@ -34,3 +35,60 @@ test('safeFetchText returns text and byte count', async () => { globalThis.fetch = originalFetch; } }); + +test('stale alert is skipped for fresh health and resets active key', () => { + const state = { lastStaleAlertKey: 'old', lastStaleAlertAt: 100 }; + const decision = shouldSendStaleAlert({ stale: false }, state, { now: 200 }); + assert.equal(decision.send, false); + assert.equal(decision.reason, 'not_stale'); + assert.equal(state.lastStaleAlertKey, null); +}); + +test('stale alert sends once and deduplicates during cooldown', () => { + const state = {}; + const health = { + stale: true, + lastSuccessfulSweep: '2026-05-17T08:00:00.000Z', + lastSweepError: 'network timeout', + sourcesFailed: 2, + sourcesDegraded: 1, + }; + + const first = shouldSendStaleAlert(health, state, { now: 1_000, cooldownMs: 60_000 }); + const second = shouldSendStaleAlert(health, state, { now: 2_000, cooldownMs: 60_000 }); + + assert.equal(first.send, true); + assert.equal(second.send, false); + assert.equal(second.reason, 'cooldown'); +}); + +test('stale alert repeats after cooldown', () => { + const state = {}; + const health = { stale: true, lastSuccessfulSweep: 'a', lastSweepError: 'timeout', sourcesFailed: 1 }; + + assert.equal(shouldSendStaleAlert(health, state, { now: 1_000, cooldownMs: 60_000 }).send, true); + assert.equal(shouldSendStaleAlert(health, state, { now: 62_000, cooldownMs: 60_000 }).send, true); +}); + +test('stale alert message includes operator context and affected sources', () => { + const message = formatStaleAlert({ + status: 'stale', + stale: true, + dataAgeSeconds: 7200, + lastSuccessfulSweep: '2026-05-17T08:00:00.000Z', + lastSweep: '2026-05-17T10:00:00.000Z', + lastSweepError: 'GDELT timeout', + sourcesOk: 20, + sourcesDegraded: 3, + sourcesFailed: 2, + sourceHealth: [ + { name: 'GDELT', status: 'degraded', error: 'timeout' }, + { name: 'Reddit', status: 'no_credentials' }, + ], + }, { dashboardUrl: 'https://terminal.example.test', context: 'failed sweep' }); + + assert.match(message, /CRUCIX STALE DATA ALERT/); + assert.match(message, /Data age: 120 minutes/); + assert.match(message, /GDELT: degraded \(timeout\)/); + assert.match(message, /Dashboard: https:\/\/terminal\.example\.test/); +}); From 900f43ba1384de874a38de633e7f5c7ae22a2130 Mon Sep 17 00:00:00 2001 From: MrSphay Date: Sun, 17 May 2026 18:54:35 +0200 Subject: [PATCH 2/2] fix: make news geotagging deterministic --- dashboard/inject.mjs | 42 ++++++++++++++++++++++---- package.json | 2 +- test/dashboard-geotagging.test.mjs | 47 ++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 6 deletions(-) create mode 100644 test/dashboard-geotagging.test.mjs diff --git a/dashboard/inject.mjs b/dashboard/inject.mjs index 1c78935..84cb0fa 100644 --- a/dashboard/inject.mjs +++ b/dashboard/inject.mjs @@ -83,16 +83,48 @@ const geoKeywords = { 'IMF':[38.9,-77],'World Bank':[38.9,-77],'UN':[40.7,-74], }; -function geoTagText(text) { +function escapeRegex(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function geoKeywordRegex(keyword) { + const flags = keyword.length <= 3 && keyword === keyword.toUpperCase() ? 'u' : 'iu'; + return new RegExp(`(^|[^\\p{L}\\p{N}])${escapeRegex(keyword)}(?=$|[^\\p{L}\\p{N}])`, flags); +} + +const geoKeywordEntries = Object.entries(geoKeywords) + .sort((a, b) => b[0].length - a[0].length) + .map(([keyword, coords]) => ({ keyword, coords, pattern: geoKeywordRegex(keyword) })); + +export function geoTagText(text) { if (!text) return null; - for (const [keyword, [lat, lon]] of Object.entries(geoKeywords)) { - if (text.includes(keyword)) { + for (const { keyword, coords, pattern } of geoKeywordEntries) { + if (pattern.test(text)) { + const [lat, lon] = coords; return { lat, lon, region: keyword }; } } return null; } +function stableHash(value) { + let hash = 2166136261; + for (let i = 0; i < value.length; i++) { + hash ^= value.charCodeAt(i); + hash = Math.imul(hash, 16777619); + } + return hash >>> 0; +} + +export function stableGeoJitter(key, axis) { + const bucket = stableHash(`${axis}:${key}`) / 0xffffffff; + return (bucket - 0.5) * 2; +} + +function newsGeoKey(item) { + return `${item.source || ''}|${item.title || ''}|${item.date || ''}|${item.url || ''}`; +} + function sanitizeExternalUrl(raw) { if (!raw) return undefined; try { @@ -235,8 +267,8 @@ export async function fetchAllNews() { source: item.source, date: item.date, url: item.url, - lat: geo.lat + (Math.random() - 0.5) * 2, - lon: geo.lon + (Math.random() - 0.5) * 2, + lat: geo.lat + stableGeoJitter(newsGeoKey(item), 'lat'), + lon: geo.lon + stableGeoJitter(newsGeoKey(item), 'lon'), region: geo.region }); } diff --git a/package.json b/package.json index 09bc405..d814320 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "brief:save": "node apis/save-briefing.mjs", "diag": "node diag.mjs", "test": "npm run test:unit", - "test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/fetch-utils.test.mjs test/reddit-source.test.mjs test/acled-source.test.mjs", + "test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/fetch-utils.test.mjs test/reddit-source.test.mjs test/acled-source.test.mjs test/dashboard-geotagging.test.mjs", "compose:config": "docker compose config", "clean": "node scripts/clean.mjs", "fresh-start": "npm run clean && npm start" diff --git a/test/dashboard-geotagging.test.mjs b/test/dashboard-geotagging.test.mjs new file mode 100644 index 0000000..cab73dc --- /dev/null +++ b/test/dashboard-geotagging.test.mjs @@ -0,0 +1,47 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { geoTagText, stableGeoJitter } from '../dashboard/inject.mjs'; + +test('geoTagText matches headlines case-insensitively', () => { + assert.deepEqual(geoTagText('ukraine reports new air defense activity'), { + lat: 49, + lon: 32, + region: 'Ukraine', + }); + + assert.deepEqual(geoTagText('flooding disrupts são paulo transport'), { + lat: -23.5, + lon: -46.6, + region: 'São Paulo', + }); +}); + +test('geoTagText prefers longer place names before broad countries', () => { + assert.deepEqual(geoTagText('New York markets react before wider US session'), { + lat: 40.7, + lon: -74, + region: 'New York', + }); +}); + +test('geoTagText uses word boundaries to reduce false positives', () => { + assert.equal(geoTagText('A music festival announces its lineup'), null); + assert.equal(geoTagText('Officials discuss a new focus for aid'), null); + assert.deepEqual(geoTagText('US officials discuss a new aid package'), { + lat: 39, + lon: -98, + region: 'US', + }); +}); + +test('stableGeoJitter is deterministic and bounded', () => { + const key = 'BBC|lower-case ukraine headline|Sun, 17 May 2026 12:00:00 GMT|https://example.test/a'; + const latA = stableGeoJitter(key, 'lat'); + const latB = stableGeoJitter(key, 'lat'); + const lon = stableGeoJitter(key, 'lon'); + + assert.equal(latA, latB); + assert.notEqual(latA, lon); + assert.ok(latA >= -1 && latA <= 1); + assert.ok(lon >= -1 && lon <= 1); +});