feat: alert operators on stale data
This commit is contained in:
52
lib/stale-alerts.mjs
Normal file
52
lib/stale-alerts.mjs
Normal file
@@ -0,0 +1,52 @@
|
||||
// Default cooldown window, in milliseconds (60 min * 60 s * 1000 ms = one hour).
// shouldSendStaleAlert falls back to this when opts.cooldownMs is not supplied.
const DEFAULT_COOLDOWN_MS = 60 * 60 * 1000;
|
||||
|
||||
/**
 * Decide whether a stale-data alert should be sent for a health snapshot.
 *
 * An alert is suppressed when the snapshot is not stale, or when the same
 * stale condition (same key) was already alerted within the cooldown window.
 * The key is built from the last successful sweep, the last sweep error and
 * the failed/degraded source counts, so a change in any of them is treated
 * as a new condition and alerts immediately.
 *
 * Side effects: `state` is mutated in place. `lastStaleAlertKey` is reset to
 * `null` when the snapshot is not stale; `lastStaleAlertKey` and
 * `lastStaleAlertAt` are updated whenever an alert is approved.
 *
 * @param {object|null|undefined} health - Health snapshot; only `stale`,
 *   `lastSuccessfulSweep`, `lastSweepError`, `sourcesFailed` and
 *   `sourcesDegraded` are read.
 * @param {object|null} [state={}] - Caller-owned object carrying
 *   `lastStaleAlertKey` / `lastStaleAlertAt` between calls. When omitted or
 *   `null`, a throwaway object is used and nothing is remembered.
 * @param {object|null} [opts={}]
 * @param {number} [opts.now] - Current time in ms; defaults to `Date.now()`.
 * @param {number} [opts.cooldownMs] - Cooldown window in ms; defaults to
 *   `DEFAULT_COOLDOWN_MS`, which is also used when the value is NaN.
 * @returns {{send: boolean, reason: string, key?: string}} `reason` is one of
 *   `'not_stale'`, `'cooldown'` or `'stale'`; `key` is present for the last two.
 */
export function shouldSendStaleAlert(health, state = {}, opts = {}) {
  // Default parameters only replace `undefined`; an explicit `null` would
  // otherwise throw on the first property access below.
  const tracker = state ?? {};
  const settings = opts ?? {};

  if (!health?.stale) {
    // Recovery clears the key so the next stale episode alerts right away.
    tracker.lastStaleAlertKey = null;
    return { send: false, reason: 'not_stale' };
  }

  const now = settings.now ?? Date.now();
  const requestedCooldown = settings.cooldownMs ?? DEFAULT_COOLDOWN_MS;
  // `elapsed < NaN` is always false, which would silently disable the
  // cooldown and alert on every call; fall back to the default instead.
  const cooldownMs = Number.isNaN(Number(requestedCooldown))
    ? DEFAULT_COOLDOWN_MS
    : requestedCooldown;

  const key = [
    health.lastSuccessfulSweep || 'never',
    health.lastSweepError || 'no-error',
    health.sourcesFailed || 0,
    health.sourcesDegraded || 0,
  ].join('|');

  const elapsedMs = now - (tracker.lastStaleAlertAt || 0);
  if (tracker.lastStaleAlertKey === key && elapsedMs < cooldownMs) {
    return { send: false, reason: 'cooldown', key };
  }

  tracker.lastStaleAlertKey = key;
  tracker.lastStaleAlertAt = now;
  return { send: true, reason: 'stale', key };
}
|
||||
|
||||
/**
 * Build the plain-text body of a stale-data alert from a health snapshot.
 *
 * The message lists the overall status, data age in whole minutes, sweep
 * timestamps, the last error, per-status source counts, up to six affected
 * sources (each error truncated to 100 characters) and a dashboard link.
 * Missing fields fall back to placeholders such as 'unknown', 'never',
 * 'none' or 0, so a partial, `null` or `undefined` snapshot still yields a
 * complete message instead of throwing.
 *
 * @param {object|null|undefined} health - Health snapshot. Reads `status`,
 *   `dataAgeSeconds`, `lastSuccessfulSweep`, `lastSweep`, `lastSweepError`,
 *   `sourcesOk`, `sourcesDegraded`, `sourcesFailed` and `sourceHealth`
 *   (entries with `name` or `n`, `status`, `error`).
 * @param {object|null} [opts={}]
 * @param {string} [opts.dashboardUrl] - Link shown at the bottom; defaults to
 *   'http://localhost:3117'.
 * @param {string} [opts.context] - What triggered the alert; defaults to
 *   'scheduled sweep'.
 * @returns {string} Newline-joined alert text.
 */
export function formatStaleAlert(health, opts = {}) {
  // Tolerate null/undefined inputs, matching shouldSendStaleAlert's `health?.`.
  const snapshot = health ?? {};
  const settings = opts ?? {};
  const dashboardUrl = settings.dashboardUrl || 'http://localhost:3117';
  const context = settings.context || 'scheduled sweep';

  // Non-finite ages (NaN, Infinity, unparsable values) are reported as
  // 'unknown' rather than leaking "NaN minutes" into the alert.
  const ageSeconds = snapshot.dataAgeSeconds;
  const ageMinutes = ageSeconds == null || !Number.isFinite(Number(ageSeconds))
    ? 'unknown'
    : Math.floor(ageSeconds / 60);

  // Only iterate a real array, and skip null/undefined entries.
  const sources = Array.isArray(snapshot.sourceHealth) ? snapshot.sourceHealth : [];
  const affected = sources
    .filter((s) => s && ((s.status && s.status !== 'ok') || s.error))
    .slice(0, 6)
    .map((s) => {
      const label = s.name || s.n || 'source';
      const detail = s.error ? ` (${String(s.error).slice(0, 100)})` : '';
      return `- ${label}: ${s.status || 'degraded'}${detail}`;
    });

  return [
    '*CRUCIX STALE DATA ALERT*',
    '',
    `Context: ${context}`,
    `Status: ${snapshot.status || 'unknown'}`,
    `Data age: ${ageMinutes} minutes`,
    `Last successful sweep: ${snapshot.lastSuccessfulSweep || 'never'}`,
    `Last attempted sweep: ${snapshot.lastSweep || 'never'}`,
    `Last error: ${snapshot.lastSweepError || 'none'}`,
    `Sources: ${snapshot.sourcesOk || 0} OK / ${snapshot.sourcesDegraded || 0} degraded / ${snapshot.sourcesFailed || 0} failed`,
    '',
    '*Affected sources*',
    affected.length ? affected.join('\n') : '- No per-source errors available',
    '',
    `Dashboard: ${dashboardUrl}`,
  ].join('\n');
}
|
||||
Reference in New Issue
Block a user