From 5c08355e38d30045d72a0dfb37d72913d21c0d66 Mon Sep 17 00:00:00 2001
From: calesthio
Date: Tue, 24 Mar 2026 18:48:55 -0700
Subject: [PATCH] Fix Telegram dedup identity and legacy Markdown escaping

Escape only the characters legacy Markdown treats as markup, and dedup
urgent Telegram posts by stable identity instead of the lossy semantic hash.

Post ids are scoped by channel (Telegram message ids are only unique within
a chat), and compacted runs keep the same postId/messageId fallback that the
dedup key reads so keys from prior runs stay comparable with the current sweep.
---
 lib/alerts/telegram.mjs |  4 +++-
 lib/delta/engine.mjs    | 40 ++++++++++++++++++++++++++++++++++------
 lib/delta/memory.mjs    | 11 ++++++++++-
 3 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/lib/alerts/telegram.mjs b/lib/alerts/telegram.mjs
index 1a7f6ae..580a902 100644
--- a/lib/alerts/telegram.mjs
+++ b/lib/alerts/telegram.mjs
@@ -744,7 +744,9 @@ Respond with ONLY valid JSON:
 
 function escapeMd(text) {
   if (!text) return '';
-  return text.replace(/([_*\[\]()~`>#+\-=|{}.!\\])/g, '\\$1');
+  // The bot sends alerts with legacy Markdown parse mode, not MarkdownV2.
+  // Escape only the characters that legacy Markdown actually treats as markup.
+  return text.replace(/([_*`\[])/g, '\\$1');
 }
 
 function parseJSON(text) {
diff --git a/lib/delta/engine.mjs b/lib/delta/engine.mjs
index c98d50f..ec45ddb 100644
--- a/lib/delta/engine.mjs
+++ b/lib/delta/engine.mjs
@@ -66,9 +66,9 @@ const RISK_KEYS = ['vix', 'hy_spread', 'urgent_posts', 'conflict_events', 'therm
 // ─── Semantic Hashing for Telegram Posts ─────────────────────────────────────
 
 /**
- * Produce a normalized hash of a post's content.
- * Strips timestamps, normalizes numbers, lowercases — so "BREAKING: 5 missiles at 14:32"
- * and "Breaking: 7 missiles at 15:01" produce the same hash (both are "missile strike" signals).
+ * Produce a normalized semantic hash of a post's content.
+ * This is intentionally lossy and is only safe as a fallback when a stable
+ * post identity is unavailable.
  */
 function contentHash(text) {
   if (!text) return '';
@@ -83,12 +83,38 @@ function contentHash(text) {
   return createHash('sha256').update(normalized).digest('hex').substring(0, 12);
 }
 
+/**
+ * Derive a dedup key for a Telegram post, preferring stable identity over text.
+ * Order: channel-scoped source id, then a channel+date+text digest, then the
+ * lossy semantic hash. Current and compacted posts must produce the same key.
+ */
+function stablePostKey(post) {
+  if (!post) return '';
+
+  const sourceId = post.postId || post.messageId || '';
+  const channelId = post.channel || post.chat || '';
+  const date = post.date || '';
+  const text = (post.text || '').trim().substring(0, 200);
+
+  // Telegram message ids are only unique within a chat, so scope by channel.
+  if (sourceId) return `id:${channelId}:${sourceId}`;
+  if (channelId && date) {
+    return createHash('sha256')
+      .update(`${channelId}|${date}|${text}`)
+      .digest('hex')
+      .substring(0, 16);
+  }
+
+  return `semantic:${contentHash(post.text)}`;
+}
+
 // ─── Core Delta Computation ──────────────────────────────────────────────────
 
 /**
  * @param {object} current - current sweep's synthesized data
  * @param {object|null} previous - previous sweep's synthesized data (null on first run)
  * @param {object} [thresholdOverrides] - optional: { numeric: {...}, count: {...} }
+ * @param {Array} [priorRuns] - optional compacted prior runs for broader dedup
  */
 export function computeDelta(current, previous, thresholdOverrides = {}, priorRuns = []) {
   if (!previous) return null;
@@ -153,14 +179,16 @@ export function computeDelta(current, previous, thresholdOverrides = {}, priorRu
 
   // ─── New urgent Telegram posts (semantic dedup) ──────────────────────
   // Dedup against all recent runs (not just the last one) to catch posts that
-  // drop out of one sweep but reappear in a later one.
+  // drop out of one sweep but reappear in a later one. Use stable post identity
+  // where possible so updated posts are not collapsed into earlier alerts just
+  // because their text is semantically similar.
   const sources = priorRuns.length > 0 ? priorRuns : [previous];
   const prevHashes = new Set(
-    sources.flatMap(run => (run?.tg?.urgent || []).map(p => contentHash(p.text)))
+    sources.flatMap(run => (run?.tg?.urgent || []).map(stablePostKey)).filter(Boolean)
   );
 
   for (const post of (current.tg?.urgent || [])) {
-    const hash = contentHash(post.text);
+    const hash = stablePostKey(post);
     if (hash && !prevHashes.has(hash)) {
       signals.new.push({
         key: `tg_urgent:${hash}`,
diff --git a/lib/delta/memory.mjs b/lib/delta/memory.mjs
index a551ebb..66986f0 100644
--- a/lib/delta/memory.mjs
+++ b/lib/delta/memory.mjs
@@ -201,7 +201,16 @@ export class MemoryManager {
       bls: data.bls,
       treasury: data.treasury,
       gscpi: data.gscpi,
-      tg: { posts: data.tg?.posts, urgent: (data.tg?.urgent || []).map(p => ({ text: p.text, date: p.date })) },
+      tg: {
+        posts: data.tg?.posts,
+        urgent: (data.tg?.urgent || []).map(p => ({
+          text: p.text,
+          date: p.date,
+          channel: p.channel || p.chat || null,
+          // Same id fallback as the dedup key, so compacted runs stay comparable.
+          postId: p.postId || p.messageId || null,
+        })),
+      },
       thermal: (data.thermal || []).map(t => ({ region: t.region, det: t.det, night: t.night, hc: t.hc })),
       air: (data.air || []).map(a => ({ region: a.region, total: a.total })),
       nuke: (data.nuke || []).map(n => ({ site: n.site, anom: n.anom, cpm: n.cpm })),