Merge branch 'calesthio:master' into master
This commit is contained in:
@@ -94,7 +94,7 @@ export async function getChat(chatId) {
|
||||
// Compact a Bot API message for briefing output
|
||||
function compactBotMessage(msg) {
|
||||
return {
|
||||
text: (msg.text || msg.caption || '').slice(0, 300),
|
||||
text: msg.text || msg.caption || '',
|
||||
date: msg.date ? new Date(msg.date * 1000).toISOString() : null,
|
||||
chat: msg.chat?.title || msg.chat?.username || 'unknown',
|
||||
views: msg.views || 0,
|
||||
@@ -169,10 +169,12 @@ function parseWebPreview(html, channelId) {
|
||||
.replace(/</g, '<')
|
||||
.replace(/>/g, '>')
|
||||
.replace(/"/g, '"')
|
||||
.replace(/'/g, "'")
|
||||
.replace(/�*39;/g, "'")
|
||||
.replace(/�*27;/gi, "'")
|
||||
.replace(/&#(\d+);/g, (_, n) => String.fromCharCode(Number(n)))
|
||||
.replace(/&#x([0-9a-f]+);/gi, (_, h) => String.fromCharCode(parseInt(h, 16)))
|
||||
.replace(/ /g, ' ')
|
||||
.trim()
|
||||
.slice(0, 300);
|
||||
.trim();
|
||||
}
|
||||
|
||||
// Extract view count
|
||||
|
||||
@@ -271,7 +271,7 @@ export class TelegramAlerter {
|
||||
headline: `OSINT Surge: ${osintNew.length} New Urgent Posts`,
|
||||
reason: `${osintNew.length} new urgent OSINT signals detected. Elevated conflict reporting tempo.`,
|
||||
actionable: 'Review OSINT stream for pattern. Cross-check with satellite and ACLED data.',
|
||||
signals: osintNew.map(s => (s.text || '').substring(0, 40)).slice(0, 3),
|
||||
signals: osintNew.map(s => s.text || s.label || s.key).slice(0, 5),
|
||||
crossCorrelation: 'telegram OSINT',
|
||||
};
|
||||
}
|
||||
@@ -681,7 +681,7 @@ Respond with ONLY valid JSON:
|
||||
if (osintSignals.length > 0) {
|
||||
sections.push('📡 OSINT SIGNALS:\n' + osintSignals.map(s => {
|
||||
const post = s.item || s;
|
||||
return ` [${post.channel || 'UNKNOWN'}] ${(post.text || s.reason || '').substring(0, 150)}`;
|
||||
return ` [${post.channel || 'UNKNOWN'}] ${post.text || s.reason || ''}`;
|
||||
}).join('\n'));
|
||||
}
|
||||
|
||||
@@ -728,7 +728,10 @@ Respond with ONLY valid JSON:
|
||||
}
|
||||
|
||||
if (evaluation.signals?.length) {
|
||||
lines.push('', `Signals: ${evaluation.signals.join(' · ')}`);
|
||||
lines.push('', `*Signals:*`);
|
||||
for (const sig of evaluation.signals) {
|
||||
lines.push(`• ${escapeMd(sig)}`);
|
||||
}
|
||||
}
|
||||
|
||||
lines.push('', `_${new Date().toISOString().replace('T', ' ').substring(0, 19)} UTC_`);
|
||||
@@ -739,6 +742,13 @@ Respond with ONLY valid JSON:
|
||||
|
||||
// ─── Helpers ──────────────────────────────────────────────────────────────
|
||||
|
||||
function escapeMd(text) {
|
||||
if (!text) return '';
|
||||
// The bot sends alerts with legacy Markdown parse mode, not MarkdownV2.
|
||||
// Escape only the characters that legacy Markdown actually treats as markup.
|
||||
return text.replace(/([_*`\[])/g, '\\$1');
|
||||
}
|
||||
|
||||
function parseJSON(text) {
|
||||
if (!text) return null;
|
||||
let cleaned = text.trim();
|
||||
|
||||
@@ -66,9 +66,9 @@ const RISK_KEYS = ['vix', 'hy_spread', 'urgent_posts', 'conflict_events', 'therm
|
||||
// ─── Semantic Hashing for Telegram Posts ─────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Produce a normalized hash of a post's content.
|
||||
* Strips timestamps, normalizes numbers, lowercases — so "BREAKING: 5 missiles at 14:32"
|
||||
* and "Breaking: 7 missiles at 15:01" produce the same hash (both are "missile strike" signals).
|
||||
* Produce a normalized semantic hash of a post's content.
|
||||
* This is intentionally lossy and is only safe as a fallback when a stable
|
||||
* post identity is unavailable.
|
||||
*/
|
||||
function contentHash(text) {
|
||||
if (!text) return '';
|
||||
@@ -83,14 +83,34 @@ function contentHash(text) {
|
||||
return createHash('sha256').update(normalized).digest('hex').substring(0, 12);
|
||||
}
|
||||
|
||||
function stablePostKey(post) {
|
||||
if (!post) return '';
|
||||
|
||||
const sourceId = post.postId || post.messageId || '';
|
||||
const channelId = post.channel || post.chat || '';
|
||||
const date = post.date || '';
|
||||
const text = (post.text || '').trim().substring(0, 200);
|
||||
|
||||
if (sourceId) return `id:${sourceId}`;
|
||||
if (channelId && date) {
|
||||
return createHash('sha256')
|
||||
.update(`${channelId}|${date}|${text}`)
|
||||
.digest('hex')
|
||||
.substring(0, 16);
|
||||
}
|
||||
|
||||
return `semantic:${contentHash(post.text)}`;
|
||||
}
|
||||
|
||||
// ─── Core Delta Computation ──────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* @param {object} current - current sweep's synthesized data
|
||||
* @param {object|null} previous - previous sweep's synthesized data (null on first run)
|
||||
* @param {object} [thresholdOverrides] - optional: { numeric: {...}, count: {...} }
|
||||
* @param {Array<object>} [priorRuns] - optional compacted prior runs for broader dedup
|
||||
*/
|
||||
export function computeDelta(current, previous, thresholdOverrides = {}) {
|
||||
export function computeDelta(current, previous, thresholdOverrides = {}, priorRuns = []) {
|
||||
if (!previous) return null;
|
||||
if (!current) return null;
|
||||
|
||||
@@ -152,16 +172,21 @@ export function computeDelta(current, previous, thresholdOverrides = {}) {
|
||||
|
||||
// ─── New urgent Telegram posts (semantic dedup) ──────────────────────
|
||||
|
||||
// Dedup against all recent runs (not just the last one) to catch posts that
|
||||
// drop out of one sweep but reappear in a later one. Use stable post identity
|
||||
// where possible so updated posts are not collapsed into earlier alerts just
|
||||
// because their text is semantically similar.
|
||||
const sources = priorRuns.length > 0 ? priorRuns : [previous];
|
||||
const prevHashes = new Set(
|
||||
(previous.tg?.urgent || []).map(p => contentHash(p.text))
|
||||
sources.flatMap(run => (run?.tg?.urgent || []).map(stablePostKey)).filter(Boolean)
|
||||
);
|
||||
|
||||
for (const post of (current.tg?.urgent || [])) {
|
||||
const hash = contentHash(post.text);
|
||||
const hash = stablePostKey(post);
|
||||
if (hash && !prevHashes.has(hash)) {
|
||||
signals.new.push({
|
||||
key: `tg_urgent:${hash}`,
|
||||
text: post.text?.substring(0, 120),
|
||||
text: post.text,
|
||||
item: post,
|
||||
reason: 'New urgent OSINT post',
|
||||
});
|
||||
|
||||
@@ -74,7 +74,9 @@ export class MemoryManager {
|
||||
// Add a new run to hot memory
|
||||
addRun(synthesizedData) {
|
||||
const previous = this.getLastRun();
|
||||
const delta = computeDelta(synthesizedData, previous);
|
||||
// Collect urgent post hashes from all hot runs for broader dedup window
|
||||
const priorRuns = this.hot.runs.map(r => r.data);
|
||||
const delta = computeDelta(synthesizedData, previous, {}, priorRuns);
|
||||
|
||||
// Compact the data for storage (strip large arrays)
|
||||
const compact = this._compactForStorage(synthesizedData);
|
||||
@@ -199,7 +201,15 @@ export class MemoryManager {
|
||||
bls: data.bls,
|
||||
treasury: data.treasury,
|
||||
gscpi: data.gscpi,
|
||||
tg: { posts: data.tg?.posts, urgent: (data.tg?.urgent || []).map(p => ({ text: p.text?.substring(0, 80), date: p.date })) },
|
||||
tg: {
|
||||
posts: data.tg?.posts,
|
||||
urgent: (data.tg?.urgent || []).map(p => ({
|
||||
text: p.text,
|
||||
date: p.date,
|
||||
channel: p.channel || p.chat || null,
|
||||
postId: p.postId || null,
|
||||
})),
|
||||
},
|
||||
thermal: (data.thermal || []).map(t => ({ region: t.region, det: t.det, night: t.night, hc: t.hc })),
|
||||
air: (data.air || []).map(a => ({ region: a.region, total: a.total })),
|
||||
nuke: (data.nuke || []).map(n => ({ site: n.site, anom: n.anom, cpm: n.cpm })),
|
||||
|
||||
@@ -88,10 +88,20 @@ function compactSweepForLLM(data, delta, previousIdeas) {
|
||||
sections.push(`SUPPLY_CHAIN: GSCPI=${data.gscpi.value} (${data.gscpi.interpretation})`);
|
||||
}
|
||||
|
||||
// Geopolitical signals
|
||||
// Geopolitical signals (cap total OSINT text to ~1500 chars to keep prompt compact)
|
||||
const urgentPosts = (data.tg?.urgent || []).slice(0, 5);
|
||||
if (urgentPosts.length) {
|
||||
sections.push(`URGENT_OSINT:\n${urgentPosts.map(p => `- ${(p.text || '').substring(0, 120)}`).join('\n')}`);
|
||||
const MAX_OSINT_CHARS = 1500;
|
||||
let remaining = MAX_OSINT_CHARS;
|
||||
const lines = [];
|
||||
for (const p of urgentPosts) {
|
||||
const text = p.text || '';
|
||||
if (remaining <= 0) break;
|
||||
const trimmed = text.length > remaining ? text.substring(0, remaining) + '…' : text;
|
||||
lines.push(`- ${trimmed}`);
|
||||
remaining -= trimmed.length;
|
||||
}
|
||||
sections.push(`URGENT_OSINT:\n${lines.join('\n')}`);
|
||||
}
|
||||
|
||||
// Thermal / fire detections
|
||||
|
||||
Reference in New Issue
Block a user