- Replace single &#039; handler with generic numeric/hex entity decoder so &#39; and other unpadded entities are properly converted - Dedup urgent OSINT posts against all hot memory runs (last 3 sweeps) instead of only the previous sweep, preventing posts that drop out of one sweep from reappearing as "new" in the next Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
380 lines
15 KiB
JavaScript
380 lines
15 KiB
JavaScript
// Telegram — public channel intelligence from conflict zones and OSINT analysts
|
|
// Primary mode: Bot API with TELEGRAM_BOT_TOKEN (getUpdates, getChat)
|
|
// Fallback mode: Scrape public channel web previews at https://t.me/s/{channel}
|
|
// Monitors conflict zones (Ukraine, Middle East), geopolitics, and OSINT channels.
|
|
|
|
import { safeFetch } from '../utils/fetch.mjs';
|
|
import '../utils/env.mjs';
|
|
|
|
/**
 * Pause for the given number of milliseconds.
 * @param {number} ms - How long to wait before the promise resolves.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
// Curated list of well-known public OSINT / conflict / geopolitics channels.
// Each entry is expected to expose a public web preview at https://t.me/s/{id}.
// Extra channels can be appended with the TELEGRAM_CHANNELS env var
// (comma-separated channel IDs); the defaults below are always kept.
// Rows are [id, label, topic, note] and are expanded into objects at the end.
const DEFAULT_CHANNELS = [
  // === Conflict: Ukraine/Russia ===
  ['intelslava', 'Intel Slava Z', 'conflict', 'Conflict updates, pro-Russian perspective'],
  ['legitimniy', 'Legitimniy', 'conflict', 'Ukrainian politics & conflict analysis'],
  ['wartranslated', 'War Translated', 'conflict', 'Conflict translations & OSINT'],
  ['ukraine_frontline', 'Ukraine Frontline', 'conflict', 'Frontline situation updates'],
  ['mod_russia', 'Russian MoD', 'conflict', 'Russian Ministry of Defense official'],
  ['CIG_telegram', 'Conflict Intel Team', 'osint', 'Conflict Intelligence Team analysis'],
  ['RVvoenkor', 'Voenkor RV', 'conflict', 'Russian military correspondent'],
  ['readovkanews', 'Readovka', 'conflict', 'Russian conflict news aggregator'],
  ['DeepStateUA', 'DeepState Ukraine', 'conflict', 'Ukrainian frontline maps & analysis'],
  ['operativnoZSU', 'ZSU Operative', 'conflict', 'Ukrainian armed forces updates'],
  ['GeneralStaffZSU', 'General Staff ZSU', 'conflict', 'Ukrainian General Staff official'],
  // === Middle East ===
  ['middleeastosint', 'Middle East OSINT', 'osint', 'Middle East open source intel'],
  ['inikiforv', 'Nikiforov OSINT', 'osint', 'Cross-regional OSINT analyst'],
  // === Geopolitics & Analysis ===
  ['geaborning', 'Geo A. Borning', 'geopolitics', 'Geopolitical analysis and forecasting'],
  ['TheIntelligencer', 'The Intelligencer', 'osint', 'Intelligence community news'],
  // === Markets & Finance ===
  ['WallStreetSilver', 'Wall St Silver', 'finance', 'Commodities and macro commentary'],
  ['unusual_whales', 'Unusual Whales', 'finance', 'Market flow and options analysis'],
].map(([id, label, topic, note]) => ({ id, label, topic, note }));
|
|
|
|
/**
 * Build the list of channels to monitor: the defaults plus any user-supplied
 * extras.
 *
 * Custom IDs are trimmed, blank entries are dropped, and an ID is added at
 * most once: IDs already present in the defaults, and IDs repeated inside the
 * custom list itself, are skipped so no channel is scraped twice.
 *
 * @param {string} [custom] - Comma-separated channel IDs. Defaults to the
 *   TELEGRAM_CHANNELS environment variable.
 * @param {Array<{id: string}>} [defaults] - Base channel list. Defaults to
 *   DEFAULT_CHANNELS.
 * @returns {Array<object>} `defaults` itself when there is no custom list,
 *   otherwise a new array of the defaults followed by the extras (each extra
 *   is `{ id, label: id, topic: 'custom', note: 'User-added channel' }`).
 */
function loadChannels(custom = process.env.TELEGRAM_CHANNELS, defaults = DEFAULT_CHANNELS) {
  if (!custom) return defaults;

  // Seeded with the default IDs, then grown as extras are accepted, so one
  // set handles both "already a default" and "listed twice by the user".
  const seen = new Set(defaults.map((c) => c.id));
  const extras = [];

  for (const raw of custom.split(',')) {
    const id = raw.trim();
    if (!id || seen.has(id)) continue;
    seen.add(id);
    extras.push({ id, label: id, topic: 'custom', note: 'User-added channel' });
  }

  return [...defaults, ...extras];
}
|
|
|
|
// Effective channel list for this module, resolved once when the module loads.
const CHANNELS = loadChannels();
|
|
|
|
// Keywords whose presence flags a post as urgent / high-priority.
// Grouped by domain for maintainability; the groups are flattened, in
// declaration order, into one flat array of strings.
const URGENT_KEYWORDS = Object.values({
  // Breaking / meta urgency
  meta: ['breaking', 'urgent', 'alert', 'confirmed', 'just in', 'flash'],
  // Military / kinetic
  military: [
    'missile', 'strike', 'explosion', 'airstrike', 'drone', 'bombardment',
    'shelling', 'intercept', 'ICBM', 'hypersonic', 'F-16', 'ATACMS', 'HIMARS',
  ],
  // Escalation / de-escalation
  escalation: [
    'nuclear', 'chemical', 'biological', 'ceasefire', 'escalation', 'invasion',
    'offensive', 'retreat', 'advance', 'mobilization', 'martial law',
  ],
  // Geopolitical
  geopolitical: [
    'nato', 'coup', 'assassination', 'sanctions', 'embargo', 'blockade',
    'summit', 'ultimatum', 'declaration of war', 'peace deal',
  ],
  // Casualty / humanitarian
  humanitarian: ['casualties', 'killed', 'wounded', 'evacuation', 'refugee', 'humanitarian'],
  // Infrastructure / cyber
  infrastructure: ['blackout', 'sabotage', 'cyberattack', 'pipeline', 'dam', 'nuclear plant'],
  // Financial crisis
  financial: ['default', 'bank run', 'circuit breaker', 'flash crash', 'emergency rate'],
}).flat();
|
|
|
|
// ─── Bot API mode ───────────────────────────────────────────────────────────
|
|
|
|
// Base URL for Bot API calls. TELEGRAM_BOT_TOKEN is read on every call rather
// than captured once, so the current value of the env var is always used.
const botBase = () => {
  const { TELEGRAM_BOT_TOKEN: token } = process.env;
  return `https://api.telegram.org/bot${token}`;
};
|
|
|
|
/**
 * Request recent updates for the bot from the Bot API `getUpdates` endpoint.
 *
 * @param {{limit?: number, offset?: number}} [opts] - Query options; `limit`
 *   defaults to 100 and `offset` to 0. Both are sent as strings.
 * @returns {Promise<*>} Whatever `safeFetch` resolves to for the request.
 */
export async function getUpdates(opts = {}) {
  const { limit = 100, offset = 0 } = opts;

  const query = new URLSearchParams();
  query.set('limit', String(limit));
  query.set('offset', String(offset));

  const endpoint = `${botBase()}/getUpdates?${query}`;
  return safeFetch(endpoint);
}
|
|
|
|
/**
 * Look up a chat/channel through the Bot API `getChat` endpoint.
 *
 * Usernames are sent in the `@username` form (the `@` is added when missing).
 * Numeric chat identifiers, given either as a number or a digit string such
 * as "-1001234567890", are sent unchanged: prefixing them with `@` would turn
 * them into an invalid username, and calling `.startsWith` on a number throws.
 *
 * @param {string|number} chatId - Channel username (with or without a leading
 *   `@`) or numeric chat ID.
 * @returns {Promise<*>} Whatever `safeFetch` resolves to for the request.
 */
export async function getChat(chatId) {
  const raw = String(chatId).trim();
  const isNumericId = /^-?\d+$/.test(raw);
  const target = isNumericId || raw.startsWith('@') ? raw : `@${raw}`;

  const params = new URLSearchParams({ chat_id: target });
  return safeFetch(`${botBase()}/getChat?${params}`);
}
|
|
|
|
/**
 * Reduce a Bot API message object to the handful of fields used in briefings.
 *
 * @param {object} msg - Message object taken from a Bot API update.
 * @returns {{text: string, date: (string|null), chat: string, views: number, hasMedia: boolean}}
 *   `text` falls back to the caption, then to ''; `date` is the message's
 *   seconds-based timestamp as an ISO string, or null when absent; `chat` is
 *   the chat title, else username, else 'unknown'; `hasMedia` is true when a
 *   photo, video or document is attached.
 */
function compactBotMessage(msg) {
  const { text, caption, date, chat, views, photo, video, document: doc } = msg;

  // msg.date is in seconds; Date expects milliseconds.
  const isoDate = date ? new Date(date * 1000).toISOString() : null;
  const chatName = chat?.title || chat?.username || 'unknown';

  return {
    text: text || caption || '',
    date: isoDate,
    chat: chatName,
    views: views || 0,
    hasMedia: Boolean(photo || video || doc),
  };
}
|
|
|
|
/**
 * Pull up to 100 updates through the Bot API and compact the messages in them.
 *
 * An update contributes a message when it carries `message`, `channel_post`
 * or `edited_channel_post` (checked in that order); other updates are skipped.
 *
 * @returns {Promise<{messages: object[], count: number}|{error: string}>}
 *   The compacted messages and their count, or an `error` description when
 *   the response is not `ok` or has no `result` array.
 */
async function fetchBotUpdates() {
  const response = await getUpdates({ limit: 100 });
  const updates = response?.result;

  if (!response?.ok || !Array.isArray(updates)) {
    return { error: response?.description || 'Bot API request failed' };
  }

  const messages = [];
  for (const update of updates) {
    const payload = update.message || update.channel_post || update.edited_channel_post;
    if (payload) messages.push(compactBotMessage(payload));
  }

  return { messages, count: messages.length };
}
|
|
|
|
// ─── Web preview scraping fallback ──────────────────────────────────────────
|
|
|
|
/**
 * Fetch a URL and return its body as text, or null on any failure.
 *
 * Uses the global `fetch` directly instead of `safeFetch` (the original note
 * here says safeFetch truncates non-JSON bodies to 500 chars, which is too
 * short for a channel page).
 *
 * The abort timer stays armed until the body has been fully read, so the
 * timeout covers both the response headers and the body download; it is
 * always cleared in `finally`.
 *
 * @param {string} url - Address to fetch.
 * @param {number} [timeoutMs=15000] - Abort the whole request after this long.
 * @returns {Promise<string|null>} Body text, or null on a non-2xx status,
 *   network error, or timeout. Failures are deliberately reported as null
 *   rather than thrown so one bad channel does not fail a whole sweep.
 */
async function fetchHTML(url, timeoutMs = 15000) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const res = await fetch(url, {
      signal: controller.signal,
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US,en;q=0.9',
      },
    });
    if (!res.ok) return null;
    return await res.text();
  } catch {
    // Best-effort fetch: network errors and aborts are reported as "no data".
    return null;
  } finally {
    clearTimeout(timer);
  }
}
|
|
|
|
// Named HTML entities decoded in preview text, keyed by the name between the
// ampersand and the semicolon. The non-breaking space maps to a plain space.
const PREVIEW_NAMED_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
]);

// Decode named, decimal and hex HTML entities in a single pass.
// One pass matters: decoding the ampersand entity first and then re-scanning
// would decode text that was escaped on purpose a second time.
function decodePreviewEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (whole, body) => {
    if (body[0] !== '#') {
      // Unknown names are left exactly as they appeared.
      return PREVIEW_NAMED_ENTITIES.get(body) ?? whole;
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // fromCodePoint throws outside the Unicode range; keep such input as-is.
    if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) return whole;
    // fromCodePoint (not fromCharCode) so code points above U+FFFF survive.
    return String.fromCodePoint(codePoint);
  });
}

// Convert a view label such as "834", "1.5K" or "2M" to an integer count.
// Anything that does not start with a number yields 0.
function parsePreviewViews(raw) {
  const label = raw.trim();
  const value = Number.parseFloat(label);
  if (!Number.isFinite(value)) return 0;

  const suffix = label.slice(-1).toUpperCase();
  let scale = 1;
  if (suffix === 'K') scale = 1000;
  else if (suffix === 'M') scale = 1000000;
  // Round so float multiplication can never produce a fractional count.
  return Math.round(value * scale);
}

/**
 * Parse messages out of Telegram web preview HTML (https://t.me/s/channel).
 *
 * The page is split on `data-post="channel/msgId"` attributes; everything
 * between one such attribute and the next is treated as that message's block.
 * From each block this extracts the text (line breaks kept, tags stripped,
 * entities decoded), the view count, the `datetime` attribute, and whether a
 * photo/video element is present. Blocks with neither text nor media are
 * dropped.
 *
 * Note: the text is captured up to the first closing div, so a message whose
 * text element contains a nested div would be cut short.
 *
 * @param {string|null} html - Raw page HTML; falsy input yields [].
 * @param {string} channelId - Stored on each message as `channel`.
 * @param {number} [maxMessages=20] - Stop after this many messages.
 * @returns {Array<{postId: string, text: string, date: (string|null), views: number, hasMedia: boolean, channel: string}>}
 */
function parseWebPreview(html, channelId, maxMessages = 20) {
  if (!html) return [];

  const messages = [];
  const postRegex = /data-post="([^"]+)"([\s\S]*?)(?=data-post="|$)/gi;

  for (const [, postId, block] of html.matchAll(postRegex)) {
    if (messages.length >= maxMessages) break;

    // Message text: tags are stripped BEFORE entities are decoded, so an
    // escaped "<b>" in the message stays visible as literal text.
    const textMatch = block.match(/class="tgme_widget_message_text[^"]*"[^>]*>([\s\S]*?)<\/div>/i);
    let text = '';
    if (textMatch) {
      const withoutTags = textMatch[1]
        .replace(/<br\s*\/?>/gi, '\n') // preserve line breaks
        .replace(/<[^>]+>/g, ''); // strip HTML tags
      text = decodePreviewEntities(withoutTags).trim();
    }

    const viewsMatch = block.match(/class="tgme_widget_message_views"[^>]*>([\s\S]*?)<\/span>/i);
    const views = viewsMatch ? parsePreviewViews(viewsMatch[1]) : 0;

    const timeMatch = block.match(/datetime="([^"]+)"/i);
    const date = timeMatch ? timeMatch[1] : null;

    const hasMedia = /tgme_widget_message_photo|tgme_widget_message_video/i.test(block);

    if (text || hasMedia) {
      messages.push({ postId, text, date, views, hasMedia, channel: channelId });
    }
  }

  return messages;
}
|
|
|
|
/**
 * Scrape one channel's public web preview page.
 *
 * The title is taken from the channel-info header element when present,
 * otherwise from the page <title>, otherwise the channel ID is used; any tags
 * inside the captured title are stripped.
 *
 * @param {string} channelId - Channel ID, placed in https://t.me/s/{channelId}.
 * @returns {Promise<object>} `{ channel, title, posts, postCount }` on
 *   success, or `{ channel, error: 'Failed to fetch', posts: [] }` when the
 *   page could not be fetched.
 */
async function scrapeChannel(channelId) {
  const html = await fetchHTML(`https://t.me/s/${channelId}`);
  if (!html) {
    return { channel: channelId, error: 'Failed to fetch', posts: [] };
  }

  const headerTitle = html.match(/class="tgme_channel_info_header_title[^"]*"[^>]*>([\s\S]*?)<\/span>/i);
  const titleMatch = headerTitle || html.match(/<title>(.*?)<\/title>/i);

  let title = channelId;
  if (titleMatch) {
    title = titleMatch[1].replace(/<[^>]+>/g, '').trim();
  }

  const posts = parseWebPreview(html, channelId);
  return { channel: channelId, title, posts, postCount: posts.length };
}
|
|
|
|
// ─── Analysis helpers ───────────────────────────────────────────────────────
|
|
|
|
/**
 * Find the urgent keywords that occur in a post's text.
 *
 * Matching is case-insensitive on BOTH sides: the text and each keyword are
 * lowercased before comparing, so keywords listed in upper or mixed case
 * (acronyms, weapon designations) can match. Matching is by substring, so a
 * short keyword also matches inside longer words.
 *
 * @param {{text?: string}} post - Post whose `text` is scanned.
 * @returns {string[]|null} The matching keywords, spelled as they appear in
 *   URGENT_KEYWORDS, or null when none match (or there is no text).
 */
function flagUrgent(post) {
  const haystack = (post.text || '').toLowerCase();
  const matched = URGENT_KEYWORDS.filter((keyword) => haystack.includes(keyword.toLowerCase()));
  return matched.length > 0 ? matched : null;
}
|
|
|
|
/**
 * Score a post's significance from views, urgency, length and media.
 *
 * - views: 1 point per 1000 views, capped at 50
 * - urgency: 10 points per matched urgent keyword
 * - text longer than 100 characters: +5
 * - media attached: +3
 *
 * A missing or non-numeric `views` counts as 0 instead of turning the whole
 * score into NaN, which would make score-based sorting unreliable.
 *
 * @param {{views?: number, text?: string, hasMedia?: boolean}} post
 * @returns {number} The score; always a finite number.
 */
function significanceScore(post) {
  const views = Number(post.views);
  let score = Number.isFinite(views) ? Math.min(views / 1000, 50) : 0; // views weight (capped)

  const urgentFlags = flagUrgent(post);
  if (urgentFlags) score += urgentFlags.length * 10; // urgency weight
  if (post.text?.length > 100) score += 5; // substantive text bonus
  if (post.hasMedia) score += 3; // media bonus
  return score;
}
|
|
|
|
/**
 * Bucket posts by the topic of the channel they came from.
 *
 * A post's topic is the `topic` of the first `channelMeta` entry whose `id`
 * equals the post's `channel`; posts with no matching entry (or an entry with
 * an empty topic) go under 'other'. Order within each bucket follows the
 * order of `allPosts`.
 *
 * @param {Array<{channel: string}>} allPosts - Posts to group.
 * @param {Array<{id: string, topic?: string}>} channelMeta - Channel config.
 * @returns {Object<string, object[]>} Map of topic name to its posts.
 */
function groupByTopic(allPosts, channelMeta) {
  const topicFor = (post) =>
    channelMeta.find(({ id }) => id === post.channel)?.topic || 'other';

  return allPosts.reduce((groups, post) => {
    const topic = topicFor(post);
    const bucket = groups[topic] || (groups[topic] = []);
    bucket.push(post);
    return groups;
  }, {});
}
|
|
|
|
// ─── Briefing ───────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Produce the Telegram briefing.
 *
 * When TELEGRAM_BOT_TOKEN is set, the Bot API is tried first; if it yields at
 * least one message the briefing is built from those (status 'bot_api').
 * If there is no token, the bot call fails or throws, or it returns no
 * messages, the public web previews of every channel in CHANNELS are scraped
 * instead, three channels at a time with a 1.5 s pause between batches.
 *
 * Every post is enriched with `urgentFlags` and `score`, and all rankings are
 * by descending score.
 *
 * @returns {Promise<object>} Briefing object. The Bot API form carries
 *   `totalMessages`, `urgentPosts` (top 10) and `topPosts` (top 15); the
 *   scrape form carries channel summaries, per-topic summaries, `urgentPosts`
 *   (top 15), `topPosts` (top 15), and `errors` when any channel failed.
 */
export async function briefing() {
  const token = process.env.TELEGRAM_BOT_TOKEN;

  const byScoreDesc = (a, b) => b.score - a.score;
  const isUrgent = (post) => post.urgentFlags;
  const enrich = (post) => ({
    ...post,
    urgentFlags: flagUrgent(post),
    score: significanceScore(post),
  });

  // Try Bot API first if token is available
  if (token) {
    try {
      const botData = await fetchBotUpdates();
      if (!botData.error && botData.count > 0) {
        const enriched = botData.messages.map((m) => enrich(m));

        const urgent = enriched.filter(isUrgent);
        urgent.sort(byScoreDesc);
        enriched.sort(byScoreDesc);

        return {
          source: 'Telegram',
          timestamp: new Date().toISOString(),
          status: 'bot_api',
          totalMessages: botData.count,
          urgentPosts: urgent.slice(0, 10),
          topPosts: enriched.slice(0, 15),
          note: 'Data from Bot API getUpdates. Bot must be added to channels to receive posts.',
        };
      }
      // Bot returned an error or no messages: continue with web scraping
    } catch { /* fall through to scraping */ }
  }

  // Fallback: scrape public channel web previews (no auth needed).
  // Channels are fetched in small batches to avoid rate limiting.
  const BATCH_SIZE = 3;
  const results = [];
  for (let start = 0; start < CHANNELS.length; start += BATCH_SIZE) {
    const batch = CHANNELS.slice(start, start + BATCH_SIZE);
    const batchResults = await Promise.all(batch.map(({ id }) => scrapeChannel(id)));
    results.push(...batchResults);

    // Pause between batches (not after the last one) to be respectful
    const moreToFetch = start + BATCH_SIZE < CHANNELS.length;
    if (moreToFetch) await delay(1500);
  }

  // Collect enriched posts, per-channel summaries, and fetch errors
  const errors = [];
  const allPosts = [];
  const channelSummaries = [];
  for (const { channel, error, posts, title, postCount } of results) {
    const meta = CHANNELS.find((c) => c.id === channel);
    if (error) errors.push({ channel, error });

    allPosts.push(...(posts || []).map((p) => enrich(p)));

    channelSummaries.push({
      channel,
      title: title || meta?.label || channel,
      topic: meta?.topic || 'other',
      postCount: postCount || 0,
      reachable: !error,
    });
  }

  // Rank everything by significance, then pick out the urgent posts
  allPosts.sort(byScoreDesc);
  const urgentPosts = allPosts.filter(isUrgent).slice(0, 15);

  // Per-topic totals and top posts
  const topicSummary = {};
  const grouped = groupByTopic(allPosts, CHANNELS);
  for (const topic of Object.keys(grouped)) {
    const posts = grouped[topic];
    topicSummary[topic] = {
      totalPosts: posts.length,
      urgentCount: posts.filter(isUrgent).length,
      topPosts: posts.sort(byScoreDesc).slice(0, 5),
    };
  }

  return {
    source: 'Telegram',
    timestamp: new Date().toISOString(),
    status: token ? 'bot_api_empty_fallback_scrape' : 'web_scrape',
    method: 'Public channel web preview scraping (no auth required)',
    channelsMonitored: channelSummaries.length,
    channelsReachable: channelSummaries.filter((c) => c.reachable).length,
    totalPosts: allPosts.length,
    urgentPosts,
    byTopic: topicSummary,
    channels: channelSummaries,
    errors: errors.length > 0 ? errors : undefined,
    topPosts: allPosts.slice(0, 15),
    hint: token
      ? undefined
      : 'Set TELEGRAM_BOT_TOKEN in .env for Bot API access. Create a bot via @BotFather on Telegram.',
  };
}
|
|
|
|
// ─── CLI runner ─────────────────────────────────────────────────────────────
|
|
|
|
// When this file is the script being executed (its path ends in
// 'telegram.mjs'), run a briefing and print it as pretty-printed JSON.
const invokedScript = process.argv[1];
const isCliRun = invokedScript?.endsWith('telegram.mjs');
if (isCliRun) {
  console.log('Telegram OSINT — fetching public channel intelligence...\n');
  const report = await briefing();
  console.log(JSON.stringify(report, null, 2));
}
|