Files
intelligence-terminal/apis/utils/fetch.mjs
MrSphay 2025ae09db
All checks were successful
Codex Template Compliance / template-compliance (pull_request) Successful in 4s
Build / test-and-image (pull_request) Successful in 53s
fix: infer source fetch metrics
2026-05-17 14:44:21 +02:00

154 lines
5.8 KiB
JavaScript

// Shared fetch utility with timeout, retries, metrics, and error handling
// Module-wide, in-memory counters describing every fetch attempt that has been
// recorded so far. Callers outside this module only ever see a copy.
const fetchMetrics = {
  // Totals across all recorded attempts.
  requests: 0,
  ok: 0,
  failed: 0,
  bytes: 0,
  // Per-key buckets (same counter shape each), keyed by URL host and by source label.
  byHost: {},
  bySource: {},
  // Most recent attempts, newest first; capped at 100 entries when recording.
  recent: [],
};
// Ordered [hostname pattern, source label] pairs used to attribute a request
// to a data source; the first matching pattern wins.
//
// Every pattern is anchored to a DNS label boundary with `(?:^|\.)` so that it
// matches the listed host and any of its subdomains, but not an unrelated host
// that merely ends with the same characters (e.g. "notreddit.com").
const SOURCE_BY_HOST = [
  [/(?:^|\.)api\.bls\.gov$/i, 'BLS'],
  [/(?:^|\.)api\.fred\.stlouisfed\.org$/i, 'FRED'],
  [/(?:^|\.)api\.eia\.gov$/i, 'EIA'],
  [/(?:^|\.)api\.gdeltproject\.org$/i, 'GDELT'],
  [/(?:^|\.)api\.weather\.gov$/i, 'NOAA'],
  [/(?:^|\.)api\.open-notify\.org$/i, 'OpenNotify'],
  [/(?:^|\.)opensky-network\.org$/i, 'OpenSky'],
  [/(?:^|\.)firms\.modaps\.eosdis\.nasa\.gov$/i, 'FIRMS'],
  [/(?:^|\.)api\.acleddata\.com$/i, 'ACLED'],
  [/(?:^|\.)api\.reliefweb\.int$/i, 'ReliefWeb'],
  [/(?:^|\.)receiverbook\.de$/i, 'KiwiSDR'],
  [/(?:^|\.)safecast\.org$/i, 'Safecast'],
  [/(?:^|\.)api\.patentsview\.org$/i, 'PatentsView'],
  [/(?:^|\.)api\.trade\.gov$/i, 'Comtrade'],
  [/(?:^|\.)api\.usaspending\.gov$/i, 'USASpending'],
  [/(?:^|\.)api\.telegram\.org$/i, 'Telegram'],
  [/(?:^|\.)oauth\.reddit\.com$/i, 'Reddit'],
  [/(?:^|\.)reddit\.com$/i, 'Reddit'],
  [/(?:^|\.)api\.bsky\.app$/i, 'Bluesky'],
  [/(?:^|\.)api\.yahoo\.com$/i, 'YahooFinance'],
  [/(?:^|\.)query\d?\.finance\.yahoo\.com$/i, 'YahooFinance'],
  [/(?:^|\.)api\.cloudflare\.com$/i, 'CloudflareRadar'],
  [/(?:^|\.)api\.opensanctions\.org$/i, 'OpenSanctions'],
  [/(?:^|\.)home\.treasury\.gov$/i, 'Treasury'],
  [/(?:^|\.)fiscaldata\.treasury\.gov$/i, 'Treasury'],
  [/(?:^|\.)who\.int$/i, 'WHO'],
];
/**
 * Map a request URL to the label of the data source it belongs to.
 *
 * @param {string|URL} url - Absolute URL of the request.
 * @returns {string} The label of the first SOURCE_BY_HOST pattern that matches
 *   the URL's hostname; the lower-cased host (including any explicit port)
 *   when no pattern matches; or 'unknown' when `url` cannot be parsed.
 */
export function inferFetchSource(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return 'unknown';
  }
  // Match on the hostname, not the host: the patterns are anchored at the end
  // of the string, so an explicit port ("api.bls.gov:8443") would otherwise
  // prevent a known source from being recognised.
  const hostname = parsed.hostname.toLowerCase();
  for (const [pattern, source] of SOURCE_BY_HOST) {
    if (pattern.test(hostname)) return source;
  }
  return parsed.host.toLowerCase();
}
/**
 * Return the counter bucket stored under `key` in `map`, creating a zeroed
 * bucket first when the map has none of its own.
 *
 * @param {object} map - Bucket dictionary to look in (mutated when the bucket is created).
 * @param {string} key - Bucket name, e.g. a host or a source label.
 * @returns {object} The bucket owned by `map` for `key`.
 */
function metricBucket(map, key) {
  // Only own properties count: on a plain object, inherited members such as
  // "constructor" or "toString" are truthy and would otherwise be returned
  // as if they were buckets.
  if (!Object.hasOwn(map, key)) {
    // defineProperty creates a real own entry even for "__proto__", where a
    // plain assignment would replace the map's prototype instead.
    Object.defineProperty(map, key, {
      value: { requests: 0, ok: 0, failed: 0, bytes: 0, lastStatus: null, lastError: null, lastMs: 0 },
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return map[key];
}
/**
 * Fold the outcome of one fetch attempt into the module-level metrics: the
 * global totals, the per-host and per-source buckets, and the recent list.
 *
 * @param {object} outcome
 * @param {string} outcome.url - Requested URL; its host names the per-host bucket ('unknown' if unparsable).
 * @param {string} [outcome.source='unknown'] - Label naming the per-source bucket.
 * @param {boolean} outcome.ok - Whether the attempt counts as a success.
 * @param {?number} outcome.status - HTTP status, if one was received.
 * @param {number} [outcome.bytes] - Size of the body that was read; falsy values count as 0.
 * @param {number} [outcome.durationMs] - Duration of the attempt in milliseconds.
 * @param {?string} [outcome.error] - Failure description, if any.
 */
function recordFetchMetric({ url, source = 'unknown', ok, status, bytes, durationMs, error }) {
  let host;
  try {
    host = new URL(url).host;
  } catch {
    host = 'unknown';
  }
  const size = bytes || 0;
  const failure = error || null;

  // Global totals.
  fetchMetrics.requests += 1;
  fetchMetrics.bytes += size;
  if (ok) {
    fetchMetrics.ok += 1;
  } else {
    fetchMetrics.failed += 1;
  }

  // The same update is applied to the host bucket and to the source bucket.
  const hostBucket = metricBucket(fetchMetrics.byHost, host);
  const sourceBucket = metricBucket(fetchMetrics.bySource, source);
  [hostBucket, sourceBucket].forEach((bucket) => {
    bucket.requests += 1;
    bucket.bytes += size;
    bucket.lastStatus = status || null;
    bucket.lastMs = durationMs || 0;
    bucket.lastError = failure;
    if (ok) {
      bucket.ok += 1;
    } else {
      bucket.failed += 1;
    }
  });

  // Newest entry goes first; only the latest 100 are kept.
  const entry = {
    at: new Date().toISOString(),
    source,
    host,
    ok,
    status,
    bytes: size,
    durationMs,
    error: failure,
  };
  fetchMetrics.recent = [entry, ...fetchMetrics.recent].slice(0, 100);
}
/**
 * Snapshot the fetch metrics collected so far.
 *
 * @returns {object} A detached copy produced by a JSON round trip, so changes
 *   made to it do not affect the live counters.
 */
export function getFetchMetrics() {
  const serialized = JSON.stringify(fetchMetrics);
  return JSON.parse(serialized);
}
/**
 * Fetch a URL and return its body parsed as JSON, with a per-attempt timeout,
 * retries with linear backoff, and metrics for every attempt.
 *
 * Failed attempts are caught; once all attempts are used up the function
 * resolves to an error object instead of rejecting.
 *
 * @param {string} url - Address to request.
 * @param {object} [opts]
 * @param {number} [opts.timeout=15000] - Milliseconds before an attempt is aborted.
 * @param {number} [opts.retries=1] - Additional attempts made after a failed one.
 * @param {object} [opts.headers={}] - Request headers, merged over the default User-Agent.
 * @param {string} [opts.source] - Metrics label; defaults to inferFetchSource(url).
 * @returns {Promise<*>} The parsed JSON body; `{ rawText }` holding the first
 *   500 characters when the body is not valid JSON; or `{ error, source }`
 *   when every attempt failed (HTTP error status, HTML body, network error
 *   or timeout).
 */
export async function safeFetch(url, opts = {}) {
  const { timeout = 15000, retries = 1, headers = {}, source = inferFetchSource(url) } = opts;
  let lastError;
  for (let i = 0; i <= retries; i++) {
    const started = Date.now();
    // Set as soon as this attempt is written to the metrics, so the catch
    // block below never counts the same attempt a second time.
    let recorded = false;
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), timeout);
      let res;
      let text;
      try {
        res = await fetch(url, {
          signal: controller.signal,
          headers: { 'User-Agent': 'Crucix/1.0', ...headers },
        });
        // An error body is best-effort; a success body must be readable.
        text = res.ok ? await res.text() : await res.text().catch(() => '');
      } finally {
        // Released on every path, including a rejected fetch. The timer stays
        // armed while the body is read so a stalled body is aborted as well.
        clearTimeout(timer);
      }
      const status = res.status;
      // NOTE: "bytes" is the decoded text length (UTF-16 code units), which
      // only approximates the number of bytes transferred.
      const bytes = text.length;
      if (!res.ok) {
        recorded = true;
        recordFetchMetric({ url, source, ok: false, status, bytes, durationMs: Date.now() - started, error: `HTTP ${status}` });
        throw new Error(`HTTP ${status}: ${text.slice(0, 200)}`);
      }
      const trimmed = text.trim();
      const contentType = res.headers.get('content-type') || '';
      if (contentType.includes('text/html') || trimmed.startsWith('<!DOCTYPE html') || trimmed.startsWith('<html')) {
        // The caller ends up with an error for this attempt, so it is
        // recorded once, as a failure, keeping the status that was received.
        const message = `Expected JSON but received HTML from ${new URL(url).host}`;
        recorded = true;
        recordFetchMetric({ url, source, ok: false, status, bytes, durationMs: Date.now() - started, error: message });
        throw new Error(message);
      }
      recorded = true;
      recordFetchMetric({ url, source, ok: true, status, bytes, durationMs: Date.now() - started });
      try { return JSON.parse(text); } catch { return { rawText: text.slice(0, 500) }; }
    } catch (e) {
      lastError = e;
      // Only failures that were not already recorded above (network errors,
      // aborts, unreadable bodies) are recorded here.
      if (!recorded) {
        recordFetchMetric({ url, source, ok: false, status: null, bytes: 0, durationMs: Date.now() - started, error: e.message });
      }
      // GDELT needs 5s between requests, others are fine with shorter delays.
      // The wait grows linearly: 2s after the first failure, 4s after the second, ...
      if (i < retries) await new Promise(r => setTimeout(r, 2000 * (i + 1)));
    }
  }
  return { error: lastError?.message || 'Unknown error', source };
}
/**
 * Fetch a URL and return its body as text, with a per-attempt timeout,
 * retries with linear backoff, and metrics for every attempt.
 *
 * Failed attempts are caught; once all attempts are used up the function
 * resolves to an error object instead of rejecting.
 *
 * @param {string} url - Address to request.
 * @param {object} [opts]
 * @param {number} [opts.timeout=15000] - Milliseconds before an attempt is aborted.
 * @param {number} [opts.retries=1] - Additional attempts made after a failed one.
 * @param {object} [opts.headers={}] - Request headers, merged over the default User-Agent.
 * @param {string} [opts.source] - Metrics label; defaults to inferFetchSource(url).
 * @returns {Promise<object>} `{ text, status, bytes }` on success, where
 *   `bytes` is the length of the decoded text; or `{ error, source }` when
 *   every attempt failed (HTTP error status, network error or timeout).
 */
export async function safeFetchText(url, opts = {}) {
  const { timeout = 15000, retries = 1, headers = {}, source = inferFetchSource(url) } = opts;
  let lastError;
  for (let i = 0; i <= retries; i++) {
    const started = Date.now();
    // Set as soon as this attempt is written to the metrics, so the catch
    // block below never counts the same attempt a second time.
    let recorded = false;
    try {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), timeout);
      let res;
      let text;
      try {
        res = await fetch(url, {
          signal: controller.signal,
          headers: { 'User-Agent': 'Crucix/1.0', ...headers },
        });
        text = await res.text();
      } finally {
        // Released on every path, including a rejected fetch. The timer stays
        // armed while the body is read so a stalled body is aborted as well.
        clearTimeout(timer);
      }
      recorded = true;
      recordFetchMetric({ url, source, ok: res.ok, status: res.status, bytes: text.length, durationMs: Date.now() - started, error: res.ok ? null : `HTTP ${res.status}` });
      if (!res.ok) throw new Error(`HTTP ${res.status}: ${text.slice(0, 200)}`);
      return { text, status: res.status, bytes: text.length };
    } catch (e) {
      lastError = e;
      // Only failures that were not already recorded above (network errors,
      // aborts, unreadable bodies) are recorded here.
      if (!recorded) {
        recordFetchMetric({ url, source, ok: false, status: null, bytes: 0, durationMs: Date.now() - started, error: e.message });
      }
      // The wait grows linearly: 2s after the first failure, 4s after the second, ...
      if (i < retries) await new Promise(r => setTimeout(r, 2000 * (i + 1)));
    }
  }
  // `source` is included so the error shape matches safeFetch.
  return { error: lastError?.message || 'Unknown error', source };
}
/**
 * ISO-8601 timestamp for the instant `hours` hours before now.
 *
 * @param {number} hours - How far back to go; fractions are allowed.
 * @returns {string} The result of Date#toISOString for that instant.
 */
export function ago(hours) {
  const MS_PER_HOUR = 3600000;
  const then = Date.now() - hours * MS_PER_HOUR;
  return new Date(then).toISOString();
}
/**
 * Current date in UTC.
 *
 * @returns {string} The part of the current ISO-8601 timestamp before "T" (YYYY-MM-DD).
 */
export function today() {
  const iso = new Date().toISOString();
  const [datePart] = iso.split('T');
  return datePart;
}
/**
 * UTC calendar date `n` days before today.
 *
 * @param {number} n - Number of days to go back; negative values move forward.
 * @returns {string} The date as YYYY-MM-DD, taken from the ISO-8601 timestamp.
 */
export function daysAgo(n) {
  const d = new Date();
  // Shift in UTC because the result is formatted in UTC. Shifting the local
  // date instead can land on the wrong UTC day when a daylight-saving change
  // lies between the two dates and the current time is close to UTC midnight.
  d.setUTCDate(d.getUTCDate() - n);
  return d.toISOString().split('T')[0];
}