/**
 * Ordered table mapping a request hostname to the provider name used as the
 * `bySource` metrics bucket. The first matching row wins.
 *
 * Every pattern is anchored on a DNS label boundary (`(?:^|\.)` … `$`) so a row
 * matches the listed domain and its subdomains only. Without the left anchor,
 * an unrelated host such as `notreddit.com` would be labelled `Reddit`.
 */
const SOURCE_BY_HOST = [
  [/(?:^|\.)api\.bls\.gov$/i, 'BLS'],
  [/(?:^|\.)api\.fred\.stlouisfed\.org$/i, 'FRED'],
  [/(?:^|\.)api\.eia\.gov$/i, 'EIA'],
  [/(?:^|\.)api\.gdeltproject\.org$/i, 'GDELT'],
  [/(?:^|\.)api\.weather\.gov$/i, 'NOAA'],
  [/(?:^|\.)api\.open-notify\.org$/i, 'OpenNotify'],
  [/(?:^|\.)opensky-network\.org$/i, 'OpenSky'],
  [/(?:^|\.)firms\.modaps\.eosdis\.nasa\.gov$/i, 'FIRMS'],
  [/(?:^|\.)api\.acleddata\.com$/i, 'ACLED'],
  [/(?:^|\.)api\.reliefweb\.int$/i, 'ReliefWeb'],
  [/(?:^|\.)receiverbook\.de$/i, 'KiwiSDR'],
  [/(?:^|\.)safecast\.org$/i, 'Safecast'],
  [/(?:^|\.)api\.patentsview\.org$/i, 'PatentsView'],
  [/(?:^|\.)api\.trade\.gov$/i, 'Comtrade'],
  [/(?:^|\.)api\.usaspending\.gov$/i, 'USASpending'],
  [/(?:^|\.)api\.telegram\.org$/i, 'Telegram'],
  // Covers reddit.com and every subdomain (www, old, oauth, ...).
  [/(?:^|\.)reddit\.com$/i, 'Reddit'],
  [/(?:^|\.)api\.bsky\.app$/i, 'Bluesky'],
  [/(?:^|\.)api\.yahoo\.com$/i, 'YahooFinance'],
  [/(?:^|\.)query\d?\.finance\.yahoo\.com$/i, 'YahooFinance'],
  [/(?:^|\.)api\.cloudflare\.com$/i, 'CloudflareRadar'],
  [/(?:^|\.)api\.opensanctions\.org$/i, 'OpenSanctions'],
  [/(?:^|\.)home\.treasury\.gov$/i, 'Treasury'],
  [/(?:^|\.)fiscaldata\.treasury\.gov$/i, 'Treasury'],
  [/(?:^|\.)who\.int$/i, 'WHO'],
];

/**
 * Infer a metrics source label for a request from its URL.
 *
 * @param {string|URL} url - Request URL; anything `new URL()` accepts.
 * @returns {string} The provider name from `SOURCE_BY_HOST` when the hostname
 *   matches a row, otherwise the lowercase hostname. Returns `'unknown'` when
 *   the URL cannot be parsed or has no hostname (e.g. `file:` URLs).
 */
export function inferFetchSource(url) {
  let hostname;
  try {
    // `hostname` excludes the port, so `api.bls.gov:8443` still matches the
    // `$`-anchored patterns and fallback buckets are not split per port.
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    return 'unknown';
  }
  // Host-less URLs would otherwise produce an empty-string bucket key.
  if (hostname === '') return 'unknown';

  const match = SOURCE_BY_HOST.find(([pattern]) => pattern.test(hostname));
  return match ? match[1] : hostname;
}
// A request made without an explicit `source` option should be counted in the
// bucket named after the provider inferred from the URL host (FRED here).
test('safeFetch attributes unlabelled requests to a stable provider source', async () => {
  const fredUrl = 'https://api.fred.stlouisfed.org/fred/series/observations?series_id=VIXCLS';
  const realFetch = globalThis.fetch;

  // Stub the global fetch with a minimal successful JSON response.
  const stubFetch = async () => ({
    ok: true,
    status: 200,
    headers: { get: () => 'application/json' },
    text: async () => '{"observations":[]}',
  });
  globalThis.fetch = stubFetch;

  try {
    const payload = await safeFetch(fredUrl, { retries: 0 });
    assert.deepEqual(payload, { observations: [] });

    const { bySource } = getFetchMetrics();
    const fredBucket = bySource.FRED;
    assert.ok(fredBucket.requests >= 1);
    assert.equal(fredBucket.lastStatus, 200);
  } finally {
    // Always restore the real fetch so later tests are unaffected.
    globalThis.fetch = realFetch;
  }
});

// Known hosts map to provider names; unknown hosts fall back to the host itself.
test('inferFetchSource returns provider names and host fallback', () => {
  const expectations = [
    ['https://api.bls.gov/publicAPI/v2/timeseries/data/CPI', 'BLS'],
    ['https://query1.finance.yahoo.com/v8/finance/chart/%5EGSPC', 'YahooFinance'],
    ['https://unknown.example.test/path', 'unknown.example.test'],
  ];
  for (const [requestUrl, expectedSource] of expectations) {
    assert.equal(inferFetchSource(requestUrl), expectedSource);
  }
});