From 2025ae09db53d7f23d976d9e77074f7b783ef546 Mon Sep 17 00:00:00 2001 From: MrSphay Date: Sun, 17 May 2026 14:44:21 +0200 Subject: [PATCH] fix: infer source fetch metrics --- README.md | 2 ++ apis/utils/fetch.mjs | 44 ++++++++++++++++++++++++++-- docs/source-fetch-instrumentation.md | 21 +++++++++++++ test/fetch-utils.test.mjs | 27 ++++++++++++++++- 4 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 docs/source-fetch-instrumentation.md diff --git a/README.md b/README.md index 74e69f3..471818b 100644 --- a/README.md +++ b/README.md @@ -190,6 +190,8 @@ For Pangolin or another reverse proxy, forward HTTP traffic to `intelligence-ter The dashboard Terminal Actions panel can trigger `status`, `sweep`, and `brief` through `/api/action`. Leave `TERMINAL_ACTIONS_ENABLED=true` for a private home-server deployment. For an internet-exposed deployment, set `SWEEP_TOKEN` and pass it through trusted automation, or set `TERMINAL_ACTIONS_ENABLED=false` to disable browser-triggered actions. If you protect actions with `SWEEP_TOKEN`, the browser can send it from `localStorage.crucix_sweep_token`. +`/api/metrics` includes network health grouped by host and source/provider. Source modules should use `safeFetch(url, { source: 'SourceName' })`; when omitted, the shared helper infers a stable provider bucket from the URL host instead of grouping normal source traffic under `unknown`. Raw fetch exceptions are documented in [Source Fetch Instrumentation](docs/source-fetch-instrumentation.md). 
+ #### Build And Publish Your Gitea Image ```bash diff --git a/apis/utils/fetch.mjs b/apis/utils/fetch.mjs index 47b17d3..137756b 100644 --- a/apis/utils/fetch.mjs +++ b/apis/utils/fetch.mjs @@ -10,6 +10,44 @@ const fetchMetrics = { recent: [], }; +const SOURCE_BY_HOST = [ /* [hostname pattern, bucket name] pairs, first match wins; the (?:^|\.) prefix pins each pattern to a whole host label so a look-alike such as notreddit.com is not attributed to the provider */ + [/(?:^|\.)api\.bls\.gov$/i, 'BLS'], + [/(?:^|\.)api\.fred\.stlouisfed\.org$/i, 'FRED'], + [/(?:^|\.)api\.eia\.gov$/i, 'EIA'], + [/(?:^|\.)api\.gdeltproject\.org$/i, 'GDELT'], + [/(?:^|\.)api\.weather\.gov$/i, 'NOAA'], + [/(?:^|\.)api\.open-notify\.org$/i, 'OpenNotify'], + [/(?:^|\.)opensky-network\.org$/i, 'OpenSky'], + [/(?:^|\.)firms\.modaps\.eosdis\.nasa\.gov$/i, 'FIRMS'], + [/(?:^|\.)api\.acleddata\.com$/i, 'ACLED'], + [/(?:^|\.)api\.reliefweb\.int$/i, 'ReliefWeb'], + [/(?:^|\.)receiverbook\.de$/i, 'KiwiSDR'], + [/(?:^|\.)safecast\.org$/i, 'Safecast'], + [/(?:^|\.)api\.patentsview\.org$/i, 'PatentsView'], + [/(?:^|\.)api\.trade\.gov$/i, 'Comtrade'], + [/(?:^|\.)api\.usaspending\.gov$/i, 'USASpending'], + [/(?:^|\.)api\.telegram\.org$/i, 'Telegram'], + [/(?:^|\.)oauth\.reddit\.com$/i, 'Reddit'], + [/(?:^|\.)reddit\.com$/i, 'Reddit'], + [/(?:^|\.)api\.bsky\.app$/i, 'Bluesky'], + [/(?:^|\.)api\.yahoo\.com$/i, 'YahooFinance'], + [/(?:^|\.)query\d?\.finance\.yahoo\.com$/i, 'YahooFinance'], + [/(?:^|\.)api\.cloudflare\.com$/i, 'CloudflareRadar'], + [/(?:^|\.)api\.opensanctions\.org$/i, 'OpenSanctions'], + [/(?:^|\.)home\.treasury\.gov$/i, 'Treasury'], + [/(?:^|\.)fiscaldata\.treasury\.gov$/i, 'Treasury'], + [/(?:^|\.)who\.int$/i, 'WHO'], +]; +/** Infer a metrics bucket name from a request URL. @param {string|URL} url - request URL. @returns {string} the provider name for a host listed in SOURCE_BY_HOST, the lowercase host for any other parseable URL, or 'unknown' when the URL cannot be parsed. */ +export function inferFetchSource(url) { + let parsed; + try { parsed = new URL(url); } catch { return 'unknown'; } + for (const [pattern, source] of SOURCE_BY_HOST) { + if (pattern.test(parsed.hostname)) return source; /* hostname, not host: host carries any explicit :port, which would defeat the $ anchor */ + } + return parsed.host.toLowerCase(); +} + function metricBucket(map, key) { if (!map[key]) map[key] = { requests: 0, ok: 0, failed: 0, bytes: 0, lastStatus: null, lastError: null, lastMs: 0 }; return map[key]; @@ -38,7 +76,7 @@ export function getFetchMetrics() { } export async function safeFetch(url, opts = {}) { - const { timeout = 15000, retries = 1, headers = {}, source = undefined } = opts; + const { timeout = 15000, retries = 1, headers = 
{}, source = inferFetchSource(url) } = opts; let lastError; for (let i = 0; i <= retries; i++) { const started = Date.now(); @@ -71,11 +109,11 @@ export async function safeFetch(url, opts = {}) { if (i < retries) await new Promise(r => setTimeout(r, 2000 * (i + 1))); } } - return { error: lastError?.message || 'Unknown error', source: url }; + return { error: lastError?.message || 'Unknown error', source }; } export async function safeFetchText(url, opts = {}) { - const { timeout = 15000, retries = 1, headers = {}, source = undefined } = opts; + const { timeout = 15000, retries = 1, headers = {}, source = inferFetchSource(url) } = opts; let lastError; for (let i = 0; i <= retries; i++) { const started = Date.now(); diff --git a/docs/source-fetch-instrumentation.md b/docs/source-fetch-instrumentation.md new file mode 100644 index 0000000..6daf894 --- /dev/null +++ b/docs/source-fetch-instrumentation.md @@ -0,0 +1,21 @@ +# Source Fetch Instrumentation + +`safeFetch()` and `safeFetchText()` attribute requests to `/api/metrics.fetch.bySource`. + +Rules: + +- Prefer passing an explicit `source` option from source modules when the call has a clear Crucix source name. +- If `source` is omitted, the shared helper infers a stable provider name from the request host. +- Unknown hosts fall back to the lowercase host instead of the old `unknown` bucket. +- Raw `fetch()` calls should be limited to cases where the shared helper cannot represent the protocol cleanly. + +Current raw-fetch exceptions: + +| Area | Reason | +| --- | --- | +| OAuth/session handshakes | Token exchange calls often need custom form bodies, credential headers, or status-specific diagnostics. | +| Bot and alert delivery | Telegram/Discord alert calls are outbound operator notifications, not intelligence source health. | +| LLM providers | Provider clients already track model/provider status separately from source fetch health. 
| +| Dashboard browser calls | Browser-side `/api/*` and asset fetches are UI behavior, not source provider health. | + +When adding a new intelligence source, use `safeFetch(url, { source: 'SourceName' })` unless there is a documented exception. diff --git a/test/fetch-utils.test.mjs b/test/fetch-utils.test.mjs index 2dcee45..9274be8 100644 --- a/test/fetch-utils.test.mjs +++ b/test/fetch-utils.test.mjs @@ -1,6 +1,6 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { safeFetch, safeFetchText, getFetchMetrics } from '../apis/utils/fetch.mjs'; +import { safeFetch, safeFetchText, getFetchMetrics, inferFetchSource } from '../apis/utils/fetch.mjs'; test('safeFetch reports HTML as degraded JSON response', async () => { const originalFetch = globalThis.fetch; @@ -34,3 +34,28 @@ test('safeFetchText returns text and byte count', async () => { globalThis.fetch = originalFetch; } }); + +test('safeFetch attributes unlabelled requests to a stable provider source', async () => { + const originalFetch = globalThis.fetch; /* kept so the finally block can put the real fetch back */ + globalThis.fetch = async () => ({ /* stub: every call resolves to a 200 response with a JSON body */ + ok: true, + status: 200, + headers: { get: () => 'application/json' }, + text: async () => '{"observations":[]}', + }); + try { + const data = await safeFetch('https://api.fred.stlouisfed.org/fred/series/observations?series_id=VIXCLS', { retries: 0 }); + assert.deepEqual(data, { observations: [] }); + const bucket = getFetchMetrics().bySource.FRED; /* the call above passes no source option, so this bucket name can only come from host inference */ + assert.ok(bucket.requests >= 1); + assert.equal(bucket.lastStatus, 200); + } finally { + globalThis.fetch = originalFetch; + } +}); + +test('inferFetchSource returns provider names and host fallback', () => { /* two mapped hosts, then an unmapped host that is expected to come back as the host itself */ + assert.equal(inferFetchSource('https://api.bls.gov/publicAPI/v2/timeseries/data/CPI'), 'BLS'); + assert.equal(inferFetchSource('https://query1.finance.yahoo.com/v8/finance/chart/%5EGSPC'), 'YahooFinance'); + assert.equal(inferFetchSource('https://unknown.example.test/path'), 'unknown.example.test'); +}); -- 2.49.1