fix: infer source fetch metrics #35

Merged
4 changed files with 90 additions and 4 deletions

View File

@@ -249,6 +249,8 @@ Recommended proxy settings:
If you raise the heartbeat interval, keep it shorter than the lowest idle timeout in the proxy chain. If you raise the heartbeat interval, keep it shorter than the lowest idle timeout in the proxy chain.
`/api/metrics` includes network health grouped by host and source/provider. Source modules should use `safeFetch(url, { source: 'SourceName' })`; when omitted, the shared helper infers a stable provider bucket from the URL host instead of grouping normal source traffic under `unknown`. Raw fetch exceptions are documented in [Source Fetch Instrumentation](docs/source-fetch-instrumentation.md).
#### Scenario Watchlist #### Scenario Watchlist
Intelligence Terminal can track operator hypotheses across sweeps with a runtime scenario file at `runs/scenarios.json`. On first run, the server creates three disabled starter examples: Intelligence Terminal can track operator hypotheses across sweeps with a runtime scenario file at `runs/scenarios.json`. On first run, the server creates three disabled starter examples:

View File

@@ -10,6 +10,44 @@ const fetchMetrics = {
recent: [], recent: [],
}; };
const SOURCE_BY_HOST = [
[/api\.bls\.gov$/i, 'BLS'],
[/api\.fred\.stlouisfed\.org$/i, 'FRED'],
[/api\.eia\.gov$/i, 'EIA'],
[/api\.gdeltproject\.org$/i, 'GDELT'],
[/api\.weather\.gov$/i, 'NOAA'],
[/api\.open-notify\.org$/i, 'OpenNotify'],
[/opensky-network\.org$/i, 'OpenSky'],
[/firms\.modaps\.eosdis\.nasa\.gov$/i, 'FIRMS'],
[/api\.acleddata\.com$/i, 'ACLED'],
[/api\.reliefweb\.int$/i, 'ReliefWeb'],
[/receiverbook\.de$/i, 'KiwiSDR'],
[/safecast\.org$/i, 'Safecast'],
[/api\.patentsview\.org$/i, 'PatentsView'],
[/api\.trade\.gov$/i, 'Comtrade'],
[/api\.usaspending\.gov$/i, 'USASpending'],
[/api\.telegram\.org$/i, 'Telegram'],
[/oauth\.reddit\.com$/i, 'Reddit'],
[/reddit\.com$/i, 'Reddit'],
[/api\.bsky\.app$/i, 'Bluesky'],
[/api\.yahoo\.com$/i, 'YahooFinance'],
[/query\d?\.finance\.yahoo\.com$/i, 'YahooFinance'],
[/api\.cloudflare\.com$/i, 'CloudflareRadar'],
[/api\.opensanctions\.org$/i, 'OpenSanctions'],
[/home\.treasury\.gov$/i, 'Treasury'],
[/fiscaldata\.treasury\.gov$/i, 'Treasury'],
[/who\.int$/i, 'WHO'],
];
export function inferFetchSource(url) {
let host = 'unknown';
try { host = new URL(url).host.toLowerCase(); } catch { return 'unknown'; }
for (const [pattern, source] of SOURCE_BY_HOST) {
if (pattern.test(host)) return source;
}
return host;
}
function metricBucket(map, key) { function metricBucket(map, key) {
if (!map[key]) map[key] = { requests: 0, ok: 0, failed: 0, bytes: 0, lastStatus: null, lastError: null, lastMs: 0 }; if (!map[key]) map[key] = { requests: 0, ok: 0, failed: 0, bytes: 0, lastStatus: null, lastError: null, lastMs: 0 };
return map[key]; return map[key];
@@ -38,7 +76,7 @@ export function getFetchMetrics() {
} }
export async function safeFetch(url, opts = {}) { export async function safeFetch(url, opts = {}) {
const { timeout = 15000, retries = 1, headers = {}, source = undefined } = opts; const { timeout = 15000, retries = 1, headers = {}, source = inferFetchSource(url) } = opts;
let lastError; let lastError;
for (let i = 0; i <= retries; i++) { for (let i = 0; i <= retries; i++) {
const started = Date.now(); const started = Date.now();
@@ -79,11 +117,11 @@ export async function safeFetch(url, opts = {}) {
if (i < retries) await new Promise(r => setTimeout(r, 2000 * (i + 1))); if (i < retries) await new Promise(r => setTimeout(r, 2000 * (i + 1)));
} }
} }
return { error: lastError?.message || 'Unknown error', source: url }; return { error: lastError?.message || 'Unknown error', source };
} }
export async function safeFetchText(url, opts = {}) { export async function safeFetchText(url, opts = {}) {
const { timeout = 15000, retries = 1, headers = {}, source = undefined } = opts; const { timeout = 15000, retries = 1, headers = {}, source = inferFetchSource(url) } = opts;
let lastError; let lastError;
for (let i = 0; i <= retries; i++) { for (let i = 0; i <= retries; i++) {
const started = Date.now(); const started = Date.now();

View File

@@ -0,0 +1,21 @@
# Source Fetch Instrumentation
`safeFetch()` and `safeFetchText()` attribute requests to `/api/metrics.fetch.bySource`.
Rules:
- Prefer passing an explicit `source` option from source modules when the call has a clear Crucix source name.
- If `source` is omitted, the shared helper infers a stable provider name from the request host.
- Unknown hosts fall back to the lowercase host instead of the old `unknown` bucket.
- Raw `fetch()` calls should be limited to cases where the shared helper cannot represent the protocol cleanly.
Current raw-fetch exceptions:
| Area | Reason |
| --- | --- |
| OAuth/session handshakes | Token exchange calls often need custom form bodies, credential headers, or status-specific diagnostics. |
| Bot and alert delivery | Telegram/Discord alert calls are outbound operator notifications, not intelligence source health. |
| LLM providers | Provider clients already track model/provider status separately from source fetch health. |
| Dashboard browser calls | Browser-side `/api/*` and asset fetches are UI behavior, not source provider health. |
When adding a new intelligence source, use `safeFetch(url, { source: 'SourceName' })` unless there is a documented exception.

View File

@@ -1,7 +1,7 @@
import test from 'node:test'; import test from 'node:test';
import assert from 'node:assert/strict'; import assert from 'node:assert/strict';
import { readFileSync } from 'node:fs'; import { readFileSync } from 'node:fs';
import { safeFetch, safeFetchText, getFetchMetrics } from '../apis/utils/fetch.mjs'; import { safeFetch, safeFetchText, getFetchMetrics, inferFetchSource } from '../apis/utils/fetch.mjs';
import { formatStaleAlert, shouldSendStaleAlert } from '../lib/stale-alerts.mjs'; import { formatStaleAlert, shouldSendStaleAlert } from '../lib/stale-alerts.mjs';
test('safeFetch reports HTML as degraded JSON response', async () => { test('safeFetch reports HTML as degraded JSON response', async () => {
@@ -101,6 +101,31 @@ test('safeFetchText returns text and byte count', async () => {
} }
}); });
test('safeFetch attributes unlabelled requests to a stable provider source', async () => {
const originalFetch = globalThis.fetch;
globalThis.fetch = async () => ({
ok: true,
status: 200,
headers: { get: () => 'application/json' },
text: async () => '{"observations":[]}',
});
try {
const data = await safeFetch('https://api.fred.stlouisfed.org/fred/series/observations?series_id=VIXCLS', { retries: 0 });
assert.deepEqual(data, { observations: [] });
const bucket = getFetchMetrics().bySource.FRED;
assert.ok(bucket.requests >= 1);
assert.equal(bucket.lastStatus, 200);
} finally {
globalThis.fetch = originalFetch;
}
});
test('inferFetchSource returns provider names and host fallback', () => {
assert.equal(inferFetchSource('https://api.bls.gov/publicAPI/v2/timeseries/data/CPI'), 'BLS');
assert.equal(inferFetchSource('https://query1.finance.yahoo.com/v8/finance/chart/%5EGSPC'), 'YahooFinance');
assert.equal(inferFetchSource('https://unknown.example.test/path'), 'unknown.example.test');
});
test('SSE endpoint sends reconnect guidance and clears heartbeat timer', () => { test('SSE endpoint sends reconnect guidance and clears heartbeat timer', () => {
const server = readFileSync(new URL('../server.mjs', import.meta.url), 'utf8'); const server = readFileSync(new URL('../server.mjs', import.meta.url), 'utf8');
const config = readFileSync(new URL('../crucix.config.mjs', import.meta.url), 'utf8'); const config = readFileSync(new URL('../crucix.config.mjs', import.meta.url), 'utf8');