fix: infer source fetch metrics
All checks were successful
Codex Template Compliance / template-compliance (pull_request) Successful in 4s
Build / test-and-image (pull_request) Successful in 53s

This commit is contained in:
MrSphay
2026-05-17 14:44:21 +02:00
parent 8605d0baab
commit 2025ae09db
4 changed files with 90 additions and 4 deletions

View File

@@ -190,6 +190,8 @@ For Pangolin or another reverse proxy, forward HTTP traffic to `intelligence-ter
The dashboard Terminal Actions panel can trigger `status`, `sweep`, and `brief` through `/api/action`. Leave `TERMINAL_ACTIONS_ENABLED=true` for a private home-server deployment. For an internet-exposed deployment, set `SWEEP_TOKEN` and pass it through trusted automation, or set `TERMINAL_ACTIONS_ENABLED=false` to disable browser-triggered actions. If you protect actions with `SWEEP_TOKEN`, the browser can send it from `localStorage.crucix_sweep_token`. The dashboard Terminal Actions panel can trigger `status`, `sweep`, and `brief` through `/api/action`. Leave `TERMINAL_ACTIONS_ENABLED=true` for a private home-server deployment. For an internet-exposed deployment, set `SWEEP_TOKEN` and pass it through trusted automation, or set `TERMINAL_ACTIONS_ENABLED=false` to disable browser-triggered actions. If you protect actions with `SWEEP_TOKEN`, the browser can send it from `localStorage.crucix_sweep_token`.
`/api/metrics` includes network health grouped by host and source/provider. Source modules should use `safeFetch(url, { source: 'SourceName' })`; when omitted, the shared helper infers a stable provider bucket from the URL host instead of grouping normal source traffic under `unknown`. Raw fetch exceptions are documented in [Source Fetch Instrumentation](docs/source-fetch-instrumentation.md).
#### Build And Publish Your Gitea Image #### Build And Publish Your Gitea Image
```bash ```bash

View File

@@ -10,6 +10,44 @@ const fetchMetrics = {
recent: [], recent: [],
}; };
const SOURCE_BY_HOST = [
[/api\.bls\.gov$/i, 'BLS'],
[/api\.fred\.stlouisfed\.org$/i, 'FRED'],
[/api\.eia\.gov$/i, 'EIA'],
[/api\.gdeltproject\.org$/i, 'GDELT'],
[/api\.weather\.gov$/i, 'NOAA'],
[/api\.open-notify\.org$/i, 'OpenNotify'],
[/opensky-network\.org$/i, 'OpenSky'],
[/firms\.modaps\.eosdis\.nasa\.gov$/i, 'FIRMS'],
[/api\.acleddata\.com$/i, 'ACLED'],
[/api\.reliefweb\.int$/i, 'ReliefWeb'],
[/receiverbook\.de$/i, 'KiwiSDR'],
[/safecast\.org$/i, 'Safecast'],
[/api\.patentsview\.org$/i, 'PatentsView'],
[/api\.trade\.gov$/i, 'Comtrade'],
[/api\.usaspending\.gov$/i, 'USASpending'],
[/api\.telegram\.org$/i, 'Telegram'],
[/oauth\.reddit\.com$/i, 'Reddit'],
[/reddit\.com$/i, 'Reddit'],
[/api\.bsky\.app$/i, 'Bluesky'],
[/api\.yahoo\.com$/i, 'YahooFinance'],
[/query\d?\.finance\.yahoo\.com$/i, 'YahooFinance'],
[/api\.cloudflare\.com$/i, 'CloudflareRadar'],
[/api\.opensanctions\.org$/i, 'OpenSanctions'],
[/home\.treasury\.gov$/i, 'Treasury'],
[/fiscaldata\.treasury\.gov$/i, 'Treasury'],
[/who\.int$/i, 'WHO'],
];
export function inferFetchSource(url) {
let host = 'unknown';
try { host = new URL(url).host.toLowerCase(); } catch { return 'unknown'; }
for (const [pattern, source] of SOURCE_BY_HOST) {
if (pattern.test(host)) return source;
}
return host;
}
function metricBucket(map, key) { function metricBucket(map, key) {
if (!map[key]) map[key] = { requests: 0, ok: 0, failed: 0, bytes: 0, lastStatus: null, lastError: null, lastMs: 0 }; if (!map[key]) map[key] = { requests: 0, ok: 0, failed: 0, bytes: 0, lastStatus: null, lastError: null, lastMs: 0 };
return map[key]; return map[key];
@@ -38,7 +76,7 @@ export function getFetchMetrics() {
} }
export async function safeFetch(url, opts = {}) { export async function safeFetch(url, opts = {}) {
const { timeout = 15000, retries = 1, headers = {}, source = undefined } = opts; const { timeout = 15000, retries = 1, headers = {}, source = inferFetchSource(url) } = opts;
let lastError; let lastError;
for (let i = 0; i <= retries; i++) { for (let i = 0; i <= retries; i++) {
const started = Date.now(); const started = Date.now();
@@ -71,11 +109,11 @@ export async function safeFetch(url, opts = {}) {
if (i < retries) await new Promise(r => setTimeout(r, 2000 * (i + 1))); if (i < retries) await new Promise(r => setTimeout(r, 2000 * (i + 1)));
} }
} }
return { error: lastError?.message || 'Unknown error', source: url }; return { error: lastError?.message || 'Unknown error', source };
} }
export async function safeFetchText(url, opts = {}) { export async function safeFetchText(url, opts = {}) {
const { timeout = 15000, retries = 1, headers = {}, source = undefined } = opts; const { timeout = 15000, retries = 1, headers = {}, source = inferFetchSource(url) } = opts;
let lastError; let lastError;
for (let i = 0; i <= retries; i++) { for (let i = 0; i <= retries; i++) {
const started = Date.now(); const started = Date.now();

View File

@@ -0,0 +1,21 @@
# Source Fetch Instrumentation
`safeFetch()` and `safeFetchText()` attribute requests to `/api/metrics.fetch.bySource`.
Rules:
- Prefer passing an explicit `source` option from source modules when the call has a clear Crucix source name.
- If `source` is omitted, the shared helper infers a stable provider name from the request host.
- Unknown hosts fall back to the lowercase host instead of the old `unknown` bucket.
- Raw `fetch()` calls should be limited to cases where the shared helper cannot represent the protocol cleanly.
Current raw-fetch exceptions:
| Area | Reason |
| --- | --- |
| OAuth/session handshakes | Token exchange calls often need custom form bodies, credential headers, or status-specific diagnostics. |
| Bot and alert delivery | Telegram/Discord alert calls are outbound operator notifications, not intelligence source health. |
| LLM providers | Provider clients already track model/provider status separately from source fetch health. |
| Dashboard browser calls | Browser-side `/api/*` and asset fetches are UI behavior, not source provider health. |
When adding a new intelligence source, use `safeFetch(url, { source: 'SourceName' })` unless there is a documented exception.

View File

@@ -1,6 +1,6 @@
import test from 'node:test'; import test from 'node:test';
import assert from 'node:assert/strict'; import assert from 'node:assert/strict';
import { safeFetch, safeFetchText, getFetchMetrics } from '../apis/utils/fetch.mjs'; import { safeFetch, safeFetchText, getFetchMetrics, inferFetchSource } from '../apis/utils/fetch.mjs';
test('safeFetch reports HTML as degraded JSON response', async () => { test('safeFetch reports HTML as degraded JSON response', async () => {
const originalFetch = globalThis.fetch; const originalFetch = globalThis.fetch;
@@ -34,3 +34,28 @@ test('safeFetchText returns text and byte count', async () => {
globalThis.fetch = originalFetch; globalThis.fetch = originalFetch;
} }
}); });
test('safeFetch attributes unlabelled requests to a stable provider source', async () => {
const originalFetch = globalThis.fetch;
globalThis.fetch = async () => ({
ok: true,
status: 200,
headers: { get: () => 'application/json' },
text: async () => '{"observations":[]}',
});
try {
const data = await safeFetch('https://api.fred.stlouisfed.org/fred/series/observations?series_id=VIXCLS', { retries: 0 });
assert.deepEqual(data, { observations: [] });
const bucket = getFetchMetrics().bySource.FRED;
assert.ok(bucket.requests >= 1);
assert.equal(bucket.lastStatus, 200);
} finally {
globalThis.fetch = originalFetch;
}
});
test('inferFetchSource returns provider names and host fallback', () => {
assert.equal(inferFetchSource('https://api.bls.gov/publicAPI/v2/timeseries/data/CPI'), 'BLS');
assert.equal(inferFetchSource('https://query1.finance.yahoo.com/v8/finance/chart/%5EGSPC'), 'YahooFinance');
assert.equal(inferFetchSource('https://unknown.example.test/path'), 'unknown.example.test');
});