From b2dee4e26132afe3eaf1242b40611041fcdcb7ab Mon Sep 17 00:00:00 2001 From: MrSphay Date: Sun, 17 May 2026 13:47:23 +0200 Subject: [PATCH] fix: require reddit oauth source access --- .env.example | 2 + README.md | 3 + apis/sources/reddit.mjs | 139 ++++++++++++++++++++++++++---------- docs/sources/README.md | 1 + docs/sources/reddit.md | 33 +++++++++ package.json | 2 +- test/reddit-source.test.mjs | 109 ++++++++++++++++++++++++++++ 7 files changed, 251 insertions(+), 38 deletions(-) create mode 100644 docs/sources/reddit.md create mode 100644 test/reddit-source.test.mjs diff --git a/.env.example b/.env.example index a862e47..6a1e88a 100644 --- a/.env.example +++ b/.env.example @@ -36,6 +36,8 @@ ACLED_EMAIL= ACLED_PASSWORD= CLOUDFLARE_API_TOKEN= BLS_API_KEY= +REDDIT_CLIENT_ID= +REDDIT_CLIENT_SECRET= # Telegram bot and alerts TELEGRAM_BOT_TOKEN= diff --git a/README.md b/README.md index 74e69f3..e5161c0 100644 --- a/README.md +++ b/README.md @@ -330,6 +330,9 @@ These three unlock the most valuable economic and satellite data. Each takes abo | `ACLED_EMAIL` + `ACLED_PASSWORD` | Armed conflict event data | [acleddata.com/register](https://acleddata.com/register/) — free, OAuth2 | | `AISSTREAM_API_KEY` | Maritime AIS vessel tracking | [aisstream.io](https://aisstream.io/) — free | | `ADSB_API_KEY` | Unfiltered flight tracking | [RapidAPI](https://rapidapi.com/adsbexchange/api/adsbexchange-com1) — ~$10/mo | +| `REDDIT_CLIENT_ID` + `REDDIT_CLIENT_SECRET` | Reddit social sentiment | [reddit.com/prefs/apps](https://www.reddit.com/prefs/apps/) — create a script app | + +Reddit is OAuth-only in this fork. If the Reddit credentials are missing or rejected, the Reddit source is reported as degraded (the briefing returns `status: no_credentials` or `status: auth_failed`) and no unauthenticated `reddit.com/.../hot.json` fallback is used. 
### LLM Provider (optional, for AI-enhanced ideas) diff --git a/apis/sources/reddit.mjs b/apis/sources/reddit.mjs index 29606cf..c6d17e0 100644 --- a/apis/sources/reddit.mjs +++ b/apis/sources/reddit.mjs @@ -1,14 +1,15 @@ -// Reddit — social sentiment intelligence -// Reddit now requires OAuth for API access (public JSON API returns 403). -// Gracefully degrades when not authenticated. -// To enable: register an app at https://www.reddit.com/prefs/apps/ and set -// REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET in .env +// Reddit social sentiment intelligence. +// Reddit API access requires OAuth. Runtime sweeps intentionally do not use +// unauthenticated reddit.com .json scraping because it is unreliable and not +// acceptable for production operation. import { safeFetch } from '../utils/fetch.mjs'; import '../utils/env.mjs'; function delay(ms) { return new Promise(r => setTimeout(r, ms)); } +const USER_AGENT = 'Crucix/2.0 intelligence-engine'; + const SUBREDDITS = [ 'worldnews', 'geopolitics', @@ -17,48 +18,95 @@ const SUBREDDITS = [ 'commodities', ]; -// Get OAuth token using client credentials flow (application-only) -async function getToken() { - const clientId = process.env.REDDIT_CLIENT_ID; - const clientSecret = process.env.REDDIT_CLIENT_SECRET; - if (!clientId || !clientSecret) return null; +export function getRedditConfig(env = process.env) { + const clientId = env.REDDIT_CLIENT_ID || ''; + const clientSecret = env.REDDIT_CLIENT_SECRET || ''; + const missing = []; + if (!clientId) missing.push('REDDIT_CLIENT_ID'); + if (!clientSecret) missing.push('REDDIT_CLIENT_SECRET'); + return { + clientId, + clientSecret, + configured: missing.length === 0, + missing, + }; +} + +function credentialsMessage(missing) { + return `Reddit requires OAuth. 
Register a script app at https://www.reddit.com/prefs/apps/ and set ${missing.join(' and ')} in .env`; +} + +export async function getToken({ env = process.env, fetchImpl = globalThis.fetch } = {}) { + const config = getRedditConfig(env); + if (!config.configured) { + return { + ok: false, + status: 'no_credentials', + missing: config.missing, + error: 'missing_reddit_oauth_credentials', + message: credentialsMessage(config.missing), + }; + } try { - const auth = Buffer.from(`${clientId}:${clientSecret}`).toString('base64'); - const res = await fetch('https://www.reddit.com/api/v1/access_token', { + const auth = Buffer.from(`${config.clientId}:${config.clientSecret}`).toString('base64'); + const res = await fetchImpl('https://www.reddit.com/api/v1/access_token', { method: 'POST', headers: { 'Authorization': `Basic ${auth}`, 'Content-Type': 'application/x-www-form-urlencoded', - 'User-Agent': 'Crucix/1.0 intelligence-engine', + 'User-Agent': USER_AGENT, }, body: 'grant_type=client_credentials', }); - if (!res.ok) return null; + if (!res.ok) { + const body = await res.text().catch(() => ''); + return { + ok: false, + status: 'auth_failed', + error: `reddit_oauth_http_${res.status}`, + message: `Reddit OAuth token request failed with HTTP ${res.status}`, + detail: body.slice(0, 200), + }; + } + const data = await res.json(); - return data.access_token || null; - } catch { - return null; + if (!data.access_token) { + return { + ok: false, + status: 'auth_failed', + error: 'reddit_oauth_missing_access_token', + message: 'Reddit OAuth token response did not include an access token', + }; + } + return { ok: true, status: 'ok', token: data.access_token }; + } catch (e) { + return { + ok: false, + status: 'auth_failed', + error: 'reddit_oauth_request_failed', + message: e.message, + }; } } -// Fetch hot posts — tries OAuth first, then falls back to public endpoint export async function getHot(subreddit, opts = {}) { const { limit = 10, token = null } = opts; - if (token) { 
- // Use OAuth endpoint - return safeFetch(`https://oauth.reddit.com/r/${subreddit}/hot?limit=${limit}&raw_json=1`, { - headers: { - 'Authorization': `Bearer ${token}`, - 'User-Agent': 'Crucix/1.0 intelligence-engine', - }, - }); + if (!token) { + return { + status: 'no_credentials', + error: 'reddit_oauth_required', + message: 'Reddit source requires OAuth; unauthenticated reddit.com .json scraping is disabled', + }; } - // Try public endpoint (may 403) - return safeFetch(`https://www.reddit.com/r/${subreddit}/hot.json?limit=${limit}&raw_json=1`, { - headers: { 'User-Agent': 'Crucix/1.0 intelligence-engine' }, + return safeFetch(`https://oauth.reddit.com/r/${subreddit}/hot?limit=${limit}&raw_json=1`, { + source: 'Reddit', + headers: { + 'Authorization': `Bearer ${token}`, + 'User-Agent': USER_AGENT, + }, }); } @@ -74,29 +122,46 @@ function compactPost(child) { }; } -export async function briefing() { - const token = await getToken(); +export async function briefing(opts = {}) { + const { + env = process.env, + subreddits = SUBREDDITS, + delayMs = 1000, + fetchImpl = globalThis.fetch, + } = opts; + const tokenResult = await getToken({ env, fetchImpl }); - if (!token && !process.env.REDDIT_CLIENT_ID) { + if (!tokenResult.ok) { return { source: 'Reddit', timestamp: new Date().toISOString(), - status: 'no_key', - message: 'Reddit requires OAuth. 
Register at https://www.reddit.com/prefs/apps/ (script type), set REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET in .env', + status: tokenResult.status, + error: tokenResult.error, + message: tokenResult.message, + missing: tokenResult.missing || [], }; } const subredditResults = {}; - for (const sub of SUBREDDITS) { - const result = await getHot(sub, { limit: 10, token }); + const errors = []; + for (const sub of subreddits) { + const result = await getHot(sub, { limit: 10, token: tokenResult.token }); + if (result?.error) { + errors.push({ subreddit: sub, error: result.error }); + subredditResults[sub] = []; + if (delayMs > 0) await delay(delayMs); + continue; + } const children = result?.data?.children || []; subredditResults[sub] = children.map(compactPost).filter(Boolean); - await delay(token ? 1000 : 2000); + if (delayMs > 0) await delay(delayMs); } return { source: 'Reddit', timestamp: new Date().toISOString(), + status: errors.length > 0 ? 'degraded' : 'ok', + ...(errors.length > 0 ? { error: 'reddit_subreddit_fetch_failed', errors } : {}), subreddits: subredditResults, }; } diff --git a/docs/sources/README.md b/docs/sources/README.md index 008b1f5..e8549a4 100644 --- a/docs/sources/README.md +++ b/docs/sources/README.md @@ -16,3 +16,4 @@ Source docs: - [Telegram](telegram.md) - [FIRMS](firms.md) - [Maritime](maritime.md) +- [Reddit](reddit.md) diff --git a/docs/sources/reddit.md b/docs/sources/reddit.md new file mode 100644 index 0000000..c7ce6e4 --- /dev/null +++ b/docs/sources/reddit.md @@ -0,0 +1,33 @@ +# Reddit Source + +Reddit is used as a social sentiment input for selected geopolitical and market subreddits. + +## Configuration + +Create a Reddit script app at: + +```text +https://www.reddit.com/prefs/apps/ +``` + +Then set: + +```env +REDDIT_CLIENT_ID= +REDDIT_CLIENT_SECRET= +``` + +## Runtime Behavior + +- The source uses the OAuth client credentials flow and then reads `https://oauth.reddit.com`. 
+- Unauthenticated `reddit.com/.../hot.json` scraping is intentionally disabled. +- Missing credentials return `status: no_credentials` and are surfaced as source degradation. +- OAuth failures return `status: auth_failed` without logging or returning the client secret. +- Subreddit fetch failures return `status: degraded` with per-subreddit errors. + +## Test + +```bash +node apis/sources/reddit.mjs +npm run test:unit +``` diff --git a/package.json b/package.json index 4f3e7cc..b4e582f 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "brief:save": "node apis/save-briefing.mjs", "diag": "node diag.mjs", "test": "npm run test:unit", - "test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/fetch-utils.test.mjs", + "test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/fetch-utils.test.mjs test/reddit-source.test.mjs", "compose:config": "docker compose config", "clean": "node scripts/clean.mjs", "fresh-start": "npm run clean && npm start" diff --git a/test/reddit-source.test.mjs b/test/reddit-source.test.mjs new file mode 100644 index 0000000..1e61620 --- /dev/null +++ b/test/reddit-source.test.mjs @@ -0,0 +1,109 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { briefing, getHot, getRedditConfig, getToken } from '../apis/sources/reddit.mjs'; + +test('Reddit reports missing OAuth credentials without network access', async () => { + let calls = 0; + const data = await briefing({ + env: {}, + delayMs: 0, + fetchImpl: async () => { + calls++; + throw new Error('unexpected network access'); + }, + }); + + assert.equal(calls, 0); + assert.equal(data.status, 'no_credentials'); + assert.equal(data.error, 'missing_reddit_oauth_credentials'); + assert.deepEqual(data.missing, ['REDDIT_CLIENT_ID', 'REDDIT_CLIENT_SECRET']); +}); + +test('Reddit hot posts require OAuth token and never use public JSON 
fallback', async () => { + const originalFetch = globalThis.fetch; + let calledUrl = null; + globalThis.fetch = async url => { + calledUrl = url; + throw new Error('unexpected public fallback'); + }; + + try { + const data = await getHot('worldnews'); + assert.equal(calledUrl, null); + assert.equal(data.status, 'no_credentials'); + assert.equal(data.error, 'reddit_oauth_required'); + } finally { + globalThis.fetch = originalFetch; + } +}); + +test('Reddit classifies OAuth HTTP failure without exposing secrets', async () => { + const result = await getToken({ + env: { REDDIT_CLIENT_ID: 'client-id', REDDIT_CLIENT_SECRET: 'client-secret' }, + fetchImpl: async () => ({ + ok: false, + status: 401, + text: async () => 'invalid client', + }), + }); + + assert.equal(result.ok, false); + assert.equal(result.status, 'auth_failed'); + assert.equal(result.error, 'reddit_oauth_http_401'); + assert.doesNotMatch(JSON.stringify(result), /client-secret/); +}); + +test('Reddit fetches hot posts through oauth.reddit.com when configured', async () => { + const originalFetch = globalThis.fetch; + const urls = []; + globalThis.fetch = async url => { + urls.push(String(url)); + if (String(url).includes('/api/v1/access_token')) { + return { + ok: true, + status: 200, + json: async () => ({ access_token: 'test-token' }), + }; + } + return { + ok: true, + status: 200, + headers: { get: () => 'application/json' }, + text: async () => JSON.stringify({ + data: { + children: [ + { + data: { + title: 'Market stress headline', + score: 42, + num_comments: 7, + url: 'https://example.test/post', + created_utc: 1700000000, + }, + }, + ], + }, + }), + }; + }; + + try { + const data = await briefing({ + env: { REDDIT_CLIENT_ID: 'client-id', REDDIT_CLIENT_SECRET: 'client-secret' }, + subreddits: ['worldnews'], + delayMs: 0, + }); + + assert.equal(data.status, 'ok'); + assert.equal(data.subreddits.worldnews[0].title, 'Market stress headline'); + assert.ok(urls.some(url => url === 
'https://www.reddit.com/api/v1/access_token')); + assert.ok(urls.some(url => url.startsWith('https://oauth.reddit.com/r/worldnews/hot'))); + assert.equal(urls.some(url => url.includes('hot.json')), false); + } finally { + globalThis.fetch = originalFetch; + } +}); + +test('Reddit config reports partial credential state', () => { + assert.deepEqual(getRedditConfig({ REDDIT_CLIENT_ID: 'id' }).missing, ['REDDIT_CLIENT_SECRET']); +}); -- 2.49.1