fix: make news geotagging deterministic
All checks were successful
Codex Template Compliance / template-compliance (pull_request) Successful in 5s
Build / test-and-image (pull_request) Successful in 1m4s

This commit is contained in:
2026-05-17 18:54:35 +02:00
parent a809a55881
commit 900f43ba13
3 changed files with 85 additions and 6 deletions

View File

@@ -83,16 +83,48 @@ const geoKeywords = {
'IMF':[38.9,-77],'World Bank':[38.9,-77],'UN':[40.7,-74],
};
function geoTagText(text) {
/**
 * Backslash-escapes every regular-expression metacharacter in a string so the
 * result can be embedded in a pattern and matched literally.
 *
 * @param {string} value - Raw text to embed in a pattern.
 * @returns {string} The text with each special character prefixed by `\`.
 */
function escapeRegex(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (match) => `\\${match}`);
}
/**
 * Builds the matcher for one geo keyword.
 *
 * The keyword must be preceded by the start of the text or a character that is
 * neither a letter nor a number, and followed by the end of the text or such a
 * character. Keywords of at most three characters that are unchanged by
 * `toUpperCase()` are matched case-sensitively; all others ignore case.
 *
 * @param {string} keyword - Place name or acronym to match literally.
 * @returns {RegExp} Unicode-aware pattern for the keyword.
 */
function geoKeywordRegex(keyword) {
  const isShortUppercase = keyword.length <= 3 && keyword === keyword.toUpperCase();
  let flags = 'iu';
  if (isShortUppercase) {
    flags = 'u';
  }
  const source = `(^|[^\\p{L}\\p{N}])${escapeRegex(keyword)}(?=$|[^\\p{L}\\p{N}])`;
  return new RegExp(source, flags);
}
// Lookup rows built once at module load: each keyword with its coordinates and
// compiled pattern, ordered so that a longer keyword is tried before a shorter one.
const geoKeywordEntries = Object.entries(geoKeywords)
  .map(([keyword, coords]) => ({ keyword, coords, pattern: geoKeywordRegex(keyword) }))
  .sort((left, right) => right.keyword.length - left.keyword.length);
/**
 * Returns the location of the first geo keyword found in the text.
 *
 * @param {string} text - Text to scan; a falsy value yields null.
 * @returns {{lat: number, lon: number, region: string}|null} Coordinates of the
 *   matched keyword with the keyword itself as `region`, or null when nothing matches.
 */
export function geoTagText(text) {
if (!text) return null;
// NOTE(review): the next two lines look like the removed side of the diff (the
// earlier substring-based loop) shown next to its replacement — confirm they
// are not present in the actual file.
for (const [keyword, [lat, lon]] of Object.entries(geoKeywords)) {
if (text.includes(keyword)) {
// Walk the precomputed entries in their stored order; the first pattern hit wins.
for (const { keyword, coords, pattern } of geoKeywordEntries) {
if (pattern.test(text)) {
const [lat, lon] = coords;
return { lat, lon, region: keyword };
}
}
return null;
}
/**
 * Hashes a string to an unsigned 32-bit integer using the FNV-1a scheme
 * (xor each UTF-16 code unit into the state, then multiply by the prime).
 *
 * @param {string} value - Text to hash.
 * @returns {number} Integer in the range 0..2^32-1; identical input gives identical output.
 */
function stableHash(value) {
  const FNV_OFFSET_BASIS = 2166136261;
  const FNV_PRIME = 16777619;
  let state = FNV_OFFSET_BASIS;
  let index = 0;
  while (index < value.length) {
    // Math.imul keeps the multiplication in 32-bit integer space.
    state = Math.imul(state ^ value.charCodeAt(index), FNV_PRIME);
    index += 1;
  }
  // Reinterpret the signed 32-bit state as unsigned.
  return state >>> 0;
}
/**
 * Derives a repeatable offset in the range [-1, 1] from a key and an axis label.
 *
 * @param {string} key - Identifier of the item being placed.
 * @param {string} axis - Label mixed into the hash input so each axis gets its own offset.
 * @returns {number} Offset between -1 and 1 inclusive; the same arguments always give the same value.
 */
export function stableGeoJitter(key, axis) {
  const seed = `${axis}:${key}`;
  // Scale the 32-bit hash down to the unit interval before centring it on zero.
  const unitFraction = stableHash(seed) / 0xffffffff;
  return 2 * (unitFraction - 0.5);
}
/**
 * Builds the pipe-separated key for a news item from its source, title, date
 * and url, in that order. Falsy fields contribute an empty segment.
 *
 * @param {{source?: string, title?: string, date?: string, url?: string}} item - News item.
 * @returns {string} Key of the form `source|title|date|url`.
 */
function newsGeoKey(item) {
  const fields = [item.source, item.title, item.date, item.url];
  return fields.map((field) => field || '').join('|');
}
function sanitizeExternalUrl(raw) {
if (!raw) return undefined;
try {
@@ -235,8 +267,8 @@ export async function fetchAllNews() {
source: item.source,
date: item.date,
url: item.url,
lat: geo.lat + (Math.random() - 0.5) * 2,
lon: geo.lon + (Math.random() - 0.5) * 2,
lat: geo.lat + stableGeoJitter(newsGeoKey(item), 'lat'),
lon: geo.lon + stableGeoJitter(newsGeoKey(item), 'lon'),
region: geo.region
});
}

View File

@@ -12,7 +12,7 @@
"brief:save": "node apis/save-briefing.mjs",
"diag": "node diag.mjs",
"test": "npm run test:unit",
"test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/fetch-utils.test.mjs test/reddit-source.test.mjs test/acled-source.test.mjs",
"test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/fetch-utils.test.mjs test/reddit-source.test.mjs test/acled-source.test.mjs test/dashboard-geotagging.test.mjs",
"compose:config": "docker compose config",
"clean": "node scripts/clean.mjs",
"fresh-start": "npm run clean && npm start"

View File

@@ -0,0 +1,47 @@
import test from 'node:test';
import assert from 'node:assert/strict';
import { geoTagText, stableGeoJitter } from '../dashboard/inject.mjs';
// Lower-cased headlines must still resolve to the canonical keyword and its coordinates.
test('geoTagText matches headlines case-insensitively', () => {
  const ukraineTag = geoTagText('ukraine reports new air defense activity');
  assert.deepEqual(ukraineTag, { lat: 49, lon: 32, region: 'Ukraine' });

  const saoPauloTag = geoTagText('flooding disrupts são paulo transport');
  assert.deepEqual(saoPauloTag, { lat: -23.5, lon: -46.6, region: 'São Paulo' });
});
// A headline containing both a city and a broader keyword should resolve to the city.
test('geoTagText prefers longer place names before broad countries', () => {
  const headline = 'New York markets react before wider US session';
  const expected = { lat: 40.7, lon: -74, region: 'New York' };
  assert.deepEqual(geoTagText(headline), expected);
});
// "us" inside ordinary words must not tag the headline; a standalone "US" must.
test('geoTagText uses word boundaries to reduce false positives', () => {
  const musicTag = geoTagText('A music festival announces its lineup');
  assert.equal(musicTag, null);

  const focusTag = geoTagText('Officials discuss a new focus for aid');
  assert.equal(focusTag, null);

  const usTag = geoTagText('US officials discuss a new aid package');
  assert.deepEqual(usTag, { lat: 39, lon: -98, region: 'US' });
});
// Same key and axis must repeat exactly, different axes must differ, and every
// offset must stay within [-1, 1].
test('stableGeoJitter is deterministic and bounded', () => {
  const key = 'BBC|lower-case ukraine headline|Sun, 17 May 2026 12:00:00 GMT|https://example.test/a';
  const isWithinUnitRange = (offset) => offset >= -1 && offset <= 1;

  const latA = stableGeoJitter(key, 'lat');
  const latB = stableGeoJitter(key, 'lat');
  const lon = stableGeoJitter(key, 'lon');

  assert.equal(latA, latB);
  assert.notEqual(latA, lon);
  assert.ok(isWithinUnitRange(latA));
  assert.ok(isWithinUnitRange(lon));
});