Merge branch 'codex/production-intelligence-terminal' into codex/issue-2-stale-alerts
This commit is contained in:
@@ -83,16 +83,48 @@ const geoKeywords = {
|
||||
'IMF':[38.9,-77],'World Bank':[38.9,-77],'UN':[40.7,-74],
|
||||
};
|
||||
|
||||
function geoTagText(text) {
|
||||
/**
 * Escape regular-expression metacharacters so `value` can be embedded in a
 * `RegExp` source string and matched literally.
 *
 * @param {string} value - Text to escape.
 * @returns {string} `value` with each of `. * + ? ^ $ { } ( ) | [ ] \`
 *   preceded by a backslash.
 */
function escapeRegex(value) {
  // `$&` re-inserts the matched metacharacter right after the added backslash.
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
|
||||
|
||||
/**
 * Build the pattern that finds `keyword` as a standalone token in free text.
 *
 * The keyword must be preceded by the start of the string or a character that
 * is neither a Unicode letter nor a Unicode number, and followed by the end of
 * the string or such a character.
 *
 * @param {string} keyword - Place or organisation name to match literally.
 * @returns {RegExp} A non-global pattern, so `.test()` carries no `lastIndex`
 *   state between calls.
 */
function geoKeywordRegex(keyword) {
  // Keywords of at most three characters that are already all upper-case are
  // matched case-sensitively; every other keyword ignores case.
  const isShortUppercase = keyword.length <= 3 && keyword === keyword.toUpperCase();
  const flags = isShortUppercase ? 'u' : 'iu';
  return new RegExp(`(^|[^\\p{L}\\p{N}])${escapeRegex(keyword)}(?=$|[^\\p{L}\\p{N}])`, flags);
}
|
||||
|
||||
// Matchers for every entry of `geoKeywords`, built once at module load.
// Entries are ordered by keyword length, longest first, so a longer name is
// tried before a shorter keyword that could also match the same text.
const geoKeywordEntries = Object.entries(geoKeywords)
  .sort((a, b) => b[0].length - a[0].length)
  .map(([keyword, coords]) => ({ keyword, coords, pattern: geoKeywordRegex(keyword) }));
|
||||
|
||||
/**
 * Find the first known geo keyword in `text` and return its coordinates.
 *
 * Candidates are taken from `geoKeywordEntries` in that array's order; the
 * first entry whose `pattern` matches wins.
 *
 * @param {string} text - Headline or other free text to scan.
 * @returns {{lat: number, lon: number, region: string} | null} Coordinates and
 *   the matched keyword, or `null` when `text` is falsy or nothing matches.
 */
export function geoTagText(text) {
  if (!text) return null;
  for (const { keyword, coords, pattern } of geoKeywordEntries) {
    if (pattern.test(text)) {
      const [lat, lon] = coords;
      return { lat, lon, region: keyword };
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Deterministic 32-bit hash of a string, FNV-1a style: each UTF-16 code unit
 * is XOR-ed into the state, which is then multiplied by 16777619.
 *
 * @param {string} value - Text to hash.
 * @returns {number} Unsigned 32-bit integer; 2166136261 for the empty string.
 */
function stableHash(value) {
  let hash = 2166136261;
  for (let i = 0; i < value.length; i++) {
    hash ^= value.charCodeAt(i);
    // Math.imul keeps the multiplication within 32-bit integer arithmetic.
    hash = Math.imul(hash, 16777619);
  }
  // The bitwise steps above yield a signed value; `>>> 0` makes it unsigned.
  return hash >>> 0;
}
|
||||
|
||||
/**
 * Deterministic offset in the closed range [-1, 1], derived from `key` and
 * `axis`. The same pair always yields the same offset.
 *
 * @param {string} key - Identifier of the item being placed.
 * @param {string} axis - Label mixed into the hash so different axes can get
 *   different offsets for the same key (the caller passes 'lat' or 'lon').
 * @returns {number} Offset between -1 and 1 inclusive.
 */
export function stableGeoJitter(key, axis) {
  // Scale the unsigned 32-bit hash to [0, 1], then re-centre to [-1, 1].
  const bucket = stableHash(`${axis}:${key}`) / 0xffffffff;
  return (bucket - 0.5) * 2;
}
|
||||
|
||||
/**
 * Build the string that identifies a news item when deriving its jitter.
 *
 * @param {{source?: string, title?: string, date?: string, url?: string}} item
 *   News item; falsy fields contribute an empty segment.
 * @returns {string} `source|title|date|url`.
 */
function newsGeoKey(item) {
  return `${item.source || ''}|${item.title || ''}|${item.date || ''}|${item.url || ''}`;
}
|
||||
|
||||
function sanitizeExternalUrl(raw) {
|
||||
if (!raw) return undefined;
|
||||
try {
|
||||
@@ -235,8 +267,8 @@ export async function fetchAllNews() {
|
||||
source: item.source,
|
||||
date: item.date,
|
||||
url: item.url,
|
||||
lat: geo.lat + (Math.random() - 0.5) * 2,
|
||||
lon: geo.lon + (Math.random() - 0.5) * 2,
|
||||
lat: geo.lat + stableGeoJitter(newsGeoKey(item), 'lat'),
|
||||
lon: geo.lon + stableGeoJitter(newsGeoKey(item), 'lon'),
|
||||
region: geo.region
|
||||
});
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"brief:save": "node apis/save-briefing.mjs",
|
||||
"diag": "node diag.mjs",
|
||||
"test": "npm run test:unit",
|
||||
"test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/fetch-utils.test.mjs test/reddit-source.test.mjs test/acled-source.test.mjs",
|
||||
"test:unit": "node --test test/llm-openrouter.test.mjs test/llm-ollama.test.mjs test/llm-openai-compatible.test.mjs test/fetch-utils.test.mjs test/reddit-source.test.mjs test/acled-source.test.mjs test/mojibake-text.test.mjs test/dashboard-geotagging.test.mjs",
|
||||
"compose:config": "docker compose config",
|
||||
"clean": "node scripts/clean.mjs",
|
||||
"fresh-start": "npm run clean && npm start"
|
||||
|
||||
47
test/dashboard-geotagging.test.mjs
Normal file
47
test/dashboard-geotagging.test.mjs
Normal file
@@ -0,0 +1,47 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { geoTagText, stableGeoJitter } from '../dashboard/inject.mjs';
|
||||
|
||||
// Lower-case headlines must still resolve to the canonically cased keyword,
// including a keyword that contains a non-ASCII letter.
test('geoTagText matches headlines case-insensitively', () => {
  assert.deepEqual(geoTagText('ukraine reports new air defense activity'), {
    lat: 49,
    lon: 32,
    region: 'Ukraine',
  });

  assert.deepEqual(geoTagText('flooding disrupts são paulo transport'), {
    lat: -23.5,
    lon: -46.6,
    region: 'São Paulo',
  });
});
|
||||
|
||||
// The headline contains both 'New York' and 'US'; the longer keyword must win.
test('geoTagText prefers longer place names before broad countries', () => {
  assert.deepEqual(geoTagText('New York markets react before wider US session'), {
    lat: 40.7,
    lon: -74,
    region: 'New York',
  });
});
|
||||
|
||||
// 'music' and 'focus' contain the letters "us" but must not be tagged as 'US';
// a standalone upper-case 'US' must be.
test('geoTagText uses word boundaries to reduce false positives', () => {
  assert.equal(geoTagText('A music festival announces its lineup'), null);
  assert.equal(geoTagText('Officials discuss a new focus for aid'), null);
  assert.deepEqual(geoTagText('US officials discuss a new aid package'), {
    lat: 39,
    lon: -98,
    region: 'US',
  });
});
|
||||
|
||||
// The same key and axis must always give the same offset, the two axes must
// differ for this key, and every offset must stay within [-1, 1].
test('stableGeoJitter is deterministic and bounded', () => {
  const key = 'BBC|lower-case ukraine headline|Sun, 17 May 2026 12:00:00 GMT|https://example.test/a';
  const latA = stableGeoJitter(key, 'lat');
  const latB = stableGeoJitter(key, 'lat');
  const lon = stableGeoJitter(key, 'lon');

  assert.equal(latA, latB);
  assert.notEqual(latA, lon);
  assert.ok(latA >= -1 && latA <= 1);
  assert.ok(lon >= -1 && lon <= 1);
});
|
||||
65
test/mojibake-text.test.mjs
Normal file
65
test/mojibake-text.test.mjs
Normal file
@@ -0,0 +1,65 @@
|
||||
import test from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { readdirSync, readFileSync, statSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
|
||||
// Directories that are walked recursively for text files to scan.
const TEXT_ROOTS = ['locales'];

// Extra individual files to scan in addition to the roots (currently none).
const TEXT_FILES = [];

// Only files whose name ends in one of these suffixes are collected.
const EXTENSIONS = new Set(['.json', '.html', '.mjs']);
|
||||
|
||||
// Character sequences treated as evidence of mis-decoded text. Each entry
// pairs a label used in failure messages with a global pattern, so every
// occurrence in a file can be enumerated with `matchAll`.
const MOJIBAKE_PATTERNS = [
  // NOTE(review): `.` matches any character other than a line terminator, so
  // U+00C3 followed by an ordinary letter (e.g. upper-case "NÃO") also matches.
  // Confirm that is acceptable for the scanned locales.
  { name: 'latin1-accent', pattern: /\u00c3./g },
  { name: 'stray-cp1252-prefix', pattern: /\u00c2./g },
  { name: 'emoji-mojibake', pattern: /\u00f0\u0178/g },
  {
    name: 'punctuation-mojibake',
    pattern: /\u00e2[\u0080-\u009f\u20ac\u0153\u2018\u2019\u201c\u201d\u2013\u2014\u2022\u2026\u201e\u2021\u02c6\u2030\u2039\u203a\u0152\u017d]/g,
  },
  { name: 'variation-selector-mojibake', pattern: /\u00ef\u00b8/g },
  { name: 'ligature-mojibake', pattern: /\u00c5[\u0080-\u017f]/g },
  // U+FFFD is the Unicode replacement character.
  { name: 'replacement-character', pattern: /\ufffd/g },
];
|
||||
|
||||
/**
 * Recursively list the files under `root` whose name ends in one of the
 * suffixes in `EXTENSIONS`.
 *
 * @param {string} root - Directory to walk.
 * @returns {string[]} Paths built by joining `root` with each entry name, in
 *   the order `readdirSync` reports them.
 */
function collectFiles(root) {
  const out = [];
  for (const entry of readdirSync(root, { withFileTypes: true })) {
    const path = join(root, entry.name);
    if (entry.isDirectory()) {
      out.push(...collectFiles(path));
    } else if (EXTENSIONS.has(path.slice(path.lastIndexOf('.')))) {
      // The suffix is everything from the last '.' in the path onward.
      out.push(path);
    }
  }
  return out;
}
|
||||
|
||||
/**
 * Resolve the full list of files to scan: everything found under
 * `TEXT_ROOTS` plus each `TEXT_FILES` entry that exists as a regular file.
 *
 * @returns {string[]} De-duplicated paths in sorted order.
 */
function textFiles() {
  const discovered = TEXT_ROOTS.flatMap(root => collectFiles(root));
  // `throwIfNoEntry: false` makes statSync return undefined for a missing
  // path, so absent explicit files are dropped instead of throwing.
  const explicit = TEXT_FILES.filter(path => statSync(path, { throwIfNoEntry: false })?.isFile());
  return [...new Set([...discovered, ...explicit])].sort();
}
|
||||
|
||||
// Every .json file under locales/ must parse. collectFiles also returns
// .html and .mjs files, which are not JSON, so they are filtered out here.
test('locale JSON files are valid UTF-8 JSON', () => {
  const jsonFiles = collectFiles('locales').filter(file => file.endsWith('.json'));
  for (const file of jsonFiles) {
    assert.doesNotThrow(() => JSON.parse(readFileSync(file, 'utf8')), `${file} must parse as JSON`);
  }
});
|
||||
|
||||
// Scan every collected text file for each mojibake pattern and report all
// hits at once, so a single run lists every offending location.
test('locale text does not contain known mojibake sequences', () => {
  const failures = [];

  for (const file of textFiles()) {
    const text = readFileSync(file, 'utf8');
    for (const { name, pattern } of MOJIBAKE_PATTERNS) {
      for (const match of text.matchAll(pattern)) {
        // Quote a window from 30 characters before the match to 50 after its
        // start, so the failure message shows the surrounding context.
        const start = Math.max(0, match.index - 30);
        const end = Math.min(text.length, match.index + 50);
        failures.push(`${file}: ${name}: ${JSON.stringify(text.slice(start, end))}`);
      }
    }
  }

  assert.deepEqual(failures, []);
});
|
||||
Reference in New Issue
Block a user