Fix HTML entity decoding and broaden OSINT dedup window

- Replace single &#039; handler with generic numeric/hex entity decoder
  so &#39; and other unpadded entities are properly converted
- Dedup urgent OSINT posts against all hot memory runs (last 3 sweeps)
  instead of only the previous sweep, preventing posts that drop out
  of one sweep from reappearing as "new" in the next

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Greg Scher
2026-03-23 13:01:32 -04:00
parent 31c305cbbb
commit b7322f1c7e
3 changed files with 12 additions and 4 deletions

View File

@@ -90,7 +90,7 @@ function contentHash(text) {
* @param {object|null} previous - previous sweep's synthesized data (null on first run)
* @param {object} [thresholdOverrides] - optional: { numeric: {...}, count: {...} }
*/
export function computeDelta(current, previous, thresholdOverrides = {}) {
export function computeDelta(current, previous, thresholdOverrides = {}, priorRuns = []) {
if (!previous) return null;
if (!current) return null;
@@ -152,8 +152,11 @@ export function computeDelta(current, previous, thresholdOverrides = {}) {
// ─── New urgent Telegram posts (semantic dedup) ──────────────────────
// Dedup against all recent runs (not just the last one) to catch posts that
// drop out of one sweep but reappear in a later one.
const sources = priorRuns.length > 0 ? priorRuns : [previous];
const prevHashes = new Set(
(previous.tg?.urgent || []).map(p => contentHash(p.text))
sources.flatMap(run => (run?.tg?.urgent || []).map(p => contentHash(p.text)))
);
for (const post of (current.tg?.urgent || [])) {

View File

@@ -74,7 +74,9 @@ export class MemoryManager {
// Add a new run to hot memory
addRun(synthesizedData) {
const previous = this.getLastRun();
const delta = computeDelta(synthesizedData, previous);
// Gather the data of every run in hot memory so computeDelta can dedup urgent posts over a broader window
const priorRuns = this.hot.runs.map(r => r.data);
const delta = computeDelta(synthesizedData, previous, {}, priorRuns);
// Compact the data for storage (strip large arrays)
const compact = this._compactForStorage(synthesizedData);