From f6f86c4b3461451e307c2fb89896a1747726ccba Mon Sep 17 00:00:00 2001
From: Afonso Coutinho
Date: Wed, 3 Jun 2026 05:34:40 +0100
Subject: [PATCH] fix: research source extraction crashes on a non-dict finding (#1714)

---
Notes (free-form area; ignored when the patch is applied):

  * src/research_handler.py: the loop over `findings` now skips any entry
    that is not a dict before the first `.get` call on it.
  * tests/test_research_handler_sources_nondict.py: new test that passes a
    bare string and None between two dict findings to
    `ResearchHandler._extract_sources` and expects only the two dict URLs
    back, in their original order.
  * NOTE(review): the line breaks of this patch and the indentation of the
    lines in the first hunk were reconstructed from a whitespace-collapsed
    copy; confirm them against src/research_handler.py before applying.

 src/research_handler.py                        |  2 ++
 tests/test_research_handler_sources_nondict.py | 15 +++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 tests/test_research_handler_sources_nondict.py

diff --git a/src/research_handler.py b/src/research_handler.py
index ce7d6de..f5d7f83 100644
--- a/src/research_handler.py
+++ b/src/research_handler.py
@@ -461,6 +461,8 @@ class ResearchHandler:
         seen = set()
         sources = []
         for f in findings:
+            if not isinstance(f, dict):
+                continue
             url = f.get("url", "")
             title = f.get("title", "") or url
             summary = f.get("summary", "") or f.get("evidence", "")
diff --git a/tests/test_research_handler_sources_nondict.py b/tests/test_research_handler_sources_nondict.py
new file mode 100644
index 0000000..4d6947f
--- /dev/null
+++ b/tests/test_research_handler_sources_nondict.py
@@ -0,0 +1,15 @@
+from src.research_handler import ResearchHandler
+
+
+def test_extract_sources_skips_non_dict_findings():
+    # findings come from the DeepResearcher result list / cached JSON; a
+    # malformed entry (None or a bare string) made the old loop call .get on a
+    # non-dict and crash, dropping every real source in the set.
+    findings = [
+        {"url": "https://a.com", "title": "A", "summary": "real analysis of the topic"},
+        "junk-row",
+        None,
+        {"url": "https://b.com", "summary": "more genuine detail here"},
+    ]
+    out = ResearchHandler._extract_sources(findings)
+    assert [s["url"] for s in out] == ["https://a.com", "https://b.com"]