From f6f86c4b3461451e307c2fb89896a1747726ccba Mon Sep 17 00:00:00 2001
From: Afonso Coutinho <afonso@omelhorsite.pt>
Date: Wed, 3 Jun 2026 05:34:40 +0100
Subject: [PATCH] fix: research source extraction crashes on a non-dict finding
 (#1714)

---
 src/research_handler.py                        |  2 ++
 tests/test_research_handler_sources_nondict.py | 15 +++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 tests/test_research_handler_sources_nondict.py

diff --git a/src/research_handler.py b/src/research_handler.py
index ce7d6de..f5d7f83 100644
--- a/src/research_handler.py
+++ b/src/research_handler.py
@@ -461,6 +461,8 @@ class ResearchHandler:
         seen = set()
         sources = []
         for f in findings:
+            if not isinstance(f, dict):
+                continue
             url = f.get("url", "")
             title = f.get("title", "") or url
             summary = f.get("summary", "") or f.get("evidence", "")
diff --git a/tests/test_research_handler_sources_nondict.py b/tests/test_research_handler_sources_nondict.py
new file mode 100644
index 0000000..4d6947f
--- /dev/null
+++ b/tests/test_research_handler_sources_nondict.py
@@ -0,0 +1,15 @@
+from src.research_handler import ResearchHandler
+
+
+def test_extract_sources_skips_non_dict_findings():
+    # Regression test for #1714. Findings come from the DeepResearcher result
+    # list / cached JSON, so a malformed entry (None or a bare string) can
+    # appear; it must be skipped, not crash .get() and drop the real sources.
+    findings = [
+        {"url": "https://a.com", "title": "A", "summary": "real analysis of the topic"},
+        "junk-row",
+        None,
+        {"url": "https://b.com", "summary": "more genuine detail here"},
+    ]
+    out = ResearchHandler._extract_sources(findings)
+    assert [s["url"] for s in out] == ["https://a.com", "https://b.com"]