From a880b1762412d59eeb3de00a1829d8d7aed41077 Mon Sep 17 00:00:00 2001
From: Afonso Coutinho <afonso@omelhorsite.pt>
Date: Wed, 3 Jun 2026 05:42:05 +0100
Subject: [PATCH] Skip malformed personal keyword index rows

Make personal keyword retrieval skip corrupted non-dict index entries and tolerate missing or empty chunk lists and missing names, with regression coverage.
---
 src/personal_docs.py                        |  6 ++++--
 tests/test_personal_docs_keyword_nondict.py | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_personal_docs_keyword_nondict.py

diff --git a/src/personal_docs.py b/src/personal_docs.py
index 06875d0..92ba1bc 100644
--- a/src/personal_docs.py
+++ b/src/personal_docs.py
@@ -134,10 +134,12 @@ def retrieve_personal_keyword(personal_index: List[Dict], query: str, k: int = 5
 
     scored = []
     for f in personal_index:
-        for idx, ch in enumerate(f["chunks"]):
+        if not isinstance(f, dict):
+            continue
+        for idx, ch in enumerate(f.get("chunks") or []):
             score = len(q & tokenize(ch))
             if score > 0:
-                scored.append((score, f["name"], idx, ch))
+                scored.append((score, f.get("name", ""), idx, ch))
     scored.sort(key=lambda x: x[0], reverse=True)
 
     out = []
diff --git a/tests/test_personal_docs_keyword_nondict.py b/tests/test_personal_docs_keyword_nondict.py
new file mode 100644
index 0000000..f46c9f4
--- /dev/null
+++ b/tests/test_personal_docs_keyword_nondict.py
@@ -0,0 +1,21 @@
+from src.personal_docs import retrieve_personal_keyword
+
+
+def test_retrieve_personal_keyword_skips_non_dict_rows():
+    # A corrupted personal index can hold non-dict rows (e.g. partial write,
+    # bad import). The old loop evaluated f["chunks"], which raises TypeError
+    # on str, None and list rows and aborted the search; they are now skipped.
+    index = [
+        "bad-row",
+        None,
+        ["also", "bad"],
+        {"name": "report.txt", "chunks": ["hello world from the quarterly report"]},
+    ]
+    out = retrieve_personal_keyword(index, "hello", k=5)
+    assert out == ["[report.txt :: chunk 1]\nhello world from the quarterly report"]
+
+
+def test_retrieve_personal_keyword_tolerates_missing_chunks_key():
+    index = [{"name": "empty.txt"}, {"name": "doc.txt", "chunks": ["alpha beta gamma"]}]
+    out = retrieve_personal_keyword(index, "beta", k=5)  # "empty.txt" has no chunks to score
+    assert out == ["[doc.txt :: chunk 1]\nalpha beta gamma"]