Fix chat stream recovery and PDF library indexing (#468)

2026-06-01 16:33:35 +03:00
parent 92a81480f7
commit e1102585bf
4 changed files with 55 additions and 8 deletions
--- a/src/personal_docs.py
+++ b/src/personal_docs.py
@@ -29,7 +29,7 @@ class PersonalDocsConfig:
    """Configuration for personal documents management."""
    CHUNK_SIZE: int = 1000
    CHUNK_OVERLAP: int = 200
-    DEFAULT_EXTENSIONS: Tuple[str, ...] = (".txt", ".md", ".json")
+    DEFAULT_EXTENSIONS: Tuple[str, ...] = (".txt", ".md", ".json", ".pdf")
    DEFAULT_K: int = 5
    STOP_WORDS: Set[str] = None
@@ -85,7 +85,8 @@ def load_personal_index(
            if not any(name.lower().endswith(ext) for ext in extensions):
                continue
            size = os.path.getsize(p)
-            text = read_text_file(p)
+            ext = os.path.splitext(name)[1].lower()
+            text = extract_pdf_text(p) if ext == ".pdf" else read_text_file(p)
            chunks = split_chunks(text)
            display = os.path.relpath(p, personal_dir)
            files.append({"name": display, "path": p, "size": size, "chunks": chunks})
--- a/static/js/chat.js
+++ b/static/js/chat.js
@@ -512,6 +512,9 @@ import createResearchSynapse from './researchSynapse.js';
    let timedOut = false;
    let processingProbeTimer = null;
    let processingProbeAbort = null;
+    let _renderStream = () => {};
+    let _cancelThinkingTimer = () => {};
+    let _removeThinkingSpinner = () => {};
    const clearProcessingProbe = () => {
      if (processingProbeTimer) {
        clearTimeout(processingProbeTimer);
@@ -986,13 +989,13 @@ import createResearchSynapse from './researchSynapse.js';
      }
      const esc = uiModule.esc;
      // Remove thinking spinner helper
-      function _removeThinkingSpinner() {
+      _removeThinkingSpinner = () => {
        const el = document.querySelector('.agent-thinking-dots');
        if (el) {
          if (el._spinner) el._spinner.destroy();
          el.remove();
        }
-      }
+      };
      // Tool-aware thinking spinner
      let _lastToolName = '';
@@ -1056,9 +1059,9 @@ import createResearchSynapse from './researchSynapse.js';
          }
        }, 400);
      }
-      function _cancelThinkingTimer() {
+      _cancelThinkingTimer = () => {
        if (_textPauseTimer) { clearTimeout(_textPauseTimer); _textPauseTimer = null; }
-      }
+      };
      // Document streaming state (text-fence detection)
      let _docFenceOpened = false;
@@ -1085,7 +1088,7 @@ import createResearchSynapse from './researchSynapse.js';
      }
      // Direct render helper for streaming text
-      function _renderStream() {
+      _renderStream = () => {
        let dt = stripToolBlocks(roundText);
        const bodyEl = roundHolder.querySelector('.body');
        const contentEl = _ensureStreamLayout(bodyEl);
@@ -1184,7 +1187,7 @@ import createResearchSynapse from './researchSynapse.js';
        contentEl._prevTextLen = contentEl.textContent.length;
        if (window.hljs) contentEl.querySelectorAll('pre code').forEach((b) => window.hljs.highlightElement(b));
        uiModule.scrollHistory();
-      }
+      };
      // Walk text nodes, skip past `prevLen` characters of old text,
      // wrap everything after that in <span class="token-new"> for fade-in
--- a/tests/test_chat_stream_scope.py
+++ b/tests/test_chat_stream_scope.py
@@ -0,0 +1,19 @@
 from pathlib import Path
 def test_stream_render_helpers_are_visible_to_catch_block():
    """Check that the stream helpers in chat.js are declared outside the try block.

    The test reads ``static/js/chat.js`` as text and splits it at the
    ``try {`` marker and the first following ``} catch (err) {`` marker.
    For each of the three helpers it then asserts that:

    * a no-op ``let`` placeholder appears before the ``try``;
    * the real implementation is assigned inside the ``try`` body;
    * no ``function <name>()`` declaration remains inside the ``try`` body.

    This is a text-level check only; it does not execute the JavaScript.
    """
    source = Path("static/js/chat.js").read_text(encoding="utf-8")
    try_start = source.index("    try {\n      // Re-enable auto-scroll")
    catch_start = source.index("    } catch (err) {", try_start)
    outer_scope = source[:try_start]
    try_body = source[try_start:catch_start]
    helpers = ("_renderStream", "_cancelThinkingTimer", "_removeThinkingSpinner")
    for helper in helpers:
        # Placeholder must exist in the scope shared by try and catch.
        assert f"let {helper} = () => {{}};" in outer_scope, helper
        # The real implementation is assigned (not declared) inside the try.
        assert f"{helper} = () => {{" in try_body, helper
        # A leftover declaration would be a second, try-scoped binding.
        assert f"function {helper}()" not in try_body, helper
--- a/tests/test_personal_docs_pdf_index.py
+++ b/tests/test_personal_docs_pdf_index.py
@@ -0,0 +1,24 @@
 from pathlib import Path
 from src import personal_docs
 def test_personal_index_includes_pdf_uploads(tmp_path, monkeypatch):
    """Check that a ``.pdf`` file in the personal directory is indexed.

    A placeholder PDF is written to ``tmp_path`` and
    ``personal_docs.extract_pdf_text`` is replaced with a stub that returns
    fixed text for that file only. The index must then contain exactly one
    entry with the expected name, path and chunk list.
    """
    pdf_path = tmp_path / "notes.pdf"
    pdf_path.write_bytes(b"%PDF-1.4 fake test pdf")

    def fake_extract(path):
        # Return text only for the file this test created.
        if Path(path) == pdf_path:
            return "readable pdf text"
        return ""

    monkeypatch.setattr(personal_docs, "extract_pdf_text", fake_extract)

    indexed = personal_docs.load_personal_index(str(tmp_path))
    names = [entry["name"] for entry in indexed]
    assert names == ["notes.pdf"]

    first = indexed[0]
    assert first["path"] == str(pdf_path)
    assert first["chunks"] == ["readable pdf text"]
 def test_personal_index_default_extensions_advertise_pdf_support():
    """Check that ``.pdf`` is listed in the configured default extensions."""
    default_extensions = personal_docs.config.DEFAULT_EXTENSIONS
    assert ".pdf" in default_extensions