diff --git a/src/deep_research.py b/src/deep_research.py
index 245c476..ee50629 100644
--- a/src/deep_research.py
+++ b/src/deep_research.py
@@ -344,6 +344,16 @@ class DeepResearcher:
         self._emit(phase="writing", total_sources=len(self.urls_fetched), total_findings=len(findings))
 
         if not report:
+            # Synthesis can fail (e.g. the LLM timed out) even though the search
+            # rounds did gather findings. Don't throw that work away — return the
+            # gathered findings as a basic compiled report instead of claiming
+            # nothing was found (#1551).
+            if findings:
+                logger.warning(
+                    "Synthesis produced no report; returning %d gathered "
+                    "finding(s) as a fallback", len(findings)
+                )
+                return self._fallback_report(question, findings)
             return "No information could be gathered for this question."
 
         self.evolving_report = report  # preserve pre-synthesis report
@@ -662,7 +672,11 @@ class DeepResearcher:
                 [{"role": "user", "content": prompt}],
                 temperature=0.3,
                 max_tokens=self.max_report_tokens,
-                timeout=60,
+                # Synthesis is a heavy generation call like the final report
+                # (which gets 180s); a slow local model (e.g. a 20B served from
+                # LM Studio) routinely needs >60s for it. The old 60s cap timed
+                # out mid-stream and discarded the round's findings (#1551).
+                timeout=180,
             )
         except Exception as e:
             logger.error(f"Synthesis failed: {e}")
@@ -841,6 +855,32 @@ class DeepResearcher:
             parts.append(f"**Finding {i}** — [{title}]({url})\n{content}")
         return "\n\n".join(parts)
 
+    def _fallback_report(self, question: str, findings: List[Dict]) -> str:
+        """Compile gathered findings into a basic report.
+
+        Used when the LLM synthesis step produced no report (e.g. it timed out)
+        but the search rounds did collect findings — so the user still gets the
+        material that was gathered instead of "No information could be gathered"
+        (#1551).
+
+        Args:
+            question: The research question; used as the report's H1 heading.
+            findings: The gathered findings, rendered via ``_format_findings``.
+
+        Returns:
+            A Markdown document: heading, a note that automatic synthesis did
+            not complete, then the formatted findings.
+        """
+        # A Markdown heading ends at the first newline, so collapse any
+        # whitespace runs (including newlines) to keep the question on one line.
+        heading = " ".join(question.split()) or "Research findings"
+        return (
+            f"# {heading}\n\n"
+            "_Automatic synthesis did not complete, so this report lists the "
+            f"{len(findings)} finding(s) gathered during research._\n\n"
+            f"{self._format_findings(findings)}"
+        )
+
     def get_stats(self) -> Dict:
         """Return research statistics."""
         elapsed = time.time() - self._start_time if self._start_time else 0
diff --git a/tests/test_deep_research_synthesis_resilience.py b/tests/test_deep_research_synthesis_resilience.py
new file mode 100644
index 0000000..4a3ac61
--- /dev/null
+++ b/tests/test_deep_research_synthesis_resilience.py
@@ -0,0 +1,94 @@
+"""Regression tests for issue #1551 — deep research reported "No information
+could be gathered" and showed nothing, even though the search rounds had already
+extracted findings.
+
+Two root causes in src/deep_research.py:
+
+1. `_synthesize` hard-capped its LLM call at `timeout=60`, while extraction uses
+   the user's `extraction_timeout` (e.g. 300s) and the final report uses 180s. A
+   slow local model (the reporter served a 20B from LM Studio) needs >60s to
+   synthesize a round's findings, so synthesis timed out after 3 attempts.
+
+2. When synthesis failed on the first round, the gathered findings were thrown
+   away: `if not report: return "No information could be gathered…"`. The 8
+   findings the run had already extracted were lost.
+
+The fixes: give synthesis the same 180s budget as the final report, and fall
+back to a compiled report built from the gathered findings when synthesis
+produced nothing. These run without a live LLM or DB (same stub pattern as
+tests/test_deep_research_date_context.py).
+"""
+import asyncio
+
+from src.deep_research import DeepResearcher
+
+
+def _researcher():
+    # Build without the heavy __init__; the methods under test only need these.
+    r = DeepResearcher.__new__(DeepResearcher)
+    r.synthesis_window = 10
+    r.max_report_tokens = 4096
+    return r
+
+
+_FINDINGS = [
+    {"url": "https://ex.com/a", "title": "Diarization basics",
+     "summary": "Speaker diarization segments audio by speaker identity."},
+    {"url": "https://ex.com/b", "title": "x-vectors",
+     "evidence": "x-vectors are embeddings used to cluster speech segments."},
+]
+
+
+def test_synthesis_uses_a_generous_timeout_not_60s():
+    """The synthesis LLM call must get a budget consistent with the final report
+    (180s), not the old 60s that timed out on slow local models (#1551)."""
+    r = _researcher()
+    seen = {}
+
+    async def _fake_llm(messages, **kwargs):
+        seen.update(kwargs)
+        return "synthesized report"
+
+    r._llm = _fake_llm
+    r._emit = lambda **k: None
+
+    out = asyncio.run(r._synthesize("q", _FINDINGS, ""))
+    assert out == "synthesized report"
+    assert seen.get("timeout", 0) >= 180, f"synthesis timeout too short: {seen.get('timeout')}"
+
+
+def test_fallback_report_preserves_findings():
+    """_fallback_report must surface the gathered findings (title + content),
+    not a 'nothing found' message."""
+    r = _researcher()
+    report = r._fallback_report("how does speaker diarization work", _FINDINGS)
+    assert "speaker diarization" in report.lower()
+    assert "Diarization basics" in report
+    assert "x-vectors" in report
+    assert "https://ex.com/a" in report
+    # It must NOT be the give-up message.
+    assert "No information could be gathered" not in report
+
+
+def test_fallback_report_heading_stays_on_one_line():
+    """A Markdown heading ends at the first newline, so a multi-line question
+    must be collapsed onto one line rather than spilling into the body."""
+    r = _researcher()
+    report = r._fallback_report("how does\n  speaker diarization work\n", _FINDINGS)
+    assert report.splitlines()[0] == "# how does speaker diarization work"
+
+
+def test_synthesis_failure_keeps_previous_report():
+    """If synthesis raises, the previous report is preserved (not blanked) so the
+    findings survive the round and the fallback can use them."""
+    r = _researcher()
+
+    async def _boom(messages, **kwargs):
+        raise RuntimeError("502 after 3 attempts")
+
+    r._llm = _boom
+    r._emit = lambda **k: None
+
+    prev = "existing report body"
+    out = asyncio.run(r._synthesize("q", _FINDINGS, prev))
+    assert out == prev  # unchanged, not emptied