From ab5311c44d271acdf7fecce4b74bf1154a0e548d Mon Sep 17 00:00:00 2001 From: ooovenenoso <120500656+ooovenenoso@users.noreply.github.com> Date: Thu, 4 Jun 2026 14:23:17 -0400 Subject: [PATCH] fix(research): support timeout defaults in direct tests (#2624) fix(research): honor planning query timeouts --- src/deep_research.py | 7 ++++- src/research_handler.py | 14 +++++++++ src/settings.py | 5 ++++ .../test_deep_research_extraction_controls.py | 30 +++++++++++++++++++ 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/deep_research.py b/src/deep_research.py index 4617439..7a31422 100644 --- a/src/deep_research.py +++ b/src/deep_research.py @@ -196,6 +196,8 @@ class DeepResearcher: max_content_chars: int = 15000, max_report_tokens: int = 8192, extraction_timeout: int = 90, + planning_timeout: int = 90, + query_timeout: int = 120, extraction_concurrency: int = 3, min_rounds: int = 2, max_empty_rounds: int = 2, @@ -215,6 +217,8 @@ class DeepResearcher: self.max_content_chars = max_content_chars self.max_report_tokens = max_report_tokens self.extraction_timeout = min(3600, max(15, int(extraction_timeout or 90))) + self.planning_timeout = min(3600, max(15, int(planning_timeout or 90))) + self.query_timeout = min(3600, max(15, int(query_timeout or 120))) self.extraction_concurrency = min(12, max(1, int(extraction_concurrency or 3))) self.min_rounds = min_rounds self.max_empty_rounds = max_empty_rounds @@ -395,7 +399,7 @@ class DeepResearcher: [{"role": "user", "content": prompt}], temperature=0.3, max_tokens=1024, - timeout=30, + timeout=getattr(self, "planning_timeout", 90), ) # Try to parse as JSON for structured plan parsed = self._parse_json_object(response) @@ -478,6 +482,7 @@ class DeepResearcher: [{"role": "user", "content": prompt}], temperature=0.5, max_tokens=4096, + timeout=getattr(self, "query_timeout", 120), ) queries = self._parse_json_array(response) # Deduplicate diff --git a/src/research_handler.py b/src/research_handler.py index f5d7f83..bec9695 100644 --- a/src/research_handler.py +++ b/src/research_handler.py @@ -722,6 +722,18 @@ class ResearchHandler: minimum=1, maximum=12, ) + _planning_timeout = _bounded_int( + get_setting("research_planning_timeout_seconds", _extraction_timeout), + default=_extraction_timeout, + minimum=15, + maximum=3600, + ) + _query_timeout = _bounded_int( + get_setting("research_query_timeout_seconds", _extraction_timeout), + default=_extraction_timeout, + minimum=15, + maximum=3600, + ) researcher = DeepResearcher( llm_endpoint=llm_endpoint, @@ -732,6 +744,8 @@ class ResearchHandler: max_time=max_time, max_report_tokens=_max_report_tokens, extraction_timeout=_extraction_timeout, + planning_timeout=_planning_timeout, + query_timeout=_query_timeout, extraction_concurrency=_extraction_concurrency, progress_callback=progress_callback, search_provider=search_provider, diff --git a/src/settings.py b/src/settings.py index 09a53c9..8f810a6 100644 --- a/src/settings.py +++ b/src/settings.py @@ -85,6 +85,11 @@ DEFAULT_SETTINGS = { "research_search_provider": "", "research_max_tokens": 16384, "research_extraction_timeout_seconds": 90, + # Lightweight planning/query LLM calls happen before any search starts. + # Keep them separately tunable so slow local backends are not capped by + # the old 30s/60s per-call defaults. + "research_planning_timeout_seconds": 90, + "research_query_timeout_seconds": 90, "research_extraction_concurrency": 3, # Hard wall-clock cap on a single deep-research run. The previous 600s # (10 min) default cut off slow local / edge LLMs mid-synthesis; 1800s diff --git a/tests/test_deep_research_extraction_controls.py b/tests/test_deep_research_extraction_controls.py index 3317ddc..a1158e1 100644 --- a/tests/test_deep_research_extraction_controls.py +++ b/tests/test_deep_research_extraction_controls.py @@ -96,3 +96,33 @@ def test_extraction_timeout_allows_long_local_model_runs(): ) assert researcher.extraction_timeout == 1800 + + +@pytest.mark.asyncio +async def test_planning_and_query_generation_use_configured_timeouts(): + researcher = DeepResearcher( + llm_endpoint="http://local.test/v1/chat/completions", + llm_model="local-model", + planning_timeout=234, + query_timeout=345, + ) + captured = [] + + async def fake_llm(messages, temperature=0.3, max_tokens=4096, timeout=60): + captured.append(timeout) + if max_tokens == 1024: + return json.dumps({ + "sub_questions": ["one"], + "key_topics": ["topic"], + "success_criteria": "complete", + }) + return json.dumps(["query one", "query two"]) + + researcher._llm = fake_llm + + plan = await researcher._create_plan("question") + queries = await researcher._generate_queries("question", "", 1) + + assert "Sub-questions: one" in plan + assert queries == ["query one", "query two"] + assert captured == [234, 345]