fix(research): support timeout defaults in direct tests (#2624)
fix(research): honor planning query timeouts
This commit is contained in:
@@ -196,6 +196,8 @@ class DeepResearcher:
|
|||||||
max_content_chars: int = 15000,
|
max_content_chars: int = 15000,
|
||||||
max_report_tokens: int = 8192,
|
max_report_tokens: int = 8192,
|
||||||
extraction_timeout: int = 90,
|
extraction_timeout: int = 90,
|
||||||
|
planning_timeout: int = 90,
|
||||||
|
query_timeout: int = 120,
|
||||||
extraction_concurrency: int = 3,
|
extraction_concurrency: int = 3,
|
||||||
min_rounds: int = 2,
|
min_rounds: int = 2,
|
||||||
max_empty_rounds: int = 2,
|
max_empty_rounds: int = 2,
|
||||||
@@ -215,6 +217,8 @@ class DeepResearcher:
|
|||||||
self.max_content_chars = max_content_chars
|
self.max_content_chars = max_content_chars
|
||||||
self.max_report_tokens = max_report_tokens
|
self.max_report_tokens = max_report_tokens
|
||||||
self.extraction_timeout = min(3600, max(15, int(extraction_timeout or 90)))
|
self.extraction_timeout = min(3600, max(15, int(extraction_timeout or 90)))
|
||||||
|
self.planning_timeout = min(3600, max(15, int(planning_timeout or 90)))
|
||||||
|
self.query_timeout = min(3600, max(15, int(query_timeout or 120)))
|
||||||
self.extraction_concurrency = min(12, max(1, int(extraction_concurrency or 3)))
|
self.extraction_concurrency = min(12, max(1, int(extraction_concurrency or 3)))
|
||||||
self.min_rounds = min_rounds
|
self.min_rounds = min_rounds
|
||||||
self.max_empty_rounds = max_empty_rounds
|
self.max_empty_rounds = max_empty_rounds
|
||||||
@@ -395,7 +399,7 @@ class DeepResearcher:
|
|||||||
[{"role": "user", "content": prompt}],
|
[{"role": "user", "content": prompt}],
|
||||||
temperature=0.3,
|
temperature=0.3,
|
||||||
max_tokens=1024,
|
max_tokens=1024,
|
||||||
timeout=30,
|
timeout=getattr(self, "planning_timeout", 90),
|
||||||
)
|
)
|
||||||
# Try to parse as JSON for structured plan
|
# Try to parse as JSON for structured plan
|
||||||
parsed = self._parse_json_object(response)
|
parsed = self._parse_json_object(response)
|
||||||
@@ -478,6 +482,7 @@ class DeepResearcher:
|
|||||||
[{"role": "user", "content": prompt}],
|
[{"role": "user", "content": prompt}],
|
||||||
temperature=0.5,
|
temperature=0.5,
|
||||||
max_tokens=4096,
|
max_tokens=4096,
|
||||||
|
timeout=getattr(self, "query_timeout", 120),
|
||||||
)
|
)
|
||||||
queries = self._parse_json_array(response)
|
queries = self._parse_json_array(response)
|
||||||
# Deduplicate
|
# Deduplicate
|
||||||
|
|||||||
@@ -722,6 +722,18 @@ class ResearchHandler:
|
|||||||
minimum=1,
|
minimum=1,
|
||||||
maximum=12,
|
maximum=12,
|
||||||
)
|
)
|
||||||
|
_planning_timeout = _bounded_int(
|
||||||
|
get_setting("research_planning_timeout_seconds", _extraction_timeout),
|
||||||
|
default=_extraction_timeout,
|
||||||
|
minimum=15,
|
||||||
|
maximum=3600,
|
||||||
|
)
|
||||||
|
_query_timeout = _bounded_int(
|
||||||
|
get_setting("research_query_timeout_seconds", _extraction_timeout),
|
||||||
|
default=_extraction_timeout,
|
||||||
|
minimum=15,
|
||||||
|
maximum=3600,
|
||||||
|
)
|
||||||
|
|
||||||
researcher = DeepResearcher(
|
researcher = DeepResearcher(
|
||||||
llm_endpoint=llm_endpoint,
|
llm_endpoint=llm_endpoint,
|
||||||
@@ -732,6 +744,8 @@ class ResearchHandler:
|
|||||||
max_time=max_time,
|
max_time=max_time,
|
||||||
max_report_tokens=_max_report_tokens,
|
max_report_tokens=_max_report_tokens,
|
||||||
extraction_timeout=_extraction_timeout,
|
extraction_timeout=_extraction_timeout,
|
||||||
|
planning_timeout=_planning_timeout,
|
||||||
|
query_timeout=_query_timeout,
|
||||||
extraction_concurrency=_extraction_concurrency,
|
extraction_concurrency=_extraction_concurrency,
|
||||||
progress_callback=progress_callback,
|
progress_callback=progress_callback,
|
||||||
search_provider=search_provider,
|
search_provider=search_provider,
|
||||||
|
|||||||
@@ -85,6 +85,11 @@ DEFAULT_SETTINGS = {
|
|||||||
"research_search_provider": "",
|
"research_search_provider": "",
|
||||||
"research_max_tokens": 16384,
|
"research_max_tokens": 16384,
|
||||||
"research_extraction_timeout_seconds": 90,
|
"research_extraction_timeout_seconds": 90,
|
||||||
|
# Lightweight planning/query LLM calls happen before any search starts.
|
||||||
|
# Keep them separately tunable so slow local backends are not capped by
|
||||||
|
# the old 30s/60s per-call defaults.
|
||||||
|
"research_planning_timeout_seconds": 90,
|
||||||
|
"research_query_timeout_seconds": 90,
|
||||||
"research_extraction_concurrency": 3,
|
"research_extraction_concurrency": 3,
|
||||||
# Hard wall-clock cap on a single deep-research run. The previous 600s
|
# Hard wall-clock cap on a single deep-research run. The previous 600s
|
||||||
# (10 min) default cut off slow local / edge LLMs mid-synthesis; 1800s
|
# (10 min) default cut off slow local / edge LLMs mid-synthesis; 1800s
|
||||||
|
|||||||
@@ -96,3 +96,33 @@ def test_extraction_timeout_allows_long_local_model_runs():
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert researcher.extraction_timeout == 1800
|
assert researcher.extraction_timeout == 1800
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_planning_and_query_generation_use_configured_timeouts():
|
||||||
|
researcher = DeepResearcher(
|
||||||
|
llm_endpoint="http://local.test/v1/chat/completions",
|
||||||
|
llm_model="local-model",
|
||||||
|
planning_timeout=234,
|
||||||
|
query_timeout=345,
|
||||||
|
)
|
||||||
|
captured = []
|
||||||
|
|
||||||
|
async def fake_llm(messages, temperature=0.3, max_tokens=4096, timeout=60):
|
||||||
|
captured.append(timeout)
|
||||||
|
if max_tokens == 1024:
|
||||||
|
return json.dumps({
|
||||||
|
"sub_questions": ["one"],
|
||||||
|
"key_topics": ["topic"],
|
||||||
|
"success_criteria": "complete",
|
||||||
|
})
|
||||||
|
return json.dumps(["query one", "query two"])
|
||||||
|
|
||||||
|
researcher._llm = fake_llm
|
||||||
|
|
||||||
|
plan = await researcher._create_plan("question")
|
||||||
|
queries = await researcher._generate_queries("question", "", 1)
|
||||||
|
|
||||||
|
assert "Sub-questions: one" in plan
|
||||||
|
assert queries == ["query one", "query two"]
|
||||||
|
assert captured == [234, 345]
|
||||||
|
|||||||
Reference in New Issue
Block a user