Lift deep-research hard timeout into a setting (#783)

The 600s wall-clock cap in research_handler.start_research was too short
for local / edge LLMs to finish a deep-research synthesis — long
extraction passes plus a slow final report routinely blew past 10
minutes and the run was killed with partial results.

Introduce research_run_timeout_seconds (default 1800s = 30 min) in
DEFAULT_SETTINGS and resolve it at start_research entry when the caller
hasn't pinned hard_timeout. Clamp the resolved value to [60, 86400] so a
misconfigured settings.json can neither disable the safety net nor
explode into a multi-day hang. Existing call sites in research_routes.py
and chat_routes.py keep working unchanged — they don't pass hard_timeout
and now pick up the new default.

Closes #595.
This commit is contained in:
tanmayraut45
2026-06-02 07:53:32 +05:30
committed by GitHub
parent f4aef0dcf7
commit cc40a3263e
2 changed files with 18 additions and 1 deletions

View File

@@ -164,7 +164,7 @@ class ResearchHandler:
llm_endpoint: str,
llm_model: str,
max_time: int = 300,
hard_timeout: int = 600,
hard_timeout: int = None,
llm_headers: dict = None,
on_complete: callable = None,
prior_report: str = "",
@@ -182,6 +182,18 @@ class ResearchHandler:
max_rounds is the safety cap; the AI's _should_stop decision (after
min_rounds) terminates the loop earlier in normal operation.
"""
# Resolve the hard wall-clock timeout from settings when the caller
# didn't pin one. Local / edge models routinely need more than the
# old 600s default to finish a deep-research synthesis.
if hard_timeout is None:
from src.settings import get_setting
hard_timeout = _bounded_int(
get_setting("research_run_timeout_seconds", 1800),
default=1800,
minimum=60,
maximum=86400,
)
# Cancel any existing research for this session
if session_id in self._active_tasks:
existing = self._active_tasks[session_id]

View File

@@ -66,6 +66,11 @@ DEFAULT_SETTINGS = {
"research_max_tokens": 16384,
"research_extraction_timeout_seconds": 90,
"research_extraction_concurrency": 3,
# Hard wall-clock cap on a single deep-research run. The previous 600s
# (10 min) default cut off slow local / edge LLMs mid-synthesis; 1800s
# (30 min) is comfortable for most local setups while still bounding
# runaway jobs. Tune via Settings or by editing data/settings.json.
"research_run_timeout_seconds": 1800,
"agent_max_tool_calls": 0,
"agent_input_token_budget": 6000,
"agent_stream_timeout_seconds": 300,