From e152a339d10fcff13e2a3dce8c784d23362e5a31 Mon Sep 17 00:00:00 2001
From: Mahdi Salmanzade
Date: Tue, 2 Jun 2026 06:30:53 +0400
Subject: [PATCH] Deep research: don't treat a bare 'yes' as the research topic (#858)

Deep research asks 2-3 clarifying questions first. When the user answered
with a bare affirmation ('yes', 'ok', 'go ahead'), that short message
became latest_message and the query-synthesis fallback returned it
verbatim, so research ran on the literal word 'yes'.

In ResearchHandler.synthesize_query, when synthesis can't run (history too
short) or fails, fall back to the earliest substantive user message (the
original ask) only when the latest message is an explicit
affirmation/continuation phrase or is empty/punctuation-only. There is
deliberately no length heuristic: a short answer like 'UK', 'C++', or
'Rust' in a clarification flow is a real topic and is left untouched.

A history message only counts as substantive if it still has content
after normalization, so a punctuation-only turn ('???') is never picked
as the original ask.

Tests cover query/topic selection: bare 'yes' -> original ask, short
answers (UK, C++) kept, short-only-substantive message kept, a multi-word
follow-up still flows through synthesis, and a punctuation-only history
turn is skipped.
---
 src/research_handler.py               |  38 ++++++++-
 tests/test_research_query_fallback.py | 115 ++++++++++++++++++++++++++
 2 files changed, 151 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_research_query_fallback.py

diff --git a/src/research_handler.py b/src/research_handler.py
index 9bfedd9..4ad39af 100644
--- a/src/research_handler.py
+++ b/src/research_handler.py
@@ -69,8 +69,42 @@ class ResearchHandler:
         """
         # Build conversation context from history
         history = getattr(sess, 'history', [])
+
+        # A bare affirmation ("yes", "ok", "go ahead") is the user accepting the
+        # clarifying-question round, NOT a research topic — researching the word
+        # "yes" is the classic failure here. When synthesis can't run or fails,
+        # fall back to the earliest substantive user message (the original ask)
+        # rather than the literal follow-up.
+        #
+        # Match on an explicit affirmation/continuation phrase only (plus the
+        # empty/punctuation-only case). We deliberately do NOT use a length
+        # heuristic: a short answer like "UK", "C++", or "Rust" is a real topic
+        # in a clarification flow and must be left untouched.
+        _AFFIRMATIONS = {
+            "yes", "y", "yeah", "yep", "yup", "sure", "sure thing", "ok", "okay",
+            "k", "kk", "go", "go ahead", "go for it", "do it", "please",
+            "yes please", "sounds good", "continue", "proceed", "lets go",
+            "let's go", "yes go ahead",
+        }
+
+        def _normalize(text: str) -> str:
+            return (text or "").strip().lower().strip("!.? ")
+
+        def _fallback() -> str:
+            normalized = _normalize(latest_message)
+            if normalized and normalized not in _AFFIRMATIONS:
+                return latest_message  # short or long, it's a real topic
+            # Affirmation, or empty/punctuation-only: use the original ask.
+            # A history turn must itself survive normalization, otherwise a
+            # punctuation-only message ("???") would be returned as the ask.
+            for m in history:
+                key = _normalize(m.content)
+                if m.role == "user" and key and key not in _AFFIRMATIONS:
+                    return m.content.strip()
+            return latest_message
+
         if len(history) <= 1:
-            return latest_message  # No conversation to synthesize
+            return _fallback()  # No conversation to synthesize
 
         # Take last 6 messages max for context
         recent = history[-6:]
@@ -104,7 +138,7 @@ class ResearchHandler:
         except Exception as e:
             logger.warning(f"Query synthesis failed: {e}")
 
-        return latest_message  # Fallback
+        return _fallback()
 
     async def generate_plan(
         self, query: str, llm_endpoint: str, llm_model: str, llm_headers: dict = None,
diff --git a/tests/test_research_query_fallback.py b/tests/test_research_query_fallback.py
new file mode 100644
index 0000000..dc00fcd
--- /dev/null
+++ b/tests/test_research_query_fallback.py
@@ -0,0 +1,115 @@
+"""Tests for ResearchHandler.synthesize_query topic/fallback selection.
+
+Deep research asks clarifying questions first. When the user answers with a
+bare affirmation ("yes", "ok", "go ahead"), that follow-up must not become the
+research topic — we fall back to the original substantive ask. A short but
+meaningful answer ("UK", "C++", "Rust") is a real topic and must be preserved.
+"""
+import pytest
+
+from core.models import ChatMessage, Session
+from src.research_handler import ResearchHandler
+
+
+def _session(history):
+    return Session(
+        id="s1", name="t", endpoint_url="http://local.test", model="m",
+        history=[ChatMessage(role, content) for role, content in history],
+    )
+
+
+@pytest.fixture
+def handler():
+    return ResearchHandler()
+
+
+async def _raise(*args, **kwargs):
+    raise RuntimeError("synthesis unavailable")
+
+
+@pytest.mark.asyncio
+async def test_bare_yes_falls_back_to_original_ask(handler, monkeypatch):
+    # original ask + assistant clarification + user "yes" => original ask
+    monkeypatch.setattr("src.llm_core.llm_call_async", _raise)
+    sess = _session([
+        ("user", "What is the best electric car for a cold climate?"),
+        ("assistant", "Happy to research that — should I go ahead?"),
+    ])
+    result = await handler.synthesize_query(sess, "yes", "http://local.test", "m")
+    assert result == "What is the best electric car for a cold climate?"
+
+
+@pytest.mark.asyncio
+async def test_continuation_phrase_falls_back_to_original_ask(handler, monkeypatch):
+    monkeypatch.setattr("src.llm_core.llm_call_async", _raise)
+    sess = _session([
+        ("user", "Summarize recent advances in fusion energy."),
+        ("assistant", "Want me to go ahead and research this?"),
+    ])
+    result = await handler.synthesize_query(sess, "Go ahead!", "http://local.test", "m")
+    assert result == "Summarize recent advances in fusion energy."
+
+
+@pytest.mark.asyncio
+async def test_short_country_answer_is_kept(handler, monkeypatch):
+    # original ask + assistant asks "which country?" + user "UK" => "UK"
+    monkeypatch.setattr("src.llm_core.llm_call_async", _raise)
+    sess = _session([
+        ("user", "Compare national healthcare systems."),
+        ("assistant", "Which country should I focus on?"),
+    ])
+    result = await handler.synthesize_query(sess, "UK", "http://local.test", "m")
+    assert result == "UK"
+
+
+@pytest.mark.asyncio
+async def test_short_language_answer_is_kept(handler, monkeypatch):
+    # original ask + assistant asks "which language?" + user "C++" => "C++"
+    monkeypatch.setattr("src.llm_core.llm_call_async", _raise)
+    sess = _session([
+        ("user", "Find the fastest sorting library."),
+        ("assistant", "Which language are you targeting?"),
+    ])
+    result = await handler.synthesize_query(sess, "C++", "http://local.test", "m")
+    assert result == "C++"
+
+
+@pytest.mark.asyncio
+async def test_short_only_substantive_message_is_kept(handler):
+    # A short answer that is the only substantive message must not be swallowed.
+    sess = _session([("user", "Rust")])
+    result = await handler.synthesize_query(sess, "Rust", "http://local.test", "m")
+    assert result == "Rust"
+
+
+@pytest.mark.asyncio
+async def test_multiword_followup_uses_synthesis(handler, monkeypatch):
+    # A normal multi-word follow-up still flows through query synthesis untouched.
+    synthesized = "Best long-range EV for cold climates with fast charging"
+
+    async def _synth(*args, **kwargs):
+        return synthesized
+
+    monkeypatch.setattr("src.llm_core.llm_call_async", _synth)
+    sess = _session([
+        ("user", "What is the best electric car for a cold climate?"),
+        ("assistant", "Any constraints on range or charging?"),
+    ])
+    result = await handler.synthesize_query(
+        sess, "focus on long range and fast charging", "http://local.test", "m",
+    )
+    assert result == synthesized
+
+
+@pytest.mark.asyncio
+async def test_punctuation_only_history_message_is_skipped(handler, monkeypatch):
+    # A "???" user turn is not an ask; the scan must continue to the real one.
+    monkeypatch.setattr("src.llm_core.llm_call_async", _raise)
+    sess = _session([
+        ("user", "???"),
+        ("assistant", "What would you like me to research?"),
+        ("user", "History of the Rust borrow checker."),
+        ("assistant", "Should I go ahead?"),
+    ])
+    result = await handler.synthesize_query(sess, "ok", "http://local.test", "m")
+    assert result == "History of the Rust borrow checker."