Deep research: don't treat a bare 'yes' as the research topic (#858)

Deep research asks 2-3 clarifying questions first. When the user answered with a bare affirmation ('yes', 'ok', 'go ahead'), that short message became latest_message and the query-synthesis fallback returned it verbatim, so research ran on the literal word 'yes'. Now, in ResearchHandler.synthesize_query, when synthesis can't run (history has at most one message) or fails, the fallback returns the earliest substantive user message (the original ask) if the latest message is an explicit affirmation/continuation phrase or is empty/punctuation-only; otherwise it still returns the latest message. There is deliberately no length heuristic: a short answer like 'UK', 'C++', or 'Rust' in a clarification flow is a real topic and is left untouched. Tests cover query/topic selection: bare 'yes' and 'Go ahead!' -> original ask, short answers (UK, C++) kept, short-only-substantive message kept, and a multi-word follow-up still flows through synthesis.
2026-06-02 06:30:53 +04:00
parent 00f16d66a3
commit e152a339d1
2 changed files with 135 additions and 2 deletions
--- a/src/research_handler.py
+++ b/src/research_handler.py
@@ -69,8 +69,40 @@ class ResearchHandler:
        """
        # Build conversation context from history
        history = getattr(sess, 'history', [])
+
+        # A bare affirmation ("yes", "ok", "go ahead") is the user accepting the
+        # clarifying-question round, NOT a research topic — researching the word
+        # "yes" is the classic failure here. When synthesis can't run or fails,
+        # fall back to the earliest substantive user message (the original ask)
+        # rather than the literal follow-up.
+        #
+        # Match on an explicit affirmation/continuation phrase only (plus the
+        # empty/punctuation-only case). We deliberately do NOT use a length
+        # heuristic: a short answer like "UK", "C++", or "Rust" is a real topic
+        # in a clarification flow and must be left untouched.
+        _AFFIRMATIONS = {
+            "yes", "y", "yeah", "yep", "yup", "sure", "sure thing", "ok", "okay",
+            "k", "kk", "go", "go ahead", "go for it", "do it", "please",
+            "yes please", "sounds good", "continue", "proceed", "lets go",
+            "let's go", "yes go ahead",
+        }
+
+        def _normalize(text: str) -> str:
+            return (text or "").strip().lower().strip("!.? ")
+
+        def _fallback() -> str:
+            normalized = _normalize(latest_message)
+            if normalized and normalized not in _AFFIRMATIONS:
+                return latest_message  # short or long, it's a real topic
+            # Affirmation, or empty/punctuation-only: use the original ask.
+            for m in history:
+                c = (m.content or "").strip()
+                if m.role == "user" and c and _normalize(c) not in _AFFIRMATIONS:
+                    return c
+            return latest_message
+
        if len(history) <= 1:
-            return latest_message  # No conversation to synthesize
+            return _fallback()  # No conversation to synthesize

        # Take last 6 messages max for context
        recent = history[-6:]
@@ -104,7 +136,7 @@ class ResearchHandler:
        except Exception as e:
            logger.warning(f"Query synthesis failed: {e}")

-        return latest_message  # Fallback
+        return _fallback()

    async def generate_plan(
        self, query: str, llm_endpoint: str, llm_model: str, llm_headers: dict = None,
--- a/tests/test_research_query_fallback.py
+++ b/tests/test_research_query_fallback.py
@@ -0,0 +1,101 @@
+"""Tests for ResearchHandler.synthesize_query topic/fallback selection.
+
+Deep research asks clarifying questions first. When the user answers with a
+bare affirmation ("yes", "ok", "go ahead"), that follow-up must not become the
+research topic — we fall back to the original substantive ask. A short but
+meaningful answer ("UK", "C++", "Rust") is a real topic and must be preserved.
+"""
+import pytest
+
+from core.models import ChatMessage, Session
+from src.research_handler import ResearchHandler
+
+
+def _session(history):
+    """Build a Session whose history is made from (role, content) pairs."""
+    messages = [ChatMessage(role, text) for role, text in history]
+    return Session(id="s1", name="t", endpoint_url="http://local.test",
+                   model="m", history=messages)
+
+
+@pytest.fixture
+def handler():  # supplies a ResearchHandler instance to the tests below
+    return ResearchHandler()
+
+
+async def _raise(*args, **kwargs):  # patched in for llm_call_async; always fails
+    raise RuntimeError("synthesis unavailable")
+
+
+@pytest.mark.asyncio
+async def test_bare_yes_falls_back_to_original_ask(handler, monkeypatch):
+    """A bare "yes" after the clarifying question yields the original ask."""
+    original_ask = "What is the best electric car for a cold climate?"
+    # Make the LLM call raise so the fallback path picks the query.
+    monkeypatch.setattr("src.llm_core.llm_call_async", _raise)
+    turns = [("user", original_ask),
+             ("assistant", "Happy to research that — should I go ahead?")]
+    query = await handler.synthesize_query(_session(turns), "yes", "http://local.test", "m")
+    assert query == original_ask
+
+
+@pytest.mark.asyncio
+async def test_continuation_phrase_falls_back_to_original_ask(handler, monkeypatch):
+    """A continuation phrase ("Go ahead!") yields the original ask."""
+    original_ask = "Summarize recent advances in fusion energy."
+    monkeypatch.setattr("src.llm_core.llm_call_async", _raise)
+    turns = [("user", original_ask),
+             ("assistant", "Want me to go ahead and research this?")]
+    query = await handler.synthesize_query(_session(turns), "Go ahead!", "http://local.test", "m")
+    assert query == original_ask
+
+
+@pytest.mark.asyncio
+async def test_short_country_answer_is_kept(handler, monkeypatch):
+    """A short real answer ("UK") to "which country?" is returned as-is."""
+    answer = "UK"
+    monkeypatch.setattr("src.llm_core.llm_call_async", _raise)
+    turns = [("user", "Compare national healthcare systems."),
+             ("assistant", "Which country should I focus on?")]
+    sess = _session(turns)
+    query = await handler.synthesize_query(sess, answer, "http://local.test", "m")
+    assert query == answer
+
+
+@pytest.mark.asyncio
+async def test_short_language_answer_is_kept(handler, monkeypatch):
+    """A short real answer ("C++") to "which language?" is returned as-is."""
+    answer = "C++"
+    monkeypatch.setattr("src.llm_core.llm_call_async", _raise)
+    turns = [("user", "Find the fastest sorting library."),
+             ("assistant", "Which language are you targeting?")]
+    sess = _session(turns)
+    query = await handler.synthesize_query(sess, answer, "http://local.test", "m")
+    assert query == answer
+
+
+@pytest.mark.asyncio
+async def test_short_only_substantive_message_is_kept(handler):
+    """A short message that is the only substantive one is not swallowed."""
+    topic, sess = "Rust", _session([("user", "Rust")])
+    query = await handler.synthesize_query(sess, topic, "http://local.test", "m")
+    assert query == topic
+
+
+@pytest.mark.asyncio
+async def test_multiword_followup_uses_synthesis(handler, monkeypatch):
+    """A normal multi-word follow-up is answered by query synthesis."""
+    expected = "Best long-range EV for cold climates with fast charging"
+    follow_up = "focus on long range and fast charging"
+
+    async def _fake_llm(*args, **kwargs):
+        # Stand-in for the LLM call: always answers with the canned query.
+        return expected
+
+    # Swap in the fake so the patched call returns a known value.
+    monkeypatch.setattr("src.llm_core.llm_call_async", _fake_llm)
+    turns = [("user", "What is the best electric car for a cold climate?"),
+             ("assistant", "Any constraints on range or charging?")]
+    sess = _session(turns)
+    query = await handler.synthesize_query(sess, follow_up, "http://local.test", "m")
+    assert query == expected