From ce7f5dbbdd2e4cd8d80e279ce760a90e64c31fc2 Mon Sep 17 00:00:00 2001
From: lekt8 <lewistham9x@gmail.com>
Date: Wed, 3 Jun 2026 02:00:52 +0800
Subject: [PATCH] Inject current date into deep research planning and query
 prompts (#1347)

Deep research generated search queries from the LLM's training-cutoff
knowledge, so it emitted stale-year queries like "best Python tutorials
2025" when the actual year is later (issue #1341). The chat/agent path
already grounds the model with "Today is ..." (src/agent_loop.py); the
deep research planning and query-generation prompts had no equivalent.

Add a small current_date_context() helper and prepend its output at the plan
and query-generation prompt sites (and at the research_handler plan-preview
path that reuses RESEARCH_PLAN_PROMPT). The date uses the system's local time
zone and only portable strftime directives.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/deep_research.py                     | 19 ++++++-
 src/research_handler.py                  |  4 +-
 tests/test_deep_research_date_context.py | 68 ++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 4 deletions(-)
 create mode 100644 tests/test_deep_research_date_context.py

diff --git a/src/deep_research.py b/src/deep_research.py
index 1c4ea39..245c476 100644
--- a/src/deep_research.py
+++ b/src/deep_research.py
@@ -11,6 +11,7 @@ import json
 import logging
 import re
 import time
+from datetime import datetime
 from typing import Callable, Dict, List, Optional, Set
 
 from src.research_utils import strip_thinking, is_low_quality
@@ -19,6 +20,20 @@ from src.goal_based_extractor import EXTRACTOR_PROMPT
 
 logger = logging.getLogger(__name__)
 
+
+def current_date_context() -> str:
+    """Return a prompt preamble stating today's date (long and ISO form) plus an
+    instruction to use the current year, ending in a blank line so it can be
+    prepended to a prompt. Motivation: stale-year queries such as "best Python
+    tutorials 2025" (issue #1341). Date is taken in the system's local timezone."""
+    now = datetime.now().astimezone()  # aware datetime in the system's local timezone
+    return (  # only standard strftime directives are used (%B, %d, %Y, %m)
+        f"Today's date is {now.strftime('%B %d, %Y')} ({now.strftime('%Y-%m-%d')}). "
+        f"When a search query needs a year or refers to 'latest'/'current'/"
+        f"'this year', use {now.strftime('%Y')} or relative wording — never a "
+        f"year inferred from training data.\n\n"
+    )
+
 # ---------------------------------------------------------------------------
 # Prompts
 # ---------------------------------------------------------------------------
@@ -364,7 +379,7 @@ class DeepResearcher:
     # ------------------------------------------------------------------
     async def _create_plan(self, question: str) -> str:
         """LLM analyzes the question and creates a research plan."""
-        prompt = RESEARCH_PLAN_PROMPT.format(question=question)
+        prompt = current_date_context() + RESEARCH_PLAN_PROMPT.format(question=question)
         try:
             response = await self._llm(
                 [{"role": "user", "content": prompt}],
@@ -439,7 +454,7 @@ class DeepResearcher:
                 "that the report doesn't yet cover well."
             )
 
-        prompt = QUERY_GEN_PROMPT.format(
+        prompt = current_date_context() + QUERY_GEN_PROMPT.format(
             question=question,
             research_plan=self.research_plan or "(No plan — search broadly.)",
             report=report or "(No findings yet.)",
diff --git a/src/research_handler.py b/src/research_handler.py
index 381530b..aed9cb6 100644
--- a/src/research_handler.py
+++ b/src/research_handler.py
@@ -161,10 +161,10 @@ class ResearchHandler:
     ) -> Optional[dict]:
         """Generate a research plan for user review before starting research."""
         try:
-            from src.deep_research import RESEARCH_PLAN_PROMPT
+            from src.deep_research import RESEARCH_PLAN_PROMPT, current_date_context
             from src.llm_core import llm_call_async
 
-            prompt = RESEARCH_PLAN_PROMPT.format(question=query)
+            prompt = current_date_context() + RESEARCH_PLAN_PROMPT.format(question=query)
             response = await llm_call_async(
                 url=llm_endpoint,
                 model=llm_model,
diff --git a/tests/test_deep_research_date_context.py b/tests/test_deep_research_date_context.py
new file mode 100644
index 0000000..5096ac3
--- /dev/null
+++ b/tests/test_deep_research_date_context.py
@@ -0,0 +1,68 @@
+"""Regression tests for issue #1341 — deep research used the model's
+training-cutoff year (e.g. "best Python tutorials 2025") because the
+query-generation and planning prompts never told the LLM the current date.
+
+The chat/agent path already injects "Today is ..." (src/agent_loop.py); deep
+research had no equivalent. These tests pin that the current year now reaches
+the LLM at both the planning and query-generation steps, without needing a live
+LLM or DB.
+"""
+import asyncio
+from datetime import datetime
+
+from src.deep_research import (
+    DeepResearcher,
+    current_date_context,
+    RESEARCH_PLAN_PROMPT,
+)
+
+
+def _this_year() -> str:  # current year as a string, read like current_date_context()
+    return datetime.now().astimezone().strftime("%Y")
+
+
+def test_current_date_context_names_the_real_year():  # helper output only, no LLM
+    ctx = current_date_context()
+    assert _this_year() in ctx
+    # The preamble must also carry the explicit warning against training-data years.
+    assert "training data" in ctx.lower()
+
+
+def test_generate_queries_prompt_carries_the_current_year():
+    # Bypass __init__ via __new__; only the attributes assigned below are set up.
+    r = DeepResearcher.__new__(DeepResearcher)
+    r.research_plan = ""  # falsy, so the "(No plan ...)" placeholder is used
+    r.queries_used = set()
+
+    seen = {}  # captures the prompt handed to the stubbed LLM
+
+    async def _fake_llm(messages, **kwargs):  # stand-in for the real LLM call
+        seen["prompt"] = messages[0]["content"]
+        return '["python tutorials", "python guides"]'
+
+    r._llm = _fake_llm
+
+    queries = asyncio.run(r._generate_queries("best python tutorials", "", 1))
+
+    assert queries  # sanity: the stubbed JSON array was parsed into queries
+    # Regression check for #1341: the current year is in the prompt the LLM receives.
+    assert _this_year() in seen["prompt"]
+
+
+def test_plan_prompt_carries_the_current_year():
+    r = DeepResearcher.__new__(DeepResearcher)  # bypass __init__; only _llm is set
+
+    seen = {}  # captures the prompt handed to the stubbed LLM
+
+    async def _fake_llm(messages, **kwargs):  # stand-in for the real LLM call
+        seen["prompt"] = messages[0]["content"]
+        return "{}"  # reply content is not inspected by this test
+
+    r._llm = _fake_llm
+
+    asyncio.run(r._create_plan("what changed this year"))
+
+    assert _this_year() in seen["prompt"]
+    # The base template must not contain the year itself, so the year seen above
+    # can only have come from the prepended current_date_context() preamble.
+    assert _this_year() not in RESEARCH_PLAN_PROMPT