From ce7f5dbbdd2e4cd8d80e279ce760a90e64c31fc2 Mon Sep 17 00:00:00 2001 From: lekt8 Date: Wed, 3 Jun 2026 02:00:52 +0800 Subject: [PATCH] Inject current date into deep research planning and query prompts (#1347) Deep research generated search queries from the LLM's training-cutoff knowledge, so it emitted stale-year queries like "best Python tutorials 2025" when the actual year is later (issue #1341). The chat/agent path already grounds the model with "Today is ..." (src/agent_loop.py); the deep research planning and query-generation prompts had no equivalent. Add a small current_date_context() helper and prepend it at the plan and query-generation prompt sites (and the research_handler plan preview path that reuses RESEARCH_PLAN_PROMPT). System-TZ local, portable strftime. Co-authored-by: Claude Opus 4.8 (1M context) --- src/deep_research.py | 19 ++++++- src/research_handler.py | 4 +- tests/test_deep_research_date_context.py | 68 ++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 tests/test_deep_research_date_context.py diff --git a/src/deep_research.py b/src/deep_research.py index 1c4ea39..245c476 100644 --- a/src/deep_research.py +++ b/src/deep_research.py @@ -11,6 +11,7 @@ import json import logging import re import time +from datetime import datetime from typing import Callable, Dict, List, Optional, Set from src.research_utils import strip_thinking, is_low_quality @@ -19,6 +20,20 @@ from src.goal_based_extractor import EXTRACTOR_PROMPT logger = logging.getLogger(__name__) + +def current_date_context() -> str: + """Preamble that grounds query-generation/planning LLMs in the real current + date. Without it the model falls back to its training-cutoff year and emits + queries like "best Python tutorials 2025" when the year is actually 2026. + System TZ-local so it matches what the user sees. Portable strftime only.""" + now = datetime.now().astimezone() + return ( + f"Today's date is {now.strftime('%B %d, %Y')} ({now.strftime('%Y-%m-%d')}). " + f"When a search query needs a year or refers to 'latest'/'current'/" + f"'this year', use {now.strftime('%Y')} or relative wording — never a " + f"year inferred from training data.\n\n" + ) + # --------------------------------------------------------------------------- # Prompts # --------------------------------------------------------------------------- @@ -364,7 +379,7 @@ class DeepResearcher: # ------------------------------------------------------------------ async def _create_plan(self, question: str) -> str: """LLM analyzes the question and creates a research plan.""" - prompt = RESEARCH_PLAN_PROMPT.format(question=question) + prompt = current_date_context() + RESEARCH_PLAN_PROMPT.format(question=question) try: response = await self._llm( [{"role": "user", "content": prompt}], @@ -439,7 +454,7 @@ class DeepResearcher: "that the report doesn't yet cover well." ) - prompt = QUERY_GEN_PROMPT.format( + prompt = current_date_context() + QUERY_GEN_PROMPT.format( question=question, research_plan=self.research_plan or "(No plan — search broadly.)", report=report or "(No findings yet.)", diff --git a/src/research_handler.py b/src/research_handler.py index 381530b..aed9cb6 100644 --- a/src/research_handler.py +++ b/src/research_handler.py @@ -161,10 +161,10 @@ class ResearchHandler: ) -> Optional[dict]: """Generate a research plan for user review before starting research.""" try: - from src.deep_research import RESEARCH_PLAN_PROMPT + from src.deep_research import RESEARCH_PLAN_PROMPT, current_date_context from src.llm_core import llm_call_async - prompt = RESEARCH_PLAN_PROMPT.format(question=query) + prompt = current_date_context() + RESEARCH_PLAN_PROMPT.format(question=query) response = await llm_call_async( url=llm_endpoint, model=llm_model, diff --git a/tests/test_deep_research_date_context.py b/tests/test_deep_research_date_context.py new file mode 100644 index 0000000..5096ac3 --- /dev/null +++ b/tests/test_deep_research_date_context.py @@ -0,0 +1,68 @@ +"""Regression tests for issue #1341 — deep research used the model's +training-cutoff year (e.g. "best Python tutorials 2025") because the +query-generation and planning prompts never told the LLM the current date. + +The chat/agent path already injects "Today is ..." (src/agent_loop.py); deep +research had no equivalent. These tests pin that the current year now reaches +the LLM at both the planning and query-generation steps, without needing a live +LLM or DB. +""" +import asyncio +from datetime import datetime + +from src.deep_research import ( + DeepResearcher, + current_date_context, + RESEARCH_PLAN_PROMPT, +) + + +def _this_year() -> str: + return datetime.now().astimezone().strftime("%Y") + + +def test_current_date_context_names_the_real_year(): + ctx = current_date_context() + assert _this_year() in ctx + # It must actively steer the model away from training-data years. + assert "training data" in ctx.lower() + + +def test_generate_queries_prompt_carries_the_current_year(): + # Build without the heavy __init__; _generate_queries only needs these. + r = DeepResearcher.__new__(DeepResearcher) + r.research_plan = "" + r.queries_used = set() + + seen = {} + + async def _fake_llm(messages, **kwargs): + seen["prompt"] = messages[0]["content"] + return '["python tutorials", "python guides"]' + + r._llm = _fake_llm + + queries = asyncio.run(r._generate_queries("best python tutorials", "", 1)) + + assert queries # sanity: the JSON array parsed + # The fix: the real current year is in the prompt the LLM actually sees. + assert _this_year() in seen["prompt"] + + +def test_plan_prompt_carries_the_current_year(): + r = DeepResearcher.__new__(DeepResearcher) + + seen = {} + + async def _fake_llm(messages, **kwargs): + seen["prompt"] = messages[0]["content"] + return "{}" + + r._llm = _fake_llm + + asyncio.run(r._create_plan("what changed this year")) + + assert _this_year() in seen["prompt"] + # The base template itself stays year-agnostic; the year comes from the + # prepended context, proving the wiring (not a hard-coded prompt edit). + assert _this_year() not in RESEARCH_PLAN_PROMPT