Inject current date into deep research planning and query prompts (#1347)

Deep research generated search queries from the LLM's training-cutoff
knowledge, so it emitted stale-year queries like "best Python tutorials
2025" when the actual year is later (issue #1341). The chat/agent path
already grounds the model with "Today is ..." (src/agent_loop.py); the
deep research planning and query-generation prompts had no equivalent.

Add a small current_date_context() helper and prepend its output at the plan
and query-generation prompt sites (and in the research_handler plan preview
path, which reuses RESEARCH_PLAN_PROMPT). The date is taken in the system's
local timezone and formatted with portable strftime directives only.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
lekt8
2026-06-03 02:00:52 +08:00
committed by GitHub
parent b2291fad49
commit ce7f5dbbdd
3 changed files with 87 additions and 4 deletions

View File

@@ -11,6 +11,7 @@ import json
import logging import logging
import re import re
import time import time
from datetime import datetime
from typing import Callable, Dict, List, Optional, Set from typing import Callable, Dict, List, Optional, Set
from src.research_utils import strip_thinking, is_low_quality from src.research_utils import strip_thinking, is_low_quality
@@ -19,6 +20,20 @@ from src.goal_based_extractor import EXTRACTOR_PROMPT
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def current_date_context(now: Optional[datetime] = None) -> str:
    """Return a preamble that grounds planning/query-generation LLMs in the date.

    Without it the model falls back to its training-cutoff year and emits
    queries like "best Python tutorials 2025" when the year is actually 2026.

    Args:
        now: The moment to describe. When omitted, the current time in the
            system's local timezone is used so the date matches what the user
            sees. Passing a fixed datetime makes the output deterministic,
            which is mainly useful in tests.

    Returns:
        Instruction text ending in a blank line, ready to be prepended to a
        prompt.
    """
    if now is None:
        now = datetime.now().astimezone()

    # Portable strftime directives only (no platform-specific %-d / %#d).
    long_date = now.strftime("%B %d, %Y")
    iso_date = now.strftime("%Y-%m-%d")
    year = now.strftime("%Y")

    return (
        f"Today's date is {long_date} ({iso_date}). "
        f"When a search query needs a year or refers to 'latest'/'current'/"
        f"'this year', use {year} or relative wording — never a "
        f"year inferred from training data.\n\n"
    )
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Prompts # Prompts
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -364,7 +379,7 @@ class DeepResearcher:
# ------------------------------------------------------------------ # ------------------------------------------------------------------
async def _create_plan(self, question: str) -> str: async def _create_plan(self, question: str) -> str:
"""LLM analyzes the question and creates a research plan.""" """LLM analyzes the question and creates a research plan."""
prompt = RESEARCH_PLAN_PROMPT.format(question=question) prompt = current_date_context() + RESEARCH_PLAN_PROMPT.format(question=question)
try: try:
response = await self._llm( response = await self._llm(
[{"role": "user", "content": prompt}], [{"role": "user", "content": prompt}],
@@ -439,7 +454,7 @@ class DeepResearcher:
"that the report doesn't yet cover well." "that the report doesn't yet cover well."
) )
prompt = QUERY_GEN_PROMPT.format( prompt = current_date_context() + QUERY_GEN_PROMPT.format(
question=question, question=question,
research_plan=self.research_plan or "(No plan — search broadly.)", research_plan=self.research_plan or "(No plan — search broadly.)",
report=report or "(No findings yet.)", report=report or "(No findings yet.)",

View File

@@ -161,10 +161,10 @@ class ResearchHandler:
) -> Optional[dict]: ) -> Optional[dict]:
"""Generate a research plan for user review before starting research.""" """Generate a research plan for user review before starting research."""
try: try:
from src.deep_research import RESEARCH_PLAN_PROMPT from src.deep_research import RESEARCH_PLAN_PROMPT, current_date_context
from src.llm_core import llm_call_async from src.llm_core import llm_call_async
prompt = RESEARCH_PLAN_PROMPT.format(question=query) prompt = current_date_context() + RESEARCH_PLAN_PROMPT.format(question=query)
response = await llm_call_async( response = await llm_call_async(
url=llm_endpoint, url=llm_endpoint,
model=llm_model, model=llm_model,

View File

@@ -0,0 +1,68 @@
"""Regression tests for issue #1341 — deep research used the model's
training-cutoff year (e.g. "best Python tutorials 2025") because the
query-generation and planning prompts never told the LLM the current date.
The chat/agent path already injects "Today is ..." (src/agent_loop.py); deep
research had no equivalent. These tests pin that the current year now reaches
the LLM at both the planning and query-generation steps, without needing a live
LLM or DB.
"""
import asyncio
from datetime import datetime
from src.deep_research import (
DeepResearcher,
current_date_context,
RESEARCH_PLAN_PROMPT,
)
def _this_year() -> str:
    """Return the current local-timezone year, formatted with ``%Y``."""
    local_now = datetime.now().astimezone()
    return f"{local_now:%Y}"
def test_current_date_context_names_the_real_year():
    """The preamble names the current year and warns off training-data years."""
    preamble = current_date_context()
    expected_year = _this_year()
    assert expected_year in preamble
    # It must actively steer the model away from training-data years.
    lowered = preamble.lower()
    assert "training data" in lowered
def test_generate_queries_prompt_carries_the_current_year():
    """The query-generation prompt handed to the LLM contains the current year."""
    captured = {}

    async def _recording_llm(messages, **kwargs):
        # Record the prompt the LLM would see, then answer with a JSON array.
        captured["prompt"] = messages[0]["content"]
        return '["python tutorials", "python guides"]'

    # Build without the heavy __init__; only the attributes that
    # _generate_queries is expected to read are set here.
    researcher = DeepResearcher.__new__(DeepResearcher)
    researcher.research_plan = ""
    researcher.queries_used = set()
    researcher._llm = _recording_llm

    generated = asyncio.run(
        researcher._generate_queries("best python tutorials", "", 1)
    )

    assert generated  # sanity: the JSON array parsed
    # The fix: the real current year is in the prompt the LLM actually sees.
    assert _this_year() in captured["prompt"]
def test_plan_prompt_carries_the_current_year():
    """The planning prompt handed to the LLM contains the current year."""
    captured = {}

    async def _recording_llm(messages, **kwargs):
        # Record the prompt the LLM would see, then answer with an empty object.
        captured["prompt"] = messages[0]["content"]
        return "{}"

    researcher = DeepResearcher.__new__(DeepResearcher)
    researcher._llm = _recording_llm

    asyncio.run(researcher._create_plan("what changed this year"))

    assert _this_year() in captured["prompt"]
    # The base template itself stays year-agnostic; the year comes from the
    # prepended context, proving the wiring (not a hard-coded prompt edit).
    assert _this_year() not in RESEARCH_PLAN_PROMPT