Inject current date into deep research planning and query prompts (#1347)

Deep research generated search queries from the LLM's training-cutoff
knowledge, so it emitted stale-year queries like "best Python tutorials
2025" when the actual year is later (issue #1341). The chat/agent path
already grounds the model with "Today is ..." (src/agent_loop.py); the
deep research planning and query-generation prompts had no equivalent.

Add a small current_date_context() helper and prepend it at the plan and
query-generation prompt sites (and the research_handler plan preview path
that reuses RESEARCH_PLAN_PROMPT). The date is taken in the system's local
timezone and formatted with portable strftime directives only.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
lekt8
2026-06-03 02:00:52 +08:00
committed by GitHub
parent b2291fad49
commit ce7f5dbbdd
3 changed files with 87 additions and 4 deletions

View File

@@ -11,6 +11,7 @@ import json
import logging
import re
import time
from datetime import datetime
from typing import Callable, Dict, List, Optional, Set
from src.research_utils import strip_thinking, is_low_quality
@@ -19,6 +20,20 @@ from src.goal_based_extractor import EXTRACTOR_PROMPT
logger = logging.getLogger(__name__)
def current_date_context(now: Optional[datetime] = None) -> str:
    """Return a prompt preamble that grounds the LLM in the real current date.

    Without it the model falls back to its training-cutoff year and emits
    queries like "best Python tutorials 2025" when the year is actually 2026.

    Args:
        now: The moment to describe. When omitted, the current time in the
            system's local timezone is used so the date matches what the
            user sees. Passing an explicit value makes the output
            deterministic (useful in tests).

    Returns:
        The preamble text, terminated by a blank line so it can be prepended
        directly to a prompt template.
    """
    if now is None:
        now = datetime.now().astimezone()
    # Portable strftime directives only (%B, %d, %Y, %m) -- no
    # platform-specific variants such as %-d.
    long_date = now.strftime('%B %d, %Y')
    iso_date = now.strftime('%Y-%m-%d')
    year = now.strftime('%Y')
    return (
        f"Today's date is {long_date} ({iso_date}). "
        f"When a search query needs a year or refers to 'latest'/'current'/"
        f"'this year', use {year} or relative wording — never a "
        f"year inferred from training data.\n\n"
    )
# ---------------------------------------------------------------------------
# Prompts
# ---------------------------------------------------------------------------
@@ -364,7 +379,7 @@ class DeepResearcher:
# ------------------------------------------------------------------
async def _create_plan(self, question: str) -> str:
"""LLM analyzes the question and creates a research plan."""
prompt = RESEARCH_PLAN_PROMPT.format(question=question)
prompt = current_date_context() + RESEARCH_PLAN_PROMPT.format(question=question)
try:
response = await self._llm(
[{"role": "user", "content": prompt}],
@@ -439,7 +454,7 @@ class DeepResearcher:
"that the report doesn't yet cover well."
)
prompt = QUERY_GEN_PROMPT.format(
prompt = current_date_context() + QUERY_GEN_PROMPT.format(
question=question,
research_plan=self.research_plan or "(No plan — search broadly.)",
report=report or "(No findings yet.)",

View File

@@ -161,10 +161,10 @@ class ResearchHandler:
) -> Optional[dict]:
"""Generate a research plan for user review before starting research."""
try:
from src.deep_research import RESEARCH_PLAN_PROMPT
from src.deep_research import RESEARCH_PLAN_PROMPT, current_date_context
from src.llm_core import llm_call_async
prompt = RESEARCH_PLAN_PROMPT.format(question=query)
prompt = current_date_context() + RESEARCH_PLAN_PROMPT.format(question=query)
response = await llm_call_async(
url=llm_endpoint,
model=llm_model,

View File

@@ -0,0 +1,68 @@
"""Regression tests for issue #1341 — deep research used the model's
training-cutoff year (e.g. "best Python tutorials 2025") because the
query-generation and planning prompts never told the LLM the current date.
The chat/agent path already injects "Today is ..." (src/agent_loop.py); deep
research had no equivalent. These tests pin that the current year now reaches
the LLM at both the planning and query-generation steps, without needing a live
LLM or DB.
"""
import asyncio
from datetime import datetime
from src.deep_research import (
DeepResearcher,
current_date_context,
RESEARCH_PLAN_PROMPT,
)
def _this_year() -> str:
    """Return the current year, read in the system's local timezone, as text."""
    local_now = datetime.now().astimezone()
    return local_now.strftime("%Y")
def test_current_date_context_names_the_real_year():
    """The preamble names the current year and warns about training data."""
    preamble = current_date_context()
    assert _this_year() in preamble
    # The preamble has to explicitly steer the model away from years it
    # picked up from training data.
    lowered = preamble.lower()
    assert "training data" in lowered
def test_generate_queries_prompt_carries_the_current_year():
    """The prompt sent by _generate_queries contains the current year."""
    captured = {}

    async def _fake_llm(messages, **kwargs):
        # Record the prompt the LLM would receive, then answer with a
        # JSON array of queries.
        captured["prompt"] = messages[0]["content"]
        return '["python tutorials", "python guides"]'

    # Bypass the heavy __init__; only the attributes set below are needed by
    # _generate_queries.
    researcher = DeepResearcher.__new__(DeepResearcher)
    researcher.research_plan = ""
    researcher.queries_used = set()
    researcher._llm = _fake_llm

    queries = asyncio.run(
        researcher._generate_queries("best python tutorials", "", 1)
    )

    # Sanity check: the JSON array returned by the fake LLM was parsed.
    assert queries
    # The fix under test: the real current year is in the prompt the LLM sees.
    assert _this_year() in captured["prompt"]
def test_plan_prompt_carries_the_current_year():
    """The prompt sent by _create_plan contains the current year."""
    captured = {}

    async def _fake_llm(messages, **kwargs):
        # Record the prompt the LLM would receive and reply with an empty
        # JSON object.
        captured["prompt"] = messages[0]["content"]
        return "{}"

    researcher = DeepResearcher.__new__(DeepResearcher)
    researcher._llm = _fake_llm

    asyncio.run(researcher._create_plan("what changed this year"))

    current_year = _this_year()
    assert current_year in captured["prompt"]
    # The base template stays year-agnostic: the year arrives through the
    # prepended context, which proves the wiring rather than a hard-coded
    # edit to the prompt text.
    assert _this_year() not in RESEARCH_PLAN_PROMPT