Fix issue 135 chat context bleed (#281)

* Fix issue 135 chat context bleed

* Guard task delivery metadata access
This commit is contained in:
Massab K.
2026-06-04 17:27:46 +05:00
committed by GitHub
parent 7b45a94b6d
commit 594775dc4b
5 changed files with 57 additions and 12 deletions

View File

@@ -525,7 +525,24 @@ def setup_chat_routes(
_doc_q = _doc_db.query(DBDocument).filter(DBDocument.id == active_doc_id) _doc_q = _doc_db.query(DBDocument).filter(DBDocument.id == active_doc_id)
active_doc = _owner_session_filter(_doc_q, ctx.user).first() active_doc = _owner_session_filter(_doc_q, ctx.user).first()
if active_doc: if active_doc:
logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}") doc_session = active_doc.session_id
doc_owner = getattr(active_doc, "owner", None)
if doc_owner and ctx.user and doc_owner != ctx.user:
logger.warning(
"[doc-inject] ignoring active_doc_id %s owned by another user",
active_doc_id,
)
active_doc = None
elif doc_session and doc_session != session:
logger.warning(
"[doc-inject] ignoring stale active_doc_id %s from session %s while in session %s",
active_doc_id,
doc_session,
session,
)
active_doc = None
else:
logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}")
else: else:
logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}") logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}")
if not active_doc: if not active_doc:

View File

@@ -94,7 +94,6 @@ logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api", tags=["sessions"]) router = APIRouter(prefix="/api", tags=["sessions"])
def _current_user_is_admin(request: Request, user: str | None) -> bool: def _current_user_is_admin(request: Request, user: str | None) -> bool:
if not user: if not user:
return False return False
@@ -142,6 +141,17 @@ def _persist_session_headers(session_id: str, headers: dict | None) -> None:
db.close() db.close()
_HIDDEN_SYSTEM_SESSION_NAMES = {
"[Task] Chat Sessions Tidy",
"[Task] Documents Tidy",
"[Task] Memory Tidy",
"[Task] Research Tidy",
"[Task] Email Mark Boundaries",
"[Task] Email Tags",
"[Task] Skills Audit",
}
def _pick_endpoint_for_sort(owner=None): def _pick_endpoint_for_sort(owner=None):
"""Pick model endpoint for auto-sort LLM call — uses utility endpoint setting, falls back to default.""" """Pick model endpoint for auto-sort LLM call — uses utility endpoint setting, falls back to default."""
from src.endpoint_resolver import resolve_endpoint from src.endpoint_resolver import resolve_endpoint
@@ -265,7 +275,8 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
"message_count": msg_count_map.get(s.id, 0)} "message_count": msg_count_map.get(s.id, 0)}
for s in user_sessions.values() for s in user_sessions.values()
if not s.archived if not s.archived
and (s.name or "").strip() not in ("Nobody", "Incognito")] and (s.name or "").strip() not in ("Nobody", "Incognito")
and (s.name or "").strip() not in _HIDDEN_SYSTEM_SESSION_NAMES]
return sessions return sessions

View File

@@ -38,13 +38,16 @@ class TaskDeferred(BaseException):
async def action_tidy_sessions(owner: str, **kwargs) -> Tuple[str, bool]: async def action_tidy_sessions(owner: str, **kwargs) -> Tuple[str, bool]:
"""Delete empty/throwaway sessions for the owner. Pure heuristic — """Delete empty sessions for the owner. Pure heuristic —
the LLM folder-sort phase is skipped (user opted to keep this task the LLM folder-sort phase is skipped (user opted to keep this task
LLM-free; sorting can be triggered manually via the Chats UI).""" LLM-free; sorting can be triggered manually via the Chats UI)."""
try: try:
import asyncio import asyncio
from src.session_actions import run_auto_sort from src.session_actions import run_auto_sort
result = await asyncio.wait_for(run_auto_sort(owner, skip_llm=True), timeout=60) result = await asyncio.wait_for(
run_auto_sort(owner, skip_llm=True, delete_throwaway=False),
timeout=60,
)
return result, True return result, True
except asyncio.TimeoutError: except asyncio.TimeoutError:
logger.error("tidy_sessions action timed out") logger.error("tidy_sessions action timed out")

View File

@@ -8,7 +8,7 @@ and the task scheduler / builtin actions system.
import json import json
import logging import logging
import re import re
from datetime import datetime from datetime import datetime, timedelta
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -22,9 +22,10 @@ _THROWAWAY_NAMES = {
"ok", "lol", "bruh", "hmm", "hm", "meh", "ok", "lol", "bruh", "hmm", "hm", "meh",
} }
_THROWAWAY_MAX_MESSAGES = 4 _THROWAWAY_MAX_MESSAGES = 4
_FRESH_EMPTY_SESSION_GRACE = timedelta(minutes=10)
async def run_auto_sort(owner: str, skip_llm: bool = False) -> str: async def run_auto_sort(owner: str, skip_llm: bool = False, delete_throwaway: bool = True) -> str:
"""Run session cleanup + (optional) AI folder sort for the given owner. """Run session cleanup + (optional) AI folder sort for the given owner.
Args: Args:
@@ -32,6 +33,7 @@ async def run_auto_sort(owner: str, skip_llm: bool = False) -> str:
skip_llm: when True, do only Phase 1 (delete empty/throwaway sessions); skip_llm: when True, do only Phase 1 (delete empty/throwaway sessions);
skip Phase 2 (AI folder assignment). Used by the built-in daily skip Phase 2 (AI folder assignment). Used by the built-in daily
background sweep so it never burns LLM tokens. background sweep so it never burns LLM tokens.
delete_throwaway: when False, only empty/incognito sessions are deleted.
Returns a human-readable summary of what was done. Returns a human-readable summary of what was done.
""" """
@@ -53,6 +55,8 @@ async def run_auto_sort(owner: str, skip_llm: bool = False) -> str:
for row in rows: for row in rows:
if getattr(row, 'is_important', False): if getattr(row, 'is_important', False):
continue continue
created_at = row.created_at or row.updated_at or datetime.utcnow()
is_fresh = (datetime.utcnow() - created_at) < _FRESH_EMPTY_SESSION_GRACE
if (row.name or "").strip() == "Incognito": if (row.name or "").strip() == "Incognito":
deleted_throwaway += 1 deleted_throwaway += 1
db.delete(row) db.delete(row)
@@ -64,9 +68,11 @@ async def run_auto_sort(owner: str, skip_llm: bool = False) -> str:
should_delete = False should_delete = False
if msg_count == 0: if msg_count == 0:
if is_fresh:
continue
should_delete = True should_delete = True
deleted_empty += 1 deleted_empty += 1
elif msg_count <= _THROWAWAY_MAX_MESSAGES: elif delete_throwaway and msg_count <= _THROWAWAY_MAX_MESSAGES:
name = (row.name or "").strip().lower() name = (row.name or "").strip().lower()
first_msg = db.query(DbMsg.content).filter( first_msg = db.query(DbMsg.content).filter(
DbMsg.session_id == row.id, DbMsg.role == "user" DbMsg.session_id == row.id, DbMsg.role == "user"

View File

@@ -979,10 +979,10 @@ class TaskScheduler:
task = db.query(ScheduledTask).filter(ScheduledTask.id == task_id).first() task = db.query(ScheduledTask).filter(ScheduledTask.id == task_id).first()
if not task: if not task:
return True return True
task_type = task.task_type or "llm" task_type = getattr(task, "task_type", "") or "llm"
if task_type != "action": if task_type != "action":
return True return True
return (task.action or "") in self._MODEL_BACKED_ACTIONS return (getattr(task, "action", "") or "") in self._MODEL_BACKED_ACTIONS
finally: finally:
db.close() db.close()
@@ -992,7 +992,7 @@ class TaskScheduler:
if "check-in" in (task.name or "").lower(): if "check-in" in (task.name or "").lower():
return return
# Built-in housekeeping noise stays out of the chat. # Built-in housekeeping noise stays out of the chat.
if (task.action or "") in self._SILENT_ACTIONS: if (getattr(task, "action", "") or "") in self._SILENT_ACTIONS:
return return
from src.assistant_log import log_to_assistant from src.assistant_log import log_to_assistant
log_to_assistant( log_to_assistant(
@@ -1408,6 +1408,12 @@ class TaskScheduler:
from core.database import Session as DbSession, ChatMessage, CrewMember from core.database import Session as DbSession, ChatMessage, CrewMember
output = task.output_target or "session" output = task.output_target or "session"
if (
output == "session"
and (getattr(task, "task_type", "") or "") == "action"
and (getattr(task, "action", "") or "") in self._SILENT_ACTIONS
):
return
if output.startswith("mcp__"): if output.startswith("mcp__"):
await self._deliver_via_mcp(output, task, result) await self._deliver_via_mcp(output, task, result)
return return
@@ -2069,6 +2075,8 @@ class TaskScheduler:
# Built-in housekeeping/action jobs should not create browser # Built-in housekeeping/action jobs should not create browser
# task notifications; user AI/research tasks still can. # task notifications; user AI/research tasks still can.
task.notifications_enabled = False task.notifications_enabled = False
if (task.output_target or "session") == "session":
task.output_target = defs.get("output_target", "none")
seeded = [] seeded = []
for action, defs in HOUSEKEEPING_DEFAULTS.items(): for action, defs in HOUSEKEEPING_DEFAULTS.items():
if action in existing_actions: if action in existing_actions:
@@ -2099,7 +2107,7 @@ class TaskScheduler:
# AI/email/calendar tasks opt into a paused starting state # AI/email/calendar tasks opt into a paused starting state
# via ship_paused so users can enable them deliberately. # via ship_paused so users can enable them deliberately.
status="paused" if ships_paused else "active", status="paused" if ships_paused else "active",
output_target="session", output_target=defs.get("output_target", "none"),
notifications_enabled=False, notifications_enabled=False,
) )
db.add(task) db.add(task)