From c3228f8b5960f1fb0f9ba7e7d88fd9cfcd0c6baa Mon Sep 17 00:00:00 2001 From: Tushar-Projects <204472109+Tushar-Projects@users.noreply.github.com> Date: Tue, 2 Jun 2026 17:27:42 +0530 Subject: [PATCH] Background tasks: respect active session model fallback --- routes/chat_helpers.py | 3 +++ services/memory/memory_extractor.py | 27 +++++++++++++++++++++++++-- services/memory/skill_extractor.py | 20 +++++++++++++++++++- src/endpoint_resolver.py | 11 ++++++++--- 4 files changed, 55 insertions(+), 6 deletions(-) diff --git a/routes/chat_helpers.py b/routes/chat_helpers.py index 7e7a764..06d886d 100644 --- a/routes/chat_helpers.py +++ b/routes/chat_helpers.py @@ -149,6 +149,9 @@ async def auto_name_session(session_manager, sess): t_url, t_model, t_headers = resolve_task_endpoint( sess.endpoint_url, sess.model, sess.headers, ) + if not t_model: + logger.debug("[auto-name] No model provided, skipping") + return # max_tokens big enough that reasoning models (Minimax M2, # DeepSeek R1, QwQ, etc.) have headroom for diff --git a/services/memory/memory_extractor.py b/services/memory/memory_extractor.py index 0f82ba8..c994bb9 100644 --- a/services/memory/memory_extractor.py +++ b/services/memory/memory_extractor.py @@ -235,6 +235,10 @@ async def extract_and_store( Designed to run as a background task (asyncio.create_task). Errors are logged, never raised. """ + if not endpoint_url or not model: + logger.debug("[memory-extract] No model or URL provided, skipping") + return + try: from src.llm_core import llm_call_async @@ -245,11 +249,30 @@ async def extract_and_store( if len(recent) < 2: return # Need at least a user message and assistant response - fallback_facts = _fallback_memory_candidates(recent) + # Strip media (images/audio) from messages — background memory extraction + # only needs the text. The VL-generated descriptions are already in the + # text content of the messages. This avoids sending image tokens to + # non-vision models and prevents accidental "vision grounding" triggers. + stripped_recent = [] + for msg in recent: + role = msg.get("role") + content = msg.get("content", "") + if isinstance(content, list): + # Filter out multimodal blocks that aren't text + text_only = [b for b in content if isinstance(b, dict) and b.get("type") == "text"] + if not text_only and content: + continue + content = text_only + stripped_recent.append({"role": role, "content": content}) + + if not stripped_recent: + return + + fallback_facts = _fallback_memory_candidates(stripped_recent) extraction_messages = [ {"role": "system", "content": EXTRACT_SYSTEM_PROMPT}, - ] + recent + ] + stripped_recent facts = [] try: diff --git a/services/memory/skill_extractor.py b/services/memory/skill_extractor.py index e0f3e3d..4e267a3 100644 --- a/services/memory/skill_extractor.py +++ b/services/memory/skill_extractor.py @@ -59,6 +59,10 @@ async def maybe_extract_skill( owner: Optional[str] = None, ): """Extract a skill if the agent run was complex enough.""" + if not model: + logger.debug("[skill-extract] No model provided, skipping") + return None + # Quiet by default; flip to DEBUG when chasing extractor issues. logger.debug( "[skill-extract] start: rounds=%d tools=%d model=%s owner=%s", @@ -78,9 +82,23 @@ async def maybe_extract_skill( logger.debug("[skill-extract] no recent messages, skipping") return None + # Strip media (images/audio) from messages + stripped_recent = [] + for msg in recent: + content = msg.get("content", "") + if isinstance(content, list): + text_only = [b for b in content if isinstance(b, dict) and b.get("type") == "text"] + if not text_only and content: + continue + content = text_only + stripped_recent.append({"role": msg.get("role"), "content": content}) + + if not stripped_recent: + return None + # Build conversation summary for extraction conv_lines = [] - for msg in recent: + for msg in stripped_recent: role = msg.get("role", "?") content = msg.get("content", "") if isinstance(content, list): diff --git a/src/endpoint_resolver.py b/src/endpoint_resolver.py index f0cd163..55f1b43 100644 --- a/src/endpoint_resolver.py +++ b/src/endpoint_resolver.py @@ -234,9 +234,14 @@ def resolve_endpoint( ep_id = _stg(f"{setting_prefix}_endpoint_id") model = _stg(f"{setting_prefix}_model") - # Unset Utility means "same as Default Chat Model". This keeps background - # features usable out of the box and lets users override Utility only when - # they explicitly want a separate cheaper/faster model. + # If the specific endpoint is not configured, but the caller provided a + # valid fallback (e.g. the active session model), use that immediately. + # This prevents background tasks from jumping to the global default_model + # when the user is mid-conversation with a different model. + if not ep_id and fallback_url and fallback_model: + return fallback_url, fallback_model, fallback_headers + + # Unset Utility means "same as Default Chat Model". if setting_prefix == "utility" and not ep_id: ep_id = _stg("default_endpoint_id") model = _stg("default_model")