fix(mcp): sanitize and cap rendered MCP tool param hints (#2682)
This commit is contained in:
@@ -8,6 +8,7 @@ Each server exposes tools that are made available to the agent loop.
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -30,6 +31,28 @@ def _format_mcp_connection_error(name: str, command: str = "", args: Optional[Li
|
||||
return raw_error
|
||||
|
||||
|
||||
# Caps for rendering untrusted MCP tool schemas into the agent prompt (issue #2660).
|
||||
# MCP servers are third-party/user-added, so field names and parameter counts are
|
||||
# untrusted input — bound them so an odd or hostile schema cannot distort the prompt.
|
||||
_MCP_PARAM_MAX = 12 # max params rendered per tool
|
||||
_MCP_TOKEN_MAX = 40 # max chars per rendered name / type token
|
||||
_MCP_HINT_MAX = 300 # total-length backstop for the whole hint
|
||||
|
||||
|
||||
def _sanitize_schema_token(value: Any, limit: int = _MCP_TOKEN_MAX) -> str:
|
||||
"""Make an untrusted JSON-Schema token safe to splice into the prompt.
|
||||
|
||||
Replaces control chars / newlines with a space, collapses whitespace, and
|
||||
length-caps the result, so a weird field name or type cannot inject newlines
|
||||
or run on. Normal short identifiers pass through unchanged.
|
||||
"""
|
||||
text = re.sub(r"[\x00-\x1f\x7f]+", " ", str(value))
|
||||
text = re.sub(r"\s+", " ", text).strip()
|
||||
if len(text) > limit:
|
||||
text = text[:limit].rstrip() + "…"
|
||||
return text
|
||||
|
||||
|
||||
def _format_mcp_params(input_schema: Any) -> str:
|
||||
"""Render an MCP tool's JSON-Schema inputs as a compact prompt hint.
|
||||
|
||||
@@ -38,6 +61,9 @@ def _format_mcp_params(input_schema: Any) -> str:
|
||||
` Args (JSON): {"path": string (required), "limit": integer}` — names,
|
||||
coarse types, and required-ness, kept short so it stays prompt-friendly.
|
||||
Returns "" when there are no parameters.
|
||||
|
||||
MCP servers are third-party, so names/types are sanitized and the parameter
|
||||
count + total length are capped (issue #2660); normal schemas are unaffected.
|
||||
"""
|
||||
if not isinstance(input_schema, dict):
|
||||
return ""
|
||||
@@ -46,16 +72,22 @@ def _format_mcp_params(input_schema: Any) -> str:
|
||||
return ""
|
||||
required = set(input_schema.get("required") or [])
|
||||
parts = []
|
||||
for pname, pinfo in props.items():
|
||||
for pname, pinfo in list(props.items())[:_MCP_PARAM_MAX]:
|
||||
pinfo = pinfo if isinstance(pinfo, dict) else {}
|
||||
ptype = pinfo.get("type") or "any"
|
||||
if isinstance(ptype, list):
|
||||
ptype = "|".join(str(x) for x in ptype)
|
||||
tag = f'"{pname}": {ptype}'
|
||||
tag = f'"{_sanitize_schema_token(pname)}": {_sanitize_schema_token(ptype)}'
|
||||
if pname in required:
|
||||
tag += " (required)"
|
||||
parts.append(tag)
|
||||
return " Args (JSON): {" + ", ".join(parts) + "}"
|
||||
extra = len(props) - len(parts)
|
||||
if extra > 0:
|
||||
parts.append(f"…+{extra} more")
|
||||
hint = " Args (JSON): {" + ", ".join(parts) + "}"
|
||||
if len(hint) > _MCP_HINT_MAX:
|
||||
hint = hint[:_MCP_HINT_MAX - 1].rstrip() + "…"
|
||||
return hint
|
||||
|
||||
|
||||
class McpManager:
|
||||
|
||||
Reference in New Issue
Block a user