fix(compare): stop blind mode leaking model identities via session names (#1318)

Blind Compare anonymized the pane headers, but each pane still created a helper chat session named "[CMP] <real-model>" and GET /api/sessions returned the session's model field. So the sidebar and the session-list API let a user map "Model A" back to its real model before voting, defeating the blind test. - Frontend (static/js/compare/index.js, panes.js): in blind mode, name helper sessions by their neutral slot ("[CMP] Model A") instead of the model, matching the existing blind pane labels. - Backend GET /api/sessions (routes/session_routes.py): blank the model field for [CMP]-prefixed helper sessions via a new _public_model helper. - Backend /api/compare/start (routes/compare_routes.py): name blind sessions by slot and withhold model_left/model_right/mapping from the blind response (revealed at /vote). - Tests: tests/test_blind_compare_redaction.py. Fixes #1285. Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 06:39:01 +03:00
parent 3d8c364689
commit 1c43daa564
5 changed files with 153 additions and 22 deletions
--- a/routes/compare_routes.py
+++ b/routes/compare_routes.py
@@ -66,13 +66,33 @@ def setup_compare_routes(session_manager: SessionManager):
        comp_id = str(uuid.uuid4())
        sid_a = str(uuid.uuid4())
        sid_b = str(uuid.uuid4())
        user = getattr(request.state, 'current_user', None)
        # Blind mapping: randomly assign left/right
        blind = str(is_blind).lower() == "true"
        if blind:
            mapping = {"left": "a", "right": "b"}
            if random.random() > 0.5:
                mapping = {"left": "b", "right": "a"}
        else:
            mapping = {"left": "a", "right": "b"}
        # Map session IDs to left/right based on blind mapping
        session_left = sid_a if mapping["left"] == "a" else sid_b
        session_right = sid_a if mapping["right"] == "a" else sid_b
        # In blind mode, name the helper sessions by their neutral slot
        # ("Model A" / "Model B") instead of the real model. Otherwise the
        # session name leaks the model in the sidebar and GET /api/sessions,
        # de-anonymizing the comparison before the user votes (issue #1285).
        slot_name = {session_left: "Model A", session_right: "Model B"}
        # Create ephemeral sessions (prefixed [CMP])
        for sid, model, endpoint in [(sid_a, model_a, endpoint_a), (sid_b, model_b, endpoint_b)]:
-            user = getattr(request.state, 'current_user', None)
+            name = f"[CMP] {slot_name[sid]}" if blind else f"[CMP] {model.split('/')[-1]}"
            session_manager.create_session(
                session_id=sid,
-                name=f"[CMP] {model.split('/')[-1]}",
+                name=name,
                endpoint_url=endpoint,
                model=model,
                rag=False,
@@ -93,15 +113,6 @@ def setup_compare_routes(session_manager: SessionManager):
            finally:
                db.close()
        # Blind mapping: randomly assign left/right
        blind = str(is_blind).lower() == "true"
        if blind:
            mapping = {"left": "a", "right": "b"}
            if random.random() > 0.5:
                mapping = {"left": "b", "right": "a"}
        else:
            mapping = {"left": "a", "right": "b"}
        # Store comparison record
        db = SessionLocal()
        try:
@@ -121,18 +132,18 @@ def setup_compare_routes(session_manager: SessionManager):
        finally:
            db.close()
-        # Map session IDs to left/right based on blind mapping
+        # In blind mode, withhold the model identities AND the left/right
-        session_left = sid_a if mapping["left"] == "a" else sid_b
+        # mapping from the response. The client already knows model_a/model_b
-        session_right = sid_a if mapping["right"] == "a" else sid_b
+        # (it sent them), so returning either would defeat blind mode. They are
-
+        # revealed by POST /api/compare/{id}/vote once the user has voted (#1285).
        return {
            "id": comp_id,
            "session_left": session_left,
            "session_right": session_right,
-            "model_left": model_a if mapping["left"] == "a" else model_b,
+            "model_left": None if blind else (model_a if mapping["left"] == "a" else model_b),
-            "model_right": model_a if mapping["right"] == "a" else model_b,
+            "model_right": None if blind else (model_a if mapping["right"] == "a" else model_b),
            "is_blind": blind,
-            "mapping": mapping,
+            "mapping": None if blind else mapping,
        }
    @router.post("/{comp_id}/vote")
--- a/routes/session_routes.py
+++ b/routes/session_routes.py
@@ -21,6 +21,22 @@ def _sanitize_export_filename(name: str) -> str:
    return name[:128]
 # Blind-compare helper sessions are created with this name prefix. Their real
 # model must never surface in the session list / sidebar — otherwise a blind
 # comparison can be de-anonymized before the user votes (issue #1285).
 COMPARE_SESSION_PREFIX = "[CMP] "
 def _public_model(name: str, model: str) -> str:
    """Blank out the real model of blind-compare helper sessions so the
    session list can't be used to map a neutral pane label ("Model A") back
    to its model. The Compare UI tracks models client-side, so hiding it here
    costs the sidebar nothing. See issue #1285."""
    if (name or "").startswith(COMPARE_SESSION_PREFIX):
        return ""
    return model
 def _verify_session_owner(request: Request, session_id: str, session_manager=None):
    """Verify the current user owns the session. Raises 404 if not.
@@ -215,7 +231,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        finally:
            db.close()
-        sessions = [{"id": s.id, "name": s.name, "model": s.model,
+        sessions = [{"id": s.id, "name": s.name, "model": _public_model(s.name, s.model),
                     "endpoint_url": s.endpoint_url, "rag": s.rag,
                     "archived": s.archived, "folder": folder_map.get(s.id),
                     "total_tokens": token_map.get(s.id, 0),
--- a/static/js/compare/index.js
+++ b/static/js/compare/index.js
@@ -210,7 +210,9 @@ async function _buildCompareUI() {
    for (let i = 0; i < n; i++) {
      const m = state._selectedModels[i];
      const fd = new FormData();
-      fd.append('name', '[CMP] ' + modelShorts[i]);
+      // Blind mode: name the session by its neutral slot so the sidebar /
      // GET /api/sessions can't de-anonymize the comparison (issue #1285).
      fd.append('name', '[CMP] ' + (state._blindMode ? 'Model ' + _slotChar(i) : modelShorts[i]));
      fd.append('endpoint_url', m.endpoint || '');
      fd.append('model', m.model || '');
      if (m.endpointId) {
--- a/static/js/compare/panes.js
+++ b/static/js/compare/panes.js
@@ -382,7 +382,8 @@ async function _createAndAppendPane(m) {
  // Create session
  const fd = new FormData();
-  fd.append('name', '[CMP] ' + m.name);
+  // Blind mode: neutral slot name only — never leak the model (issue #1285).
  fd.append('name', '[CMP] ' + (state._blindMode ? 'Model ' + _slotChar(i) : m.name));
  fd.append('endpoint_url', m.url || '');
  fd.append('model', m.id || '');
  if (m.endpointId) {
@@ -584,7 +585,8 @@ function _showModelSwapDropdown(paneIdx, titleBtn) {
        fetch(`${state.API_BASE}/api/session/${oldSid}`, { method: 'DELETE' }).catch(() => {});
      }
      const fd = new FormData();
-      fd.append('name', '[CMP] ' + m.name);
+      // Blind mode: neutral slot name only — never leak the model (issue #1285).
      fd.append('name', '[CMP] ' + (state._blindMode ? 'Model ' + _slotChar(paneIdx) : m.name));
      fd.append('endpoint_url', m.url || '');
      fd.append('model', m.id || '');
      if (m.endpointId) {
--- a/tests/test_blind_compare_redaction.py
+++ b/tests/test_blind_compare_redaction.py
@@ -0,0 +1,100 @@
 """Regression tests for issue #1285 — blind Compare must not leak model
 identities through helper-session names or GET /api/sessions.
 Two guards are pinned here:
 1. Backend: ``routes.session_routes._public_model`` blanks the ``model`` field
   of any ``[CMP] …`` helper session in the session list, so the sidebar /
   ``/api/sessions`` can't be used to map a neutral pane label ("Model A")
   back to its real model.
 2. Frontend: every ``[CMP]`` session name built in ``static/js/compare/`` is
   guarded by ``state._blindMode`` so blind sessions are named by slot rather
   than by the real model.
 The backend import mirrors tests/test_session_ghost_delete.py: stub the heavy
 ORM modules so the real route module imports under conftest's MagicMock
 sqlalchemy stub, then restore sys.modules so the stubs don't leak into sibling
 test modules.
 """
 import sys
 import importlib
 from pathlib import Path
 from unittest.mock import MagicMock
 _REPO = Path(__file__).resolve().parent.parent
 # Mirror tests/test_session_ghost_delete.py exactly: stub only the ORM *class*
 # modules and import the REAL core.session_manager + src.auth_helpers. pytest
 # caches routes.session_routes after the first import, so stubbing auth_helpers /
 # session_manager here would poison the shared module for the sibling session
 # tests (whichever file is collected first wins). Matching their stub set keeps
 # the cached module identical regardless of collection order.
 _ABSENT = object()
 _TEMP_STUBS = ("core.database", "core.models", "src.request_models")
 _saved = {name: sys.modules.get(name, _ABSENT) for name in _TEMP_STUBS}
 _saved["core.session_manager"] = sys.modules.get("core.session_manager", _ABSENT)
 try:
    for _name in _TEMP_STUBS:
        sys.modules[_name] = MagicMock(name=_name)
    if isinstance(sys.modules.get("core.session_manager"), MagicMock):
        del sys.modules["core.session_manager"]
    importlib.import_module("core.session_manager")
    import routes.session_routes as SR  # noqa: E402
 finally:
    for _name, _val in _saved.items():
        if _val is _ABSENT:
            sys.modules.pop(_name, None)
        else:
            sys.modules[_name] = _val
 # ── backend: GET /api/sessions model redaction ─────────────────────────────
 def test_public_model_blanks_blind_compare_sessions():
    """A blind-compare helper session ("[CMP] Model A") must not expose its
    real model in the session list — that is the de-anonymization vector."""
    assert SR._public_model("[CMP] Model A", "gpt-4o") == ""
    assert SR._public_model("[CMP] Model B", "llama-3.1-70b") == ""
 def test_public_model_blanks_any_cmp_prefixed_session():
    """Defense in depth: even a non-blind [CMP] session (named after the real
    model) gets its model field blanked. The name already carries whatever the
    user chose to reveal, and the session list never needs the raw model."""
    assert SR._public_model("[CMP] gpt-4o", "gpt-4o") == ""
 def test_public_model_preserves_normal_sessions():
    """Ordinary chats are untouched — only the [CMP] prefix triggers redaction.
    The post-vote "Compare: a vs b" folder is a normal session, not a helper."""
    assert SR._public_model("My research chat", "gpt-4o") == "gpt-4o"
    assert SR._public_model("", "claude-sonnet") == "claude-sonnet"
    assert SR._public_model("Compare: gpt-4o vs llama", "gpt-4o") == "gpt-4o"
 def test_compare_prefix_constant_matches_frontend():
    """The redaction prefix must match what the frontend prepends, or the
    guard silently stops matching new sessions."""
    assert SR.COMPARE_SESSION_PREFIX == "[CMP] "
 # ── frontend: every [CMP] session name is blind-guarded ────────────────────
 def test_compare_session_names_are_blind_guarded():
    """Every line in static/js/compare/ that builds a '[CMP]' session name
    must branch on state._blindMode, so a blind comparison is never named
    after its real model. Pins the #1285 fix against regressions."""
    compare_dir = _REPO / "static" / "js" / "compare"
    assert compare_dir.is_dir(), f"missing {compare_dir}"
    offenders = []
    for path in sorted(compare_dir.glob("*.js")):
        for lineno, line in enumerate(
            path.read_text(encoding="utf-8").splitlines(), 1
        ):
            if "'[CMP] '" in line and "_blindMode" not in line:
                offenders.append(f"{path.name}:{lineno}: {line.strip()}")
    assert not offenders, (
        "Compare session names must be blind-guarded (issue #1285):\n"
        + "\n".join(offenders)
    )