fix(compare): stop blind mode leaking model identities via session names (#1318)
Blind Compare anonymized the pane headers, but each pane still created a helper chat session named "[CMP] <real-model>" and GET /api/sessions returned the session's model field. So the sidebar and the session-list API let a user map "Model A" back to its real model before voting, defeating the blind test.
- Frontend (static/js/compare/index.js, panes.js): in blind mode, name helper sessions by their neutral slot ("[CMP] Model A") instead of the model, matching the existing blind pane labels.
- Backend GET /api/sessions (routes/session_routes.py): blank the model field for [CMP]-prefixed helper sessions via a new _public_model helper.
- Backend /api/compare/start (routes/compare_routes.py): name blind sessions by slot and withhold model_left/model_right/mapping from the blind response (revealed at /vote).
- Tests: tests/test_blind_compare_redaction.py.
Fixes #1285.
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -66,13 +66,33 @@ def setup_compare_routes(session_manager: SessionManager):
|
|||||||
comp_id = str(uuid.uuid4())
|
comp_id = str(uuid.uuid4())
|
||||||
sid_a = str(uuid.uuid4())
|
sid_a = str(uuid.uuid4())
|
||||||
sid_b = str(uuid.uuid4())
|
sid_b = str(uuid.uuid4())
|
||||||
|
user = getattr(request.state, 'current_user', None)
|
||||||
|
|
||||||
|
# Blind mapping: randomly assign left/right
|
||||||
|
blind = str(is_blind).lower() == "true"
|
||||||
|
if blind:
|
||||||
|
mapping = {"left": "a", "right": "b"}
|
||||||
|
if random.random() > 0.5:
|
||||||
|
mapping = {"left": "b", "right": "a"}
|
||||||
|
else:
|
||||||
|
mapping = {"left": "a", "right": "b"}
|
||||||
|
|
||||||
|
# Map session IDs to left/right based on blind mapping
|
||||||
|
session_left = sid_a if mapping["left"] == "a" else sid_b
|
||||||
|
session_right = sid_a if mapping["right"] == "a" else sid_b
|
||||||
|
|
||||||
|
# In blind mode, name the helper sessions by their neutral slot
|
||||||
|
# ("Model A" / "Model B") instead of the real model. Otherwise the
|
||||||
|
# session name leaks the model in the sidebar and GET /api/sessions,
|
||||||
|
# de-anonymizing the comparison before the user votes (issue #1285).
|
||||||
|
slot_name = {session_left: "Model A", session_right: "Model B"}
|
||||||
|
|
||||||
# Create ephemeral sessions (prefixed [CMP])
|
# Create ephemeral sessions (prefixed [CMP])
|
||||||
for sid, model, endpoint in [(sid_a, model_a, endpoint_a), (sid_b, model_b, endpoint_b)]:
|
for sid, model, endpoint in [(sid_a, model_a, endpoint_a), (sid_b, model_b, endpoint_b)]:
|
||||||
user = getattr(request.state, 'current_user', None)
|
name = f"[CMP] {slot_name[sid]}" if blind else f"[CMP] {model.split('/')[-1]}"
|
||||||
session_manager.create_session(
|
session_manager.create_session(
|
||||||
session_id=sid,
|
session_id=sid,
|
||||||
name=f"[CMP] {model.split('/')[-1]}",
|
name=name,
|
||||||
endpoint_url=endpoint,
|
endpoint_url=endpoint,
|
||||||
model=model,
|
model=model,
|
||||||
rag=False,
|
rag=False,
|
||||||
@@ -93,15 +113,6 @@ def setup_compare_routes(session_manager: SessionManager):
|
|||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
# Blind mapping: randomly assign left/right
|
|
||||||
blind = str(is_blind).lower() == "true"
|
|
||||||
if blind:
|
|
||||||
mapping = {"left": "a", "right": "b"}
|
|
||||||
if random.random() > 0.5:
|
|
||||||
mapping = {"left": "b", "right": "a"}
|
|
||||||
else:
|
|
||||||
mapping = {"left": "a", "right": "b"}
|
|
||||||
|
|
||||||
# Store comparison record
|
# Store comparison record
|
||||||
db = SessionLocal()
|
db = SessionLocal()
|
||||||
try:
|
try:
|
||||||
@@ -121,18 +132,18 @@ def setup_compare_routes(session_manager: SessionManager):
|
|||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
# Map session IDs to left/right based on blind mapping
|
# In blind mode, withhold the model identities AND the left/right
|
||||||
session_left = sid_a if mapping["left"] == "a" else sid_b
|
# mapping from the response. The client already knows model_a/model_b
|
||||||
session_right = sid_a if mapping["right"] == "a" else sid_b
|
# (it sent them), so returning either would defeat blind mode. They are
|
||||||
|
# revealed by POST /api/compare/{id}/vote once the user has voted (#1285).
|
||||||
return {
|
return {
|
||||||
"id": comp_id,
|
"id": comp_id,
|
||||||
"session_left": session_left,
|
"session_left": session_left,
|
||||||
"session_right": session_right,
|
"session_right": session_right,
|
||||||
"model_left": model_a if mapping["left"] == "a" else model_b,
|
"model_left": None if blind else (model_a if mapping["left"] == "a" else model_b),
|
||||||
"model_right": model_a if mapping["right"] == "a" else model_b,
|
"model_right": None if blind else (model_a if mapping["right"] == "a" else model_b),
|
||||||
"is_blind": blind,
|
"is_blind": blind,
|
||||||
"mapping": mapping,
|
"mapping": None if blind else mapping,
|
||||||
}
|
}
|
||||||
|
|
||||||
@router.post("/{comp_id}/vote")
|
@router.post("/{comp_id}/vote")
|
||||||
|
|||||||
@@ -21,6 +21,22 @@ def _sanitize_export_filename(name: str) -> str:
|
|||||||
return name[:128]
|
return name[:128]
|
||||||
|
|
||||||
|
|
||||||
|
# Blind-compare helper sessions are created with this name prefix. Their real
|
||||||
|
# model must never surface in the session list / sidebar — otherwise a blind
|
||||||
|
# comparison can be de-anonymized before the user votes (issue #1285).
|
||||||
|
COMPARE_SESSION_PREFIX = "[CMP] "
|
||||||
|
|
||||||
|
|
||||||
|
def _public_model(name: str, model: str) -> str:
|
||||||
|
"""Blank out the real model of blind-compare helper sessions so the
|
||||||
|
session list can't be used to map a neutral pane label ("Model A") back
|
||||||
|
to its model. The Compare UI tracks models client-side, so hiding it here
|
||||||
|
costs the sidebar nothing. See issue #1285."""
|
||||||
|
if (name or "").startswith(COMPARE_SESSION_PREFIX):
|
||||||
|
return ""
|
||||||
|
return model
|
||||||
|
|
||||||
|
|
||||||
def _verify_session_owner(request: Request, session_id: str, session_manager=None):
|
def _verify_session_owner(request: Request, session_id: str, session_manager=None):
|
||||||
"""Verify the current user owns the session. Raises 404 if not.
|
"""Verify the current user owns the session. Raises 404 if not.
|
||||||
|
|
||||||
@@ -215,7 +231,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
|
|||||||
finally:
|
finally:
|
||||||
db.close()
|
db.close()
|
||||||
|
|
||||||
sessions = [{"id": s.id, "name": s.name, "model": s.model,
|
sessions = [{"id": s.id, "name": s.name, "model": _public_model(s.name, s.model),
|
||||||
"endpoint_url": s.endpoint_url, "rag": s.rag,
|
"endpoint_url": s.endpoint_url, "rag": s.rag,
|
||||||
"archived": s.archived, "folder": folder_map.get(s.id),
|
"archived": s.archived, "folder": folder_map.get(s.id),
|
||||||
"total_tokens": token_map.get(s.id, 0),
|
"total_tokens": token_map.get(s.id, 0),
|
||||||
|
|||||||
@@ -210,7 +210,9 @@ async function _buildCompareUI() {
|
|||||||
for (let i = 0; i < n; i++) {
|
for (let i = 0; i < n; i++) {
|
||||||
const m = state._selectedModels[i];
|
const m = state._selectedModels[i];
|
||||||
const fd = new FormData();
|
const fd = new FormData();
|
||||||
fd.append('name', '[CMP] ' + modelShorts[i]);
|
// Blind mode: name the session by its neutral slot so the sidebar /
|
||||||
|
// GET /api/sessions can't de-anonymize the comparison (issue #1285).
|
||||||
|
fd.append('name', '[CMP] ' + (state._blindMode ? 'Model ' + _slotChar(i) : modelShorts[i]));
|
||||||
fd.append('endpoint_url', m.endpoint || '');
|
fd.append('endpoint_url', m.endpoint || '');
|
||||||
fd.append('model', m.model || '');
|
fd.append('model', m.model || '');
|
||||||
if (m.endpointId) {
|
if (m.endpointId) {
|
||||||
|
|||||||
@@ -382,7 +382,8 @@ async function _createAndAppendPane(m) {
|
|||||||
|
|
||||||
// Create session
|
// Create session
|
||||||
const fd = new FormData();
|
const fd = new FormData();
|
||||||
fd.append('name', '[CMP] ' + m.name);
|
// Blind mode: neutral slot name only — never leak the model (issue #1285).
|
||||||
|
fd.append('name', '[CMP] ' + (state._blindMode ? 'Model ' + _slotChar(i) : m.name));
|
||||||
fd.append('endpoint_url', m.url || '');
|
fd.append('endpoint_url', m.url || '');
|
||||||
fd.append('model', m.id || '');
|
fd.append('model', m.id || '');
|
||||||
if (m.endpointId) {
|
if (m.endpointId) {
|
||||||
@@ -584,7 +585,8 @@ function _showModelSwapDropdown(paneIdx, titleBtn) {
|
|||||||
fetch(`${state.API_BASE}/api/session/${oldSid}`, { method: 'DELETE' }).catch(() => {});
|
fetch(`${state.API_BASE}/api/session/${oldSid}`, { method: 'DELETE' }).catch(() => {});
|
||||||
}
|
}
|
||||||
const fd = new FormData();
|
const fd = new FormData();
|
||||||
fd.append('name', '[CMP] ' + m.name);
|
// Blind mode: neutral slot name only — never leak the model (issue #1285).
|
||||||
|
fd.append('name', '[CMP] ' + (state._blindMode ? 'Model ' + _slotChar(paneIdx) : m.name));
|
||||||
fd.append('endpoint_url', m.url || '');
|
fd.append('endpoint_url', m.url || '');
|
||||||
fd.append('model', m.id || '');
|
fd.append('model', m.id || '');
|
||||||
if (m.endpointId) {
|
if (m.endpointId) {
|
||||||
|
|||||||
100
tests/test_blind_compare_redaction.py
Normal file
100
tests/test_blind_compare_redaction.py
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
"""Regression tests for issue #1285 — blind Compare must not leak model
|
||||||
|
identities through helper-session names or GET /api/sessions.
|
||||||
|
|
||||||
|
Two guards are pinned here:
|
||||||
|
|
||||||
|
1. Backend: ``routes.session_routes._public_model`` blanks the ``model`` field
|
||||||
|
of any ``[CMP] …`` helper session in the session list, so the sidebar /
|
||||||
|
``/api/sessions`` can't be used to map a neutral pane label ("Model A")
|
||||||
|
back to its real model.
|
||||||
|
2. Frontend: every ``[CMP]`` session name built in ``static/js/compare/`` is
|
||||||
|
guarded by ``state._blindMode`` so blind sessions are named by slot rather
|
||||||
|
than by the real model.
|
||||||
|
|
||||||
|
The backend import mirrors tests/test_session_ghost_delete.py: stub the heavy
|
||||||
|
ORM modules so the real route module imports under conftest's MagicMock
|
||||||
|
sqlalchemy stub, then restore sys.modules so the stubs don't leak into sibling
|
||||||
|
test modules.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import importlib
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
_REPO = Path(__file__).resolve().parent.parent
|
||||||
|
|
||||||
|
# Mirror tests/test_session_ghost_delete.py exactly: stub only the ORM *class*
|
||||||
|
# modules and import the REAL core.session_manager + src.auth_helpers. pytest
|
||||||
|
# caches routes.session_routes after the first import, so stubbing auth_helpers /
|
||||||
|
# session_manager here would poison the shared module for the sibling session
|
||||||
|
# tests (whichever file is collected first wins). Matching their stub set keeps
|
||||||
|
# the cached module identical regardless of collection order.
|
||||||
|
_ABSENT = object()
|
||||||
|
_TEMP_STUBS = ("core.database", "core.models", "src.request_models")
|
||||||
|
_saved = {name: sys.modules.get(name, _ABSENT) for name in _TEMP_STUBS}
|
||||||
|
_saved["core.session_manager"] = sys.modules.get("core.session_manager", _ABSENT)
|
||||||
|
try:
|
||||||
|
for _name in _TEMP_STUBS:
|
||||||
|
sys.modules[_name] = MagicMock(name=_name)
|
||||||
|
if isinstance(sys.modules.get("core.session_manager"), MagicMock):
|
||||||
|
del sys.modules["core.session_manager"]
|
||||||
|
importlib.import_module("core.session_manager")
|
||||||
|
import routes.session_routes as SR # noqa: E402
|
||||||
|
finally:
|
||||||
|
for _name, _val in _saved.items():
|
||||||
|
if _val is _ABSENT:
|
||||||
|
sys.modules.pop(_name, None)
|
||||||
|
else:
|
||||||
|
sys.modules[_name] = _val
|
||||||
|
|
||||||
|
|
||||||
|
# ── backend: GET /api/sessions model redaction ─────────────────────────────
|
||||||
|
|
||||||
|
def test_public_model_blanks_blind_compare_sessions():
|
||||||
|
"""A blind-compare helper session ("[CMP] Model A") must not expose its
|
||||||
|
real model in the session list — that is the de-anonymization vector."""
|
||||||
|
assert SR._public_model("[CMP] Model A", "gpt-4o") == ""
|
||||||
|
assert SR._public_model("[CMP] Model B", "llama-3.1-70b") == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_public_model_blanks_any_cmp_prefixed_session():
|
||||||
|
"""Defense in depth: even a non-blind [CMP] session (named after the real
|
||||||
|
model) gets its model field blanked. The name already carries whatever the
|
||||||
|
user chose to reveal, and the session list never needs the raw model."""
|
||||||
|
assert SR._public_model("[CMP] gpt-4o", "gpt-4o") == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_public_model_preserves_normal_sessions():
|
||||||
|
"""Ordinary chats are untouched — only the [CMP] prefix triggers redaction.
|
||||||
|
The post-vote "Compare: a vs b" folder is a normal session, not a helper."""
|
||||||
|
assert SR._public_model("My research chat", "gpt-4o") == "gpt-4o"
|
||||||
|
assert SR._public_model("", "claude-sonnet") == "claude-sonnet"
|
||||||
|
assert SR._public_model("Compare: gpt-4o vs llama", "gpt-4o") == "gpt-4o"
|
||||||
|
|
||||||
|
|
||||||
|
def test_compare_prefix_constant_matches_frontend():
|
||||||
|
"""The redaction prefix must match what the frontend prepends, or the
|
||||||
|
guard silently stops matching new sessions."""
|
||||||
|
assert SR.COMPARE_SESSION_PREFIX == "[CMP] "
|
||||||
|
|
||||||
|
|
||||||
|
# ── frontend: every [CMP] session name is blind-guarded ────────────────────
|
||||||
|
|
||||||
|
def test_compare_session_names_are_blind_guarded():
|
||||||
|
"""Every line in static/js/compare/ that builds a '[CMP]' session name
|
||||||
|
must branch on state._blindMode, so a blind comparison is never named
|
||||||
|
after its real model. Pins the #1285 fix against regressions."""
|
||||||
|
compare_dir = _REPO / "static" / "js" / "compare"
|
||||||
|
assert compare_dir.is_dir(), f"missing {compare_dir}"
|
||||||
|
offenders = []
|
||||||
|
for path in sorted(compare_dir.glob("*.js")):
|
||||||
|
for lineno, line in enumerate(
|
||||||
|
path.read_text(encoding="utf-8").splitlines(), 1
|
||||||
|
):
|
||||||
|
if "'[CMP] '" in line and "_blindMode" not in line:
|
||||||
|
offenders.append(f"{path.name}:{lineno}: {line.strip()}")
|
||||||
|
assert not offenders, (
|
||||||
|
"Compare session names must be blind-guarded (issue #1285):\n"
|
||||||
|
+ "\n".join(offenders)
|
||||||
|
)
|
||||||
Reference in New Issue
Block a user