diff --git a/routes/compare_routes.py b/routes/compare_routes.py index 7886762..35cd212 100644 --- a/routes/compare_routes.py +++ b/routes/compare_routes.py @@ -66,13 +66,33 @@ def setup_compare_routes(session_manager: SessionManager): comp_id = str(uuid.uuid4()) sid_a = str(uuid.uuid4()) sid_b = str(uuid.uuid4()) + user = getattr(request.state, 'current_user', None) + + # Blind mapping: randomly assign left/right + blind = str(is_blind).lower() == "true" + if blind: + mapping = {"left": "a", "right": "b"} + if random.random() > 0.5: + mapping = {"left": "b", "right": "a"} + else: + mapping = {"left": "a", "right": "b"} + + # Map session IDs to left/right based on blind mapping + session_left = sid_a if mapping["left"] == "a" else sid_b + session_right = sid_a if mapping["right"] == "a" else sid_b + + # In blind mode, name the helper sessions by their neutral slot + # ("Model A" / "Model B") instead of the real model. Otherwise the + # session name leaks the model in the sidebar and GET /api/sessions, + # de-anonymizing the comparison before the user votes (issue #1285). + slot_name = {session_left: "Model A", session_right: "Model B"} # Create ephemeral sessions (prefixed [CMP]) for sid, model, endpoint in [(sid_a, model_a, endpoint_a), (sid_b, model_b, endpoint_b)]: - user = getattr(request.state, 'current_user', None) + name = f"[CMP] {slot_name[sid]}" if blind else f"[CMP] {model.split('/')[-1]}" session_manager.create_session( session_id=sid, - name=f"[CMP] {model.split('/')[-1]}", + name=name, endpoint_url=endpoint, model=model, rag=False, @@ -93,15 +113,6 @@ def setup_compare_routes(session_manager: SessionManager): finally: db.close() - # Blind mapping: randomly assign left/right - blind = str(is_blind).lower() == "true" - if blind: - mapping = {"left": "a", "right": "b"} - if random.random() > 0.5: - mapping = {"left": "b", "right": "a"} - else: - mapping = {"left": "a", "right": "b"} - # Store comparison record db = SessionLocal() try: @@ -121,18 +132,18 @@ def setup_compare_routes(session_manager: SessionManager): finally: db.close() - # Map session IDs to left/right based on blind mapping - session_left = sid_a if mapping["left"] == "a" else sid_b - session_right = sid_a if mapping["right"] == "a" else sid_b - + # In blind mode, withhold the model identities AND the left/right + # mapping from the response. The client already knows model_a/model_b + # (it sent them), so returning either would defeat blind mode. They are + # revealed by POST /api/compare/{id}/vote once the user has voted (#1285). return { "id": comp_id, "session_left": session_left, "session_right": session_right, - "model_left": model_a if mapping["left"] == "a" else model_b, - "model_right": model_a if mapping["right"] == "a" else model_b, + "model_left": None if blind else (model_a if mapping["left"] == "a" else model_b), + "model_right": None if blind else (model_a if mapping["right"] == "a" else model_b), "is_blind": blind, - "mapping": mapping, + "mapping": None if blind else mapping, } @router.post("/{comp_id}/vote") diff --git a/routes/session_routes.py b/routes/session_routes.py index d3b926f..1b38e4b 100644 --- a/routes/session_routes.py +++ b/routes/session_routes.py @@ -21,6 +21,22 @@ def _sanitize_export_filename(name: str) -> str: return name[:128] +# Blind-compare helper sessions are created with this name prefix. Their real +# model must never surface in the session list / sidebar — otherwise a blind +# comparison can be de-anonymized before the user votes (issue #1285). +COMPARE_SESSION_PREFIX = "[CMP] " + + +def _public_model(name: str, model: str) -> str: + """Blank out the real model of blind-compare helper sessions so the + session list can't be used to map a neutral pane label ("Model A") back + to its model. The Compare UI tracks models client-side, so hiding it here + costs the sidebar nothing. See issue #1285.""" + if (name or "").startswith(COMPARE_SESSION_PREFIX): + return "" + return model + + def _verify_session_owner(request: Request, session_id: str, session_manager=None): """Verify the current user owns the session. Raises 404 if not. @@ -215,7 +231,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_ finally: db.close() - sessions = [{"id": s.id, "name": s.name, "model": s.model, + sessions = [{"id": s.id, "name": s.name, "model": _public_model(s.name, s.model), "endpoint_url": s.endpoint_url, "rag": s.rag, "archived": s.archived, "folder": folder_map.get(s.id), "total_tokens": token_map.get(s.id, 0), diff --git a/static/js/compare/index.js b/static/js/compare/index.js index cd1d580..e6c00ae 100644 --- a/static/js/compare/index.js +++ b/static/js/compare/index.js @@ -210,7 +210,9 @@ async function _buildCompareUI() { for (let i = 0; i < n; i++) { const m = state._selectedModels[i]; const fd = new FormData(); - fd.append('name', '[CMP] ' + modelShorts[i]); + // Blind mode: name the session by its neutral slot so the sidebar / + // GET /api/sessions can't de-anonymize the comparison (issue #1285). + fd.append('name', '[CMP] ' + (state._blindMode ? 'Model ' + _slotChar(i) : modelShorts[i])); fd.append('endpoint_url', m.endpoint || ''); fd.append('model', m.model || ''); if (m.endpointId) { diff --git a/static/js/compare/panes.js b/static/js/compare/panes.js index fe03bad..eda3e33 100644 --- a/static/js/compare/panes.js +++ b/static/js/compare/panes.js @@ -382,7 +382,8 @@ async function _createAndAppendPane(m) { // Create session const fd = new FormData(); - fd.append('name', '[CMP] ' + m.name); + // Blind mode: neutral slot name only — never leak the model (issue #1285). + fd.append('name', '[CMP] ' + (state._blindMode ? 'Model ' + _slotChar(i) : m.name)); fd.append('endpoint_url', m.url || ''); fd.append('model', m.id || ''); if (m.endpointId) { @@ -584,7 +585,8 @@ function _showModelSwapDropdown(paneIdx, titleBtn) { fetch(`${state.API_BASE}/api/session/${oldSid}`, { method: 'DELETE' }).catch(() => {}); } const fd = new FormData(); - fd.append('name', '[CMP] ' + m.name); + // Blind mode: neutral slot name only — never leak the model (issue #1285). + fd.append('name', '[CMP] ' + (state._blindMode ? 'Model ' + _slotChar(paneIdx) : m.name)); fd.append('endpoint_url', m.url || ''); fd.append('model', m.id || ''); if (m.endpointId) { diff --git a/tests/test_blind_compare_redaction.py b/tests/test_blind_compare_redaction.py new file mode 100644 index 0000000..127df00 --- /dev/null +++ b/tests/test_blind_compare_redaction.py @@ -0,0 +1,100 @@ +"""Regression tests for issue #1285 — blind Compare must not leak model +identities through helper-session names or GET /api/sessions. + +Two guards are pinned here: + +1. Backend: ``routes.session_routes._public_model`` blanks the ``model`` field + of any ``[CMP] …`` helper session in the session list, so the sidebar / + ``/api/sessions`` can't be used to map a neutral pane label ("Model A") + back to its real model. +2. Frontend: every ``[CMP]`` session name built in ``static/js/compare/`` is + guarded by ``state._blindMode`` so blind sessions are named by slot rather + than by the real model. + +The backend import mirrors tests/test_session_ghost_delete.py: stub the heavy +ORM modules so the real route module imports under conftest's MagicMock +sqlalchemy stub, then restore sys.modules so the stubs don't leak into sibling +test modules. +""" + +import sys +import importlib +from pathlib import Path +from unittest.mock import MagicMock + +_REPO = Path(__file__).resolve().parent.parent + +# Mirror tests/test_session_ghost_delete.py exactly: stub only the ORM *class* +# modules and import the REAL core.session_manager + src.auth_helpers. pytest +# caches routes.session_routes after the first import, so stubbing auth_helpers / +# session_manager here would poison the shared module for the sibling session +# tests (whichever file is collected first wins). Matching their stub set keeps +# the cached module identical regardless of collection order. +_ABSENT = object() +_TEMP_STUBS = ("core.database", "core.models", "src.request_models") +_saved = {name: sys.modules.get(name, _ABSENT) for name in _TEMP_STUBS} +_saved["core.session_manager"] = sys.modules.get("core.session_manager", _ABSENT) +try: + for _name in _TEMP_STUBS: + sys.modules[_name] = MagicMock(name=_name) + if isinstance(sys.modules.get("core.session_manager"), MagicMock): + del sys.modules["core.session_manager"] + importlib.import_module("core.session_manager") + import routes.session_routes as SR # noqa: E402 +finally: + for _name, _val in _saved.items(): + if _val is _ABSENT: + sys.modules.pop(_name, None) + else: + sys.modules[_name] = _val + + +# ── backend: GET /api/sessions model redaction ───────────────────────────── + +def test_public_model_blanks_blind_compare_sessions(): + """A blind-compare helper session ("[CMP] Model A") must not expose its + real model in the session list — that is the de-anonymization vector.""" + assert SR._public_model("[CMP] Model A", "gpt-4o") == "" + assert SR._public_model("[CMP] Model B", "llama-3.1-70b") == "" + + +def test_public_model_blanks_any_cmp_prefixed_session(): + """Defense in depth: even a non-blind [CMP] session (named after the real + model) gets its model field blanked. The name already carries whatever the + user chose to reveal, and the session list never needs the raw model.""" + assert SR._public_model("[CMP] gpt-4o", "gpt-4o") == "" + + +def test_public_model_preserves_normal_sessions(): + """Ordinary chats are untouched — only the [CMP] prefix triggers redaction. + The post-vote "Compare: a vs b" folder is a normal session, not a helper.""" + assert SR._public_model("My research chat", "gpt-4o") == "gpt-4o" + assert SR._public_model("", "claude-sonnet") == "claude-sonnet" + assert SR._public_model("Compare: gpt-4o vs llama", "gpt-4o") == "gpt-4o" + + +def test_compare_prefix_constant_matches_frontend(): + """The redaction prefix must match what the frontend prepends, or the + guard silently stops matching new sessions.""" + assert SR.COMPARE_SESSION_PREFIX == "[CMP] " + + +# ── frontend: every [CMP] session name is blind-guarded ──────────────────── + +def test_compare_session_names_are_blind_guarded(): + """Every line in static/js/compare/ that builds a '[CMP]' session name + must branch on state._blindMode, so a blind comparison is never named + after its real model. Pins the #1285 fix against regressions.""" + compare_dir = _REPO / "static" / "js" / "compare" + assert compare_dir.is_dir(), f"missing {compare_dir}" + offenders = [] + for path in sorted(compare_dir.glob("*.js")): + for lineno, line in enumerate( + path.read_text(encoding="utf-8").splitlines(), 1 + ): + if "'[CMP] '" in line and "_blindMode" not in line: + offenders.append(f"{path.name}:{lineno}: {line.strip()}") + assert not offenders, ( + "Compare session names must be blind-guarded (issue #1285):\n" + + "\n".join(offenders) + )