fix(research): owner-scope endpoint resolution

POST /api/research/start (require_privilege "can_use_research" — a normal
user, not admin) resolves an endpoint two ways and feeds the row's *decrypted*
api_key + base_url into research_handler.start_research(llm_endpoint=,
llm_headers=):

  1. body.endpoint_id  -> query(ModelEndpoint).filter(id == endpoint_id,
                          is_enabled == True).first()
  2. no endpoint + nothing configured -> query(ModelEndpoint).filter(
                          is_enabled == True).first()

Neither was owner-scoped. ModelEndpoint is a per-user resource (core/database.py:
non-null owner = private, "the model picker only shows the endpoint to that
user"). So a research-privileged user (or a chat-scoped token) could pass another
user's PRIVATE endpoint_id — or fall through to their first-enabled row — and run
research against that owner's endpoint: spending their API key / quota and
reaching whatever internal base_url they configured (SSRF).

This is the same multi-tenant owner-scoping class already fixed for
companion/models, the /api/v1/chat session gate (#870), and the /api/v1/chat
first-enabled fallback (#1045, _first_enabled_endpoint). These two sinks on the
research path were missed.

Extract `_owned_enabled_endpoint(db, owner, endpoint_id=None)` which scopes via
the shared owner_filter helper (own rows + legacy null-owner shared rows),
matching webhook_routes._first_enabled_endpoint and session_routes._owned_endpoint.
Used for both sinks. A scoped miss on the explicit-id path returns the existing
404 ("Endpoint not found or disabled"), so endpoint existence isn't revealed. A
null/empty owner stays a no-op (single-user / legacy mode).

Add regression tests pinning both lookups (cross-owner rejected, own-row
allowed, legacy shared-row allowed, disabled-skipped, fallback never borrows,
null-owner no-op).
This commit is contained in:
Mahdi Salmanzade
2026-06-04 02:19:28 +04:00
committed by GitHub
parent 729a30a10e
commit 271489a10c
2 changed files with 164 additions and 9 deletions

View File

@@ -48,6 +48,30 @@ def _resolve_research_endpoint(sess) -> tuple:
return url, model, headers
def _owned_enabled_endpoint(db, owner, endpoint_id=None):
"""An enabled ModelEndpoint VISIBLE to `owner` (their own rows + legacy
null-owner "shared" rows), optionally narrowed to a specific endpoint_id;
None if nothing visible matches.
Owner-scoped on purpose. ModelEndpoint is per-user (core/database.py: non-null
owner = private, "the model picker only shows the endpoint to that user") and
holds a decrypted `api_key`. /api/research/start feeds the resolved row's
api_key + base_url into research_handler.start_research(llm_endpoint=,
llm_headers=), so an UNSCOPED lookup — by the caller-supplied endpoint_id, or
via the bare first-enabled fallback — would let a research-privileged user
spend ANOTHER user's API key/quota and reach whatever internal base_url they
configured. Mirrors webhook_routes._first_enabled_endpoint and
session_routes._owned_endpoint. A null/empty owner is a no-op (single-user /
legacy mode).
"""
from src.database import ModelEndpoint
from src.auth_helpers import owner_filter
q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) # noqa: E712
if endpoint_id:
q = q.filter(ModelEndpoint.id == endpoint_id)
return owner_filter(q, ModelEndpoint, owner).first()
def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
router = APIRouter(tags=["research"])
@@ -344,14 +368,13 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
if body.endpoint_id:
from src.database import SessionLocal
from src.database import ModelEndpoint
from src.endpoint_resolver import normalize_base, build_chat_url, build_headers
db = SessionLocal()
try:
ep = db.query(ModelEndpoint).filter(
ModelEndpoint.id == body.endpoint_id,
ModelEndpoint.is_enabled == True,
).first()
# Owner-scoped: never resolve another user's private endpoint
# (and its decrypted api_key / internal base_url). A scoped miss
# reads as 404 so the endpoint's existence isn't revealed.
ep = _owned_enabled_endpoint(db, user, body.endpoint_id)
if not ep:
raise HTTPException(404, "Endpoint not found or disabled")
base = normalize_base(ep.base_url)
@@ -382,13 +405,14 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
ep_url, ep_model, ep_headers = resolve_endpoint("chat")
if not ep_url:
from src.database import SessionLocal
from src.database import ModelEndpoint
from src.endpoint_resolver import normalize_base, build_chat_url, build_headers
db = SessionLocal()
try:
ep = db.query(ModelEndpoint).filter(
ModelEndpoint.is_enabled == True,
).first()
# Owner-scoped first-enabled fallback: the caller's own rows
# + legacy null-owner shared rows only — never borrow another
# user's private endpoint/api_key. Same fix as the
# /api/v1/chat fallback (webhook_routes._first_enabled_endpoint).
ep = _owned_enabled_endpoint(db, user)
if ep:
base = normalize_base(ep.base_url)
ep_url = build_chat_url(base)

View File

@@ -0,0 +1,131 @@
"""Owner-scope regression for /api/research/start endpoint resolution.
`research_start()` resolves a CALLER-SUPPLIED `endpoint_id` (and, with nothing
configured, a bare first-enabled fallback) to a `ModelEndpoint` whose *decrypted*
api_key + base_url then drive the research LLM calls
(`start_research(llm_endpoint=, llm_headers=)`). Both lookups must be
owner-scoped — the caller's own rows plus legacy null-owner ("shared") rows —
so a research-privileged user (or a chat-scoped token) can't bind a research run
to ANOTHER user's PRIVATE endpoint and silently spend that owner's API key /
reach whatever internal base_url they configured. Mirrors the
webhook `_first_enabled_endpoint` (#1045) and session `_owned_endpoint` fixes.
"""
import sys
import types
from types import SimpleNamespace
from unittest.mock import MagicMock
# The helper resolves `from src.database import ModelEndpoint` at call time.
# Stub the module so we can hand it a fake declarative class whose column
# comparisons return inspectable predicates (the real one is a SQLAlchemy
# class, MagicMock'd to oblivion by conftest). owner_filter stays REAL.
_sd = types.ModuleType("src.database")
_sd.ModelEndpoint = MagicMock()
sys.modules.setdefault("src.database", _sd)
from routes.research_routes import _owned_enabled_endpoint # noqa: E402
class _Predicate:
def __init__(self, check):
self._check = check
def __call__(self, row):
return self._check(row)
def __or__(self, other):
return _Predicate(lambda row: self(row) or other(row))
class _Column:
def __init__(self, name):
self.name = name
def __eq__(self, value):
return _Predicate(lambda row: getattr(row, self.name) == value)
class _ModelEndpoint:
id = _Column("id")
is_enabled = _Column("is_enabled")
owner = _Column("owner")
class _Query:
def __init__(self, rows):
self._rows = list(rows)
def filter(self, *predicates):
self._rows = [r for r in self._rows if all(p(r) for p in predicates)]
return self
def first(self):
return self._rows[0] if self._rows else None
class _DB:
def __init__(self, rows):
self._rows = rows
def query(self, model):
assert model is _ModelEndpoint
return _Query(self._rows)
def _ep(eid, owner, *, is_enabled=True):
return SimpleNamespace(id=eid, owner=owner, is_enabled=is_enabled, api_key="sk-secret")
def _resolve(rows, owner, endpoint_id=None):
sys.modules["src.database"].ModelEndpoint = _ModelEndpoint
return _owned_enabled_endpoint(_DB(rows), owner, endpoint_id)
# --- explicit endpoint_id (POST /api/research/start, body.endpoint_id) --------
def test_endpoint_id_rejects_another_owners_private_endpoint():
# bob's private endpoint exists, but alice asking for it by id resolves None
# → the route raises 404 ("Endpoint not found or disabled"), never builds
# headers from bob's key.
rows = [_ep("ep-bob", "bob"), _ep("ep-alice", "alice")]
assert _resolve(rows, "alice", "ep-bob") is None
def test_endpoint_id_returns_callers_own_endpoint():
rows = [_ep("ep-bob", "bob"), _ep("ep-alice", "alice")]
ep = _resolve(rows, "alice", "ep-alice")
assert ep is not None and ep.id == "ep-alice"
def test_endpoint_id_allows_legacy_null_owner_shared_row():
rows = [_ep("ep-shared", None)]
ep = _resolve(rows, "alice", "ep-shared")
assert ep is not None and ep.id == "ep-shared"
def test_endpoint_id_skips_disabled_even_when_owned():
rows = [_ep("ep-alice", "alice", is_enabled=False)]
assert _resolve(rows, "alice", "ep-alice") is None
# --- bare first-enabled fallback (no endpoint_id, nothing configured) ---------
def test_fallback_never_picks_another_owners_endpoint():
# bob's private endpoint is first in the table, alice must never borrow it.
rows = [_ep("ep-bob", "bob"), _ep("ep-shared", None)]
ep = _resolve(rows, "alice")
assert ep is not None and ep.id == "ep-shared"
def test_fallback_returns_none_when_only_others_endpoints():
rows = [_ep("ep-bob", "bob"), _ep("ep-carol", "carol")]
assert _resolve(rows, "alice") is None
# --- legacy single-user / unresolved owner: owner_filter no-op ---------------
def test_null_owner_is_legacy_single_user_noop():
rows = [_ep("ep-x", "bob"), _ep("ep-y", "alice")]
ep = _resolve(rows, None, "ep-x")
assert ep is not None and ep.id == "ep-x"