From 33bf97559706d7b43914824a5d32b8c8f22655ed Mon Sep 17 00:00:00 2001 From: Ethan Date: Wed, 3 Jun 2026 14:24:17 +1000 Subject: [PATCH] Stop GET /api/search/config from leaking the Brave API key (#1661) (#1750) get_search_config returned SEARCH_CONFIG.copy(), and update_search_config cached the decrypted Brave key into that shared global at startup (app_initializer), so the unauthenticated /api/search/config route exposed the operator's key. The cache was dead weight: brave_search reads its key via _get_provider_key (settings/env), never SEARCH_CONFIG. - update_search_config: no longer stores the api_key in the shared global (accepted for backward compat; provider keys are read on demand). - get_search_config: scrub any string-valued credential field before returning, preserving the has_api_key presence flag. No schema change; brave_search/_get_provider_key untouched. Adds regression tests. Fixes #1661 Co-authored-by: Ethan <23321960+0xLeathery@users.noreply.github.com> --- services/search/core.py | 34 +++++++++++++--- tests/test_search_config_no_key_leak.py | 53 +++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 5 deletions(-) create mode 100644 tests/test_search_config_no_key_leak.py diff --git a/services/search/core.py b/services/search/core.py index c6099d4..1230946 100644 --- a/services/search/core.py +++ b/services/search/core.py @@ -49,8 +49,18 @@ SEARCH_CONFIG: Dict[str, Any] = { } +def _is_secret_key(name: str) -> bool: + """True for config keys that hold a credential (e.g. ``brave_api_key``).""" + return name.endswith(("_api_key", "_key", "_token", "_secret")) + + def get_search_config() -> Dict[str, Any]: - """Get current search configuration including active provider info.""" + """Get current search configuration including active provider info. + + Never returns stored API keys: callers — including the unauthenticated + ``GET /api/search/config`` route — only need key *presence* via + ``has_api_key``, not the secret itself (#1661). + """ config = SEARCH_CONFIG.copy() settings = _get_search_settings() provider = settings.get("search_provider", "searxng") @@ -60,13 +70,27 @@ def get_search_config() -> Dict[str, Any]: if provider == "searxng": from .providers import _get_search_instance config["search_url"] = _get_search_instance() - return config + # Strip any string-valued credential so secrets never reach the response; + # the boolean has_api_key flag (presence only) is preserved. + return { + k: v for k, v in config.items() + if not (isinstance(v, str) and _is_secret_key(k)) + } def update_search_config(api_key: str = None, **kwargs): - """Update search configuration (e.g. Brave API key).""" - if api_key: - SEARCH_CONFIG["brave_api_key"] = api_key + """Merge non-secret search config into SEARCH_CONFIG. + + Provider API keys are intentionally NOT cached here. They are read on demand + from settings/env via ``_get_provider_key`` (e.g. ``brave_search``), so the + previous ``SEARCH_CONFIG["brave_api_key"] = api_key`` cache was never used + for search and only leaked the decrypted key through ``get_search_config`` / + ``GET /api/search/config`` (#1661). ``api_key`` is accepted for backward + compatibility but no longer stored. + """ + for k, v in kwargs.items(): + if not _is_secret_key(k): + SEARCH_CONFIG[k] = v def _call_provider(provider_name: str, query: str, count: int, time_filter: str = None) -> List[dict]: diff --git a/tests/test_search_config_no_key_leak.py b/tests/test_search_config_no_key_leak.py new file mode 100644 index 0000000..e73545b --- /dev/null +++ b/tests/test_search_config_no_key_leak.py @@ -0,0 +1,53 @@ +"""Regression guard for #1661 — GET /api/search/config must not leak API keys. + +`get_search_config()` returned `SEARCH_CONFIG.copy()`, and `update_search_config()` +cached the decrypted Brave key into that shared global at startup +(`src/app_initializer.py`), so the unauthenticated `/api/search/config` route +exposed the operator's key. The key is read on demand via `_get_provider_key` +(`brave_search`), so the cache was dead weight. Now the secret is never cached in +the global, and `get_search_config` scrubs any credential field from its response +while preserving the `has_api_key` presence flag. +""" +import os + +os.environ.setdefault("DATABASE_URL", "sqlite:///:memory:") + +import pytest + +from services.search import core + + +def test_update_search_config_does_not_cache_secret(): + core.update_search_config(api_key="SUPER_SECRET") + assert "brave_api_key" not in core.SEARCH_CONFIG + assert "SUPER_SECRET" not in core.SEARCH_CONFIG.values() + + +@pytest.fixture +def stub_settings(monkeypatch): + monkeypatch.setattr(core, "_get_search_settings", lambda: {"search_provider": "brave"}) + monkeypatch.setattr(core, "_get_provider_key", lambda provider: "REAL_SECRET_KEY") + monkeypatch.setattr(core, "_get_result_count", lambda: 10) + + +def test_get_search_config_never_returns_a_secret(stub_settings, monkeypatch): + # Even if a secret somehow sits in the shared global, the response scrubs it. + monkeypatch.setitem(core.SEARCH_CONFIG, "brave_api_key", "LEAKED_SECRET") + + cfg = core.get_search_config() + + assert "brave_api_key" not in cfg + assert "LEAKED_SECRET" not in cfg.values() # the cached secret + assert "REAL_SECRET_KEY" not in cfg.values() # the live provider key + # Presence flag and non-secret fields are preserved. + assert cfg["has_api_key"] is True + assert cfg["active_provider"] == "brave" + + +def test_is_secret_key_keeps_presence_flag(): + # has_api_key matches the *_api_key suffix, but it is a bool — the isinstance + # guard in get_search_config keeps it; only string-valued secrets are dropped. + assert core._is_secret_key("brave_api_key") is True + assert core._is_secret_key("has_api_key") is True + assert core._is_secret_key("active_provider") is False + assert core._is_secret_key("search_url") is False