From 203c4d83df3a9d8cd45b94180a3cb6de97c5d185 Mon Sep 17 00:00:00 2001 From: Afonso Coutinho <116525378+afonsopc@users.noreply.github.com> Date: Tue, 2 Jun 2026 16:26:37 +0100 Subject: [PATCH] fix: search analytics crashes recording when the JSON file predates a counter (#1224) * refactor: single _default_analytics() instead of duplicated default dicts * fix: merge analytics defaults so an old/partial file doesn't KeyError on record * test: analytics load merges defaults; record survives a partial file --- src/search/analytics.py | 39 ++++++++++++++----------- tests/test_search_analytics_defaults.py | 33 +++++++++++++++++++++ 2 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 tests/test_search_analytics_defaults.py diff --git a/src/search/analytics.py b/src/search/analytics.py index 39b00dd..58aa1b0 100644 --- a/src/search/analytics.py +++ b/src/search/analytics.py @@ -45,32 +45,37 @@ class RateLimitError(SearchEngineError): # ---------------------------------------------------------------------- # Analytics helpers # ---------------------------------------------------------------------- +def _default_analytics() -> Dict[str, Any]: + """A fresh analytics document with every counter present.""" + return { + "total_queries": 0, + "successful_queries": 0, + "failed_queries": 0, + "cache_hits": 0, + "cache_misses": 0, + "query_patterns": {}, + } + + def _load_analytics() -> Dict[str, Any]: """Load analytics data from the JSON file, creating defaults if missing.""" if not ANALYTICS_FILE.exists(): - default = { - "total_queries": 0, - "successful_queries": 0, - "failed_queries": 0, - "cache_hits": 0, - "cache_misses": 0, - "query_patterns": {}, - } + default = _default_analytics() _save_analytics(default) return default try: with open(ANALYTICS_FILE, "r", encoding="utf-8") as f: - return json.load(f) + data = json.load(f) + # Merge over defaults so a file written by an older schema (or a + # partial write) still has every counter — _record_query indexes + # these keys directly and would otherwise raise KeyError. + merged = _default_analytics() + if isinstance(data, dict): + merged.update(data) + return merged except Exception as e: logger.warning(f"Failed to load analytics file: {e}") - return { - "total_queries": 0, - "successful_queries": 0, - "failed_queries": 0, - "cache_hits": 0, - "cache_misses": 0, - "query_patterns": {}, - } + return _default_analytics() def _save_analytics(data: Dict[str, Any]) -> None: diff --git a/tests/test_search_analytics_defaults.py b/tests/test_search_analytics_defaults.py new file mode 100644 index 0000000..150eb8e --- /dev/null +++ b/tests/test_search_analytics_defaults.py @@ -0,0 +1,33 @@ +"""Tests for analytics default-merge on load (src/search/analytics.py).""" +import json + +import src.search.analytics as analytics + + +def test_load_merges_defaults_for_partial_file(tmp_path, monkeypatch): + # A file written by an older schema is missing most counters. + f = tmp_path / "search_analytics.json" + f.write_text(json.dumps({"total_queries": 5}), encoding="utf-8") + monkeypatch.setattr(analytics, "ANALYTICS_FILE", f) + + data = analytics._load_analytics() + + # Existing value preserved, every missing counter filled with its default. + assert data["total_queries"] == 5 + assert data["query_patterns"] == {} + for key in ("successful_queries", "failed_queries", "cache_hits", "cache_misses"): + assert data[key] == 0 + + +def test_record_query_survives_partial_file(tmp_path, monkeypatch): + f = tmp_path / "search_analytics.json" + f.write_text(json.dumps({"total_queries": 1}), encoding="utf-8") + monkeypatch.setattr(analytics, "ANALYTICS_FILE", f) + + # Before the fix this raised KeyError on the missing counters. + analytics._record_query("hello world", success=True, cache_hit=False) + + data = analytics._load_analytics() + assert data["total_queries"] == 2 + assert data["successful_queries"] == 1 + assert data["query_patterns"]["hello world"]["count"] == 1