Merge search analytics defaults in services copy

Make services.search.analytics tolerate missing counters in older or partial analytics files by merging loaded data over defaults, with regression coverage.
2026-06-03 05:45:07 +01:00
parent 10e797a1aa
commit f29c827e6e
2 changed files with 62 additions and 17 deletions
--- a/services/search/analytics.py
+++ b/services/search/analytics.py
@@ -45,32 +45,36 @@ class RateLimitError(SearchEngineError):
 # ----------------------------------------------------------------------
 # Analytics helpers
 # ----------------------------------------------------------------------
 def _default_analytics() -> Dict[str, Any]:
    return {
        "total_queries": 0,
        "successful_queries": 0,
        "failed_queries": 0,
        "cache_hits": 0,
        "cache_misses": 0,
        "query_patterns": {},
    }
 def _load_analytics() -> Dict[str, Any]:
    """Load analytics data from the JSON file, creating defaults if missing."""
    if not ANALYTICS_FILE.exists():
-        default = {
+        default = _default_analytics()
            "total_queries": 0,
            "successful_queries": 0,
            "failed_queries": 0,
            "cache_hits": 0,
            "cache_misses": 0,
            "query_patterns": {},
        }
        _save_analytics(default)
        return default
    try:
        with open(ANALYTICS_FILE, "r", encoding="utf-8") as f:
-            return json.load(f)
+            data = json.load(f)
        # Merge over defaults so a file written by an older schema (or a
        # partial write) still has every counter — _record_query indexes
        # these keys directly and would otherwise raise KeyError.
        merged = _default_analytics()
        if isinstance(data, dict):
            merged.update(data)
        return merged
    except Exception as e:
        logger.warning(f"Failed to load analytics file: {e}")
-        return {
+        return _default_analytics()
            "total_queries": 0,
            "successful_queries": 0,
            "failed_queries": 0,
            "cache_hits": 0,
            "cache_misses": 0,
            "query_patterns": {},
        }
 def _save_analytics(data: Dict[str, Any]) -> None:
--- a/tests/test_services_search_analytics_defaults.py
+++ b/tests/test_services_search_analytics_defaults.py
@@ -0,0 +1,41 @@
 """Default-merge on load for services/search/analytics.py.
 src/search/analytics.py was fixed to merge a loaded analytics file over
 defaults so _record_query never hits a missing counter, but the services
 copy diverged and still returns json.load(f) verbatim. The services copy
 is the live one: services/search/core.py calls _record_query on every
 search, so an analytics file missing a key (older schema or partial
 write) raises KeyError and breaks comprehensive_web_search.
 Mirrors tests/test_search_analytics_defaults.py which covers the src copy.
 """
 import json
 import services.search.analytics as analytics
 def test_load_merges_defaults_for_partial_file(tmp_path, monkeypatch):
    f = tmp_path / "search_analytics.json"
    f.write_text(json.dumps({"total_queries": 5}), encoding="utf-8")
    monkeypatch.setattr(analytics, "ANALYTICS_FILE", f)
    data = analytics._load_analytics()
    assert data["total_queries"] == 5
    assert data["query_patterns"] == {}
    for key in ("successful_queries", "failed_queries", "cache_hits", "cache_misses"):
        assert data[key] == 0
 def test_record_query_survives_partial_file(tmp_path, monkeypatch):
    f = tmp_path / "search_analytics.json"
    f.write_text(json.dumps({"total_queries": 1}), encoding="utf-8")
    monkeypatch.setattr(analytics, "ANALYTICS_FILE", f)
    # Before the fix this raised KeyError on the missing counters.
    analytics._record_query("hello world", success=True, cache_hit=False)
    data = analytics._load_analytics()
    assert data["total_queries"] == 2
    assert data["successful_queries"] == 1
    assert data["query_patterns"]["hello world"]["count"] == 1