diff --git a/services/search/analytics.py b/services/search/analytics.py index 39b00dd..64e61e9 100644 --- a/services/search/analytics.py +++ b/services/search/analytics.py @@ -45,32 +45,36 @@ class RateLimitError(SearchEngineError): # ---------------------------------------------------------------------- # Analytics helpers # ---------------------------------------------------------------------- +def _default_analytics() -> Dict[str, Any]: + return { + "total_queries": 0, + "successful_queries": 0, + "failed_queries": 0, + "cache_hits": 0, + "cache_misses": 0, + "query_patterns": {}, + } + + def _load_analytics() -> Dict[str, Any]: """Load analytics data from the JSON file, creating defaults if missing.""" if not ANALYTICS_FILE.exists(): - default = { - "total_queries": 0, - "successful_queries": 0, - "failed_queries": 0, - "cache_hits": 0, - "cache_misses": 0, - "query_patterns": {}, - } + default = _default_analytics() _save_analytics(default) return default try: with open(ANALYTICS_FILE, "r", encoding="utf-8") as f: - return json.load(f) + data = json.load(f) + # Merge over defaults so a file written by an older schema (or a + # partial write) still has every counter — _record_query indexes + # these keys directly and would otherwise raise KeyError. + merged = _default_analytics() + if isinstance(data, dict): + merged.update(data) + return merged except Exception as e: logger.warning(f"Failed to load analytics file: {e}") - return { - "total_queries": 0, - "successful_queries": 0, - "failed_queries": 0, - "cache_hits": 0, - "cache_misses": 0, - "query_patterns": {}, - } + return _default_analytics() def _save_analytics(data: Dict[str, Any]) -> None: diff --git a/tests/test_services_search_analytics_defaults.py b/tests/test_services_search_analytics_defaults.py new file mode 100644 index 0000000..a0a67c2 --- /dev/null +++ b/tests/test_services_search_analytics_defaults.py @@ -0,0 +1,41 @@ +"""Default-merge on load for services/search/analytics.py. + +src/search/analytics.py was fixed to merge a loaded analytics file over +defaults so _record_query never hits a missing counter, but the services +copy diverged and still returns json.load(f) verbatim. The services copy +is the live one: services/search/core.py calls _record_query on every +search, so an analytics file missing a key (older schema or partial +write) raises KeyError and breaks comprehensive_web_search. + +Mirrors tests/test_search_analytics_defaults.py which covers the src copy. +""" +import json + +import services.search.analytics as analytics + + +def test_load_merges_defaults_for_partial_file(tmp_path, monkeypatch): + f = tmp_path / "search_analytics.json" + f.write_text(json.dumps({"total_queries": 5}), encoding="utf-8") + monkeypatch.setattr(analytics, "ANALYTICS_FILE", f) + + data = analytics._load_analytics() + + assert data["total_queries"] == 5 + assert data["query_patterns"] == {} + for key in ("successful_queries", "failed_queries", "cache_hits", "cache_misses"): + assert data[key] == 0 + + +def test_record_query_survives_partial_file(tmp_path, monkeypatch): + f = tmp_path / "search_analytics.json" + f.write_text(json.dumps({"total_queries": 1}), encoding="utf-8") + monkeypatch.setattr(analytics, "ANALYTICS_FILE", f) + + # Before the fix this raised KeyError on the missing counters. + analytics._record_query("hello world", success=True, cache_hit=False) + + data = analytics._load_analytics() + assert data["total_queries"] == 2 + assert data["successful_queries"] == 1 + assert data["query_patterns"]["hello world"]["count"] == 1