Merge search analytics defaults in services copy

Make services.search.analytics tolerate missing counters in older or partial analytics files by merging loaded data over defaults, with regression coverage.
This commit is contained in:
Afonso Coutinho
2026-06-03 05:45:07 +01:00
committed by GitHub
parent 10e797a1aa
commit f29c827e6e
2 changed files with 62 additions and 17 deletions

View File

@@ -45,32 +45,36 @@ class RateLimitError(SearchEngineError):
# ----------------------------------------------------------------------
# Analytics helpers
# ----------------------------------------------------------------------
def _default_analytics() -> Dict[str, Any]:
return {
"total_queries": 0,
"successful_queries": 0,
"failed_queries": 0,
"cache_hits": 0,
"cache_misses": 0,
"query_patterns": {},
}
def _load_analytics() -> Dict[str, Any]:
    """Load analytics data from the JSON file, creating defaults if missing.

    If ``ANALYTICS_FILE`` does not exist, a default record is written via
    ``_save_analytics`` and returned. Otherwise the file is parsed and its
    contents are merged over the defaults, so every default key is present
    in the result even when the file lacks some of them.

    Returns:
        The analytics dict. If the file cannot be read or parsed, or its
        JSON root is not an object, plain defaults are returned instead.
    """
    if not ANALYTICS_FILE.exists():
        default = _default_analytics()
        _save_analytics(default)
        return default

    try:
        with open(ANALYTICS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        # Best-effort: analytics must never break the caller, so any read
        # or parse failure falls back to a clean default record.
        logger.warning(f"Failed to load analytics file: {e}")
        return _default_analytics()

    # Merge over defaults so a file written by an older schema (or a
    # partial write) still has every counter — _record_query indexes
    # these keys directly and would otherwise raise KeyError.
    merged = _default_analytics()
    if isinstance(data, dict):
        merged.update(data)
    else:
        # A non-object root (list, string, number, null) cannot be merged;
        # say so rather than dropping it silently.
        logger.warning(
            "Analytics file root is %s, not an object; using defaults",
            type(data).__name__,
        )
    return merged
def _save_analytics(data: Dict[str, Any]) -> None: def _save_analytics(data: Dict[str, Any]) -> None:

View File

@@ -0,0 +1,41 @@
"""Default-merge on load for services/search/analytics.py.
src/search/analytics.py was fixed to merge a loaded analytics file over
defaults so _record_query never hits a missing counter, but the services
copy diverged and still returns json.load(f) verbatim. The services copy
is the live one: services/search/core.py calls _record_query on every
search, so an analytics file missing a key (older schema or partial
write) raises KeyError and breaks comprehensive_web_search.
Mirrors tests/test_search_analytics_defaults.py which covers the src copy.
"""
import json
import services.search.analytics as analytics
def test_load_merges_defaults_for_partial_file(tmp_path, monkeypatch):
    """A file holding only ``total_queries`` loads with the other keys defaulted."""
    f = tmp_path / "search_analytics.json"
    f.write_text(json.dumps({"total_queries": 5}), encoding="utf-8")
    # Point the module at the temporary file instead of the real one.
    monkeypatch.setattr(analytics, "ANALYTICS_FILE", f)

    data = analytics._load_analytics()

    # The stored value wins; everything absent from the file is defaulted.
    assert data["total_queries"] == 5
    assert data["query_patterns"] == {}
    for key in ("successful_queries", "failed_queries", "cache_hits", "cache_misses"):
        assert data[key] == 0
def test_record_query_survives_partial_file(tmp_path, monkeypatch):
    """``_record_query`` works against a file that lacks most counters."""
    f = tmp_path / "search_analytics.json"
    f.write_text(json.dumps({"total_queries": 1}), encoding="utf-8")
    # Point the module at the temporary file instead of the real one.
    monkeypatch.setattr(analytics, "ANALYTICS_FILE", f)

    # Before the fix this raised KeyError on the missing counters.
    analytics._record_query("hello world", success=True, cache_hit=False)

    data = analytics._load_analytics()
    assert data["total_queries"] == 2
    assert data["successful_queries"] == 1
    assert data["query_patterns"]["hello world"]["count"] == 1