Make services.search.analytics tolerate missing counters in older or partial analytics files by merging loaded data over defaults, with regression coverage.
141 lines
4.8 KiB
Python
141 lines
4.8 KiB
Python
"""Search analytics, metrics tracking, and exception hierarchy."""
|
|
|
|
import json
|
|
import logging
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
from typing import Dict, Any
|
|
|
|
from .cache import cache_metrics
|
|
|
|
# Module-level logger used by the analytics helpers for routine diagnostics.
logger = logging.getLogger(__name__)

# Dedicated error logger: its file handler accepts records at WARNING level
# or above and writes them to a log file located one directory above the
# directory that contains this module.
_error_log_path = Path(__file__).resolve().parents[1] / "search_engine_error.log"
_error_handler = logging.FileHandler(_error_log_path, encoding="utf-8")
_error_handler.setLevel(logging.WARNING)
_error_handler.setFormatter(
    logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s")
)
error_logger = logging.getLogger("search_engine_error")
# Records sent to this logger are not passed on to ancestor loggers.
error_logger.propagate = False
error_logger.addHandler(_error_handler)

# JSON file holding the persisted analytics; it sits in the same directory
# as the error log.
ANALYTICS_FILE = _error_log_path.with_name("search_analytics.json")
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Custom exception hierarchy
|
|
# ----------------------------------------------------------------------
|
|
class SearchEngineError(Exception):
    """Common ancestor of every search-engine specific exception.

    Catching this type also catches the more specific errors that derive
    from it.
    """
|
|
|
|
|
|
class NetworkError(SearchEngineError):
    """Signals a failed network request, such as a timeout or a DNS error."""
|
|
|
|
|
|
class ParseError(SearchEngineError):
    """Signals that HTML or other fetched content could not be parsed."""
|
|
|
|
|
|
class RateLimitError(SearchEngineError):
    """Signals that the remote service answered with a rate limit (HTTP 429)."""
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Analytics helpers
|
|
# ----------------------------------------------------------------------
|
|
def _default_analytics() -> Dict[str, Any]:
    """Build a brand-new analytics record with every counter at zero.

    Returns:
        A fresh dict on each call containing the five integer counters
        (all ``0``) followed by an empty ``query_patterns`` mapping.
    """
    counter_names = (
        "total_queries",
        "successful_queries",
        "failed_queries",
        "cache_hits",
        "cache_misses",
    )
    record: Dict[str, Any] = dict.fromkeys(counter_names, 0)
    # Created per call so callers never share the same mutable mapping.
    record["query_patterns"] = {}
    return record
|
|
|
|
|
|
def _load_analytics() -> Dict[str, Any]:
    """Load persisted analytics, always returning a complete, usable record.

    The JSON stored in ``ANALYTICS_FILE`` is merged over
    ``_default_analytics()`` so that a file written by an older schema, or
    left incomplete by a partial write, still exposes every key that
    ``_record_query`` indexes directly.

    A stored value for a known key is accepted only when its type matches
    the default (a number for the counters, a dict for ``query_patterns``);
    otherwise the default is kept, because a value such as ``null`` would
    make the later ``+= 1`` fail just like a missing key would. Keys that
    the defaults do not know about are preserved unchanged.

    Returns:
        The merged analytics dict. If the file does not exist, the defaults
        are written to disk and returned. If the file cannot be read or
        parsed, a warning is logged and (unsaved) defaults are returned. If
        the file holds valid JSON that is not an object, defaults are
        returned.
    """
    try:
        with open(ANALYTICS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # First run: create the file so later reads find it. Opening
        # directly instead of checking exists() first avoids a race between
        # the check and the open.
        default = _default_analytics()
        _save_analytics(default)
        return default
    except Exception as e:
        # Deliberately broad: analytics are best effort and must never
        # break the caller, whatever went wrong while reading or decoding.
        logger.warning("Failed to load analytics file: %s", e)
        return _default_analytics()

    merged = _default_analytics()
    if not isinstance(data, dict):
        return merged

    for key, value in data.items():
        if key in merged:
            expected = dict if isinstance(merged[key], dict) else (int, float)
            if not isinstance(value, expected):
                # Wrong-typed value for a known key: keep the default.
                continue
        merged[key] = value
    return merged
|
|
|
|
|
|
def _save_analytics(data: Dict[str, Any]) -> None:
    """Persist analytics data to the JSON file on a best-effort basis.

    Args:
        data: The analytics record to write; it is serialized as JSON with
            a two-space indent.

    Any failure (serialization or I/O) is logged as a warning and otherwise
    ignored, so this function never raises to its caller.
    """
    try:
        # Serialize before opening the file: mode "w" truncates immediately,
        # so an error raised while dumping straight into the file would
        # leave it empty or half-written and lose the previous analytics.
        payload = json.dumps(data, indent=2)
        with open(ANALYTICS_FILE, "w", encoding="utf-8") as f:
            f.write(payload)
    except Exception as e:
        logger.warning("Failed to write analytics file: %s", e)
|
|
|
|
|
|
def _record_query(query: str, success: bool, cache_hit: bool) -> None:
    """Update and persist analytics for a single query execution.

    Args:
        query: The query text; used as the key in ``query_patterns``.
        success: Whether the query succeeded. Selects which of
            ``successful_queries`` / ``failed_queries`` is incremented.
        cache_hit: Whether the result came from the cache. Selects which of
            ``cache_hits`` / ``cache_misses`` is incremented, both in the
            persisted analytics and in the in-memory ``cache_metrics``.

    The analytics are loaded, updated and saved again on every call.
    """
    analytics = _load_analytics()

    analytics["total_queries"] += 1
    analytics["successful_queries" if success else "failed_queries"] += 1

    if cache_hit:
        analytics["cache_hits"] += 1
        cache_metrics["hits"] += 1
    else:
        analytics["cache_misses"] += 1
        cache_metrics["misses"] += 1

    patterns = analytics["query_patterns"]
    entry = patterns.get(query)
    if not isinstance(entry, dict):
        # Unknown query, or a stored entry that is not a mapping: start over.
        entry = {}
    # Read with defaults so an entry written by an older schema or a partial
    # write (missing "count" or "successes") is repaired instead of raising
    # KeyError.
    entry["count"] = entry.get("count", 0) + 1
    entry["successes"] = entry.get("successes", 0) + (1 if success else 0)
    patterns[query] = entry

    _save_analytics(analytics)
|
|
|
|
|
|
def get_search_stats() -> Dict[str, Any]:
    """Return aggregated search analytics.

    Returns:
        A dict with:

        * ``most_common_queries``: up to five query strings ordered by
          their recorded count, highest first.
        * ``success_rate``: successful queries divided by total queries.
        * ``cache_hit_rate``: cache hits divided by hits plus misses.
        * the raw persisted counters ``total_queries``,
          ``successful_queries``, ``failed_queries``, ``cache_hits`` and
          ``cache_misses``.
        * ``cache_evictions``, ``runtime_cache_hits`` and
          ``runtime_cache_misses``, read from the in-memory
          ``cache_metrics``.

        Both rates are ``0.0`` when their denominator would be zero.
    """
    analytics = _load_analytics()

    total_queries = analytics.get("total_queries", 0)
    successful_queries = analytics.get("successful_queries", 0)
    cache_hits = analytics.get("cache_hits", 0)
    cache_misses = analytics.get("cache_misses", 0)

    # A zero denominator is replaced by 1 so the rate becomes 0 instead of
    # raising ZeroDivisionError.
    success_rate = successful_queries / (total_queries or 1)
    cache_hit_rate = cache_hits / ((cache_hits + cache_misses) or 1)

    patterns = analytics.get("query_patterns", {})
    if not isinstance(patterns, dict):
        patterns = {}

    # Entries from older or partially written files may lack a usable
    # "count"; treat those as 0 rather than failing the whole report.
    counts = {}
    for q, entry in patterns.items():
        count = entry.get("count") if isinstance(entry, dict) else None
        counts[q] = count if isinstance(count, (int, float)) else 0
    most_common = [q for q, _ in Counter(counts).most_common(5)]

    return {
        "most_common_queries": most_common,
        "success_rate": success_rate,
        "cache_hit_rate": cache_hit_rate,
        "total_queries": total_queries,
        "successful_queries": successful_queries,
        "failed_queries": analytics.get("failed_queries", 0),
        "cache_hits": cache_hits,
        "cache_misses": cache_misses,
        "cache_evictions": cache_metrics["evictions"],
        "runtime_cache_hits": cache_metrics["hits"],
        "runtime_cache_misses": cache_metrics["misses"],
    }
|