Odysseus v1.0
This commit is contained in:
136
src/search/analytics.py
Normal file
136
src/search/analytics.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""Search analytics, metrics tracking, and exception hierarchy."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
|
||||
from .cache import cache_metrics
|
||||
|
||||
logger = logging.getLogger(__name__)

# Dedicated error logger with its own file handler. The log file is placed in
# the parent of the directory that contains this module.
_error_log_path = Path(__file__).resolve().parent.parent / "search_engine_error.log"
# delay=True defers opening the file until the first record is emitted, so
# importing this module neither creates an empty log file nor raises OSError
# when the target directory is not writable.
_error_handler = logging.FileHandler(_error_log_path, encoding="utf-8", delay=True)
# Only WARNING and above reach the error log.
_error_handler.setLevel(logging.WARNING)
_error_handler.setFormatter(
    logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s")
)
error_logger = logging.getLogger("search_engine_error")
error_logger.addHandler(_error_handler)
# Do not hand these records to ancestor loggers' handlers as well.
error_logger.propagate = False

# Analytics file: JSON document read and written by the analytics helpers in
# this module; stored alongside the error log.
ANALYTICS_FILE = Path(__file__).resolve().parent.parent / "search_analytics.json"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
# Custom exception hierarchy
# ----------------------------------------------------------------------
|
||||
class SearchEngineError(Exception):
    """Root of the search-engine exception hierarchy.

    Every search-engine specific error derives from this class, so catching
    it handles all of them at once.
    """
|
||||
|
||||
|
||||
class NetworkError(SearchEngineError):
    """Signals a failed network request, such as a timeout or a DNS error."""
|
||||
|
||||
|
||||
class ParseError(SearchEngineError):
    """Signals that HTML (or other content) could not be parsed."""
|
||||
|
||||
|
||||
class RateLimitError(SearchEngineError):
    """Signals a rate-limit response (HTTP 429) from the remote service."""
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
# Analytics helpers
# ----------------------------------------------------------------------
|
||||
def _load_analytics() -> Dict[str, Any]:
    """Load analytics data from the JSON file, creating defaults if missing.

    Returns:
        A dict that always contains the counters ``total_queries``,
        ``successful_queries``, ``failed_queries``, ``cache_hits`` and
        ``cache_misses`` plus the ``query_patterns`` mapping. Values stored
        in the file take precedence; keys absent from the file are filled in
        from the defaults.

    Behaviour on problems (never raises for these cases):
        * File missing: a default record is written via ``_save_analytics``
          and returned.
        * File unreadable or not valid JSON: a warning is logged and the
          defaults are returned; the file itself is left untouched.
        * File holds valid JSON that is not an object: same as above.
    """
    defaults: Dict[str, Any] = {
        "total_queries": 0,
        "successful_queries": 0,
        "failed_queries": 0,
        "cache_hits": 0,
        "cache_misses": 0,
        "query_patterns": {},
    }

    # EAFP: open directly instead of exists()-then-open, which can race with
    # another process removing the file between the two calls.
    try:
        with open(ANALYTICS_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        _save_analytics(defaults)
        return defaults
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("Failed to load analytics file: %s", e)
        return defaults

    if not isinstance(loaded, dict):
        logger.warning(
            "Failed to load analytics file: expected a JSON object, got %s",
            type(loaded).__name__,
        )
        return defaults

    # Merge over the defaults so callers that index the counters directly do
    # not hit KeyError on a partial or older file.
    merged = {**defaults, **loaded}
    if not isinstance(merged["query_patterns"], dict):
        merged["query_patterns"] = {}
    return merged
|
||||
|
||||
|
||||
def _save_analytics(data: Dict[str, Any]) -> None:
    """Write *data* to ``ANALYTICS_FILE`` as JSON indented by two spaces.

    This is a best-effort operation: any exception raised while opening the
    file or serialising *data* is logged as a warning on the module logger
    and swallowed, so nothing propagates to the caller.
    """
    try:
        sink = open(ANALYTICS_FILE, "w", encoding="utf-8")
        with sink:
            json.dump(data, sink, indent=2)
    except Exception as exc:
        logger.warning(f"Failed to write analytics file: {exc}")
|
||||
|
||||
|
||||
def _record_query(query: str, success: bool, cache_hit: bool) -> None:
    """Update analytics for a single query execution.

    Loads the persisted analytics, bumps the relevant counters, updates the
    per-query entry in ``query_patterns`` and writes the result back with
    ``_save_analytics``. The in-memory ``cache_metrics`` hit/miss counter is
    incremented as well.

    Args:
        query: The query text; used verbatim as the key in
            ``query_patterns``.
        success: Whether the query succeeded. Selects
            ``successful_queries`` vs ``failed_queries`` and whether the
            per-query ``successes`` count is incremented.
        cache_hit: Whether the result came from the cache. Selects
            ``cache_hits`` vs ``cache_misses`` (and ``hits`` vs ``misses``
            in ``cache_metrics``).
    """
    analytics = _load_analytics()

    def _bump(counters: Dict[str, Any], key: str) -> None:
        # Treat a missing counter as 0, matching how get_search_stats reads
        # the same data, so a partial analytics file does not raise KeyError.
        counters[key] = counters.get(key, 0) + 1

    _bump(analytics, "total_queries")
    _bump(analytics, "successful_queries" if success else "failed_queries")

    if cache_hit:
        _bump(analytics, "cache_hits")
        cache_metrics["hits"] += 1
    else:
        _bump(analytics, "cache_misses")
        cache_metrics["misses"] += 1

    patterns = analytics.setdefault("query_patterns", {})
    entry = patterns.setdefault(query, {})
    _bump(entry, "count")
    # Always materialise "successes" so a failed first attempt is stored as
    # {"count": 1, "successes": 0}.
    entry.setdefault("successes", 0)
    if success:
        entry["successes"] += 1

    _save_analytics(analytics)
|
||||
|
||||
|
||||
def get_search_stats() -> Dict[str, Any]:
    """Summarise the persisted analytics together with the live cache metrics.

    Returns:
        A dict with:
          * ``most_common_queries``: up to five query strings, ordered by
            their recorded ``count`` (highest first).
          * ``success_rate``: successful queries divided by total queries.
          * ``cache_hit_rate``: cache hits divided by hits plus misses.
          * the raw persisted counters ``total_queries``,
            ``successful_queries``, ``failed_queries``, ``cache_hits`` and
            ``cache_misses`` (0 when absent from the loaded data).
          * ``cache_evictions``, ``runtime_cache_hits`` and
            ``runtime_cache_misses``, read from ``cache_metrics``.
    """
    stats = _load_analytics()

    n_total = stats.get("total_queries", 0)
    n_succeeded = stats.get("successful_queries", 0)
    n_hits = stats.get("cache_hits", 0)
    n_misses = stats.get("cache_misses", 0)

    # A zero denominator is replaced by 1, so with no recorded data both
    # rates evaluate to 0 instead of raising ZeroDivisionError.
    success_rate = n_succeeded / (n_total or 1)
    cache_hit_rate = n_hits / ((n_hits + n_misses) or 1)

    frequency = Counter()
    for text, info in stats.get("query_patterns", {}).items():
        frequency[text] = info["count"]
    top_five = [text for text, _count in frequency.most_common(5)]

    return {
        "most_common_queries": top_five,
        "success_rate": success_rate,
        "cache_hit_rate": cache_hit_rate,
        "total_queries": n_total,
        "successful_queries": n_succeeded,
        "failed_queries": stats.get("failed_queries", 0),
        "cache_hits": n_hits,
        "cache_misses": n_misses,
        "cache_evictions": cache_metrics["evictions"],
        "runtime_cache_hits": cache_metrics["hits"],
        "runtime_cache_misses": cache_metrics["misses"],
    }
|
||||
Reference in New Issue
Block a user