Odysseus v1.0

This commit is contained in:
pewdiepie-archdaemon
2026-05-31 23:58:26 +09:00
commit e5c99a5eee
421 changed files with 271349 additions and 0 deletions

136
src/search/analytics.py Normal file
View File

@@ -0,0 +1,136 @@
"""Search analytics, metrics tracking, and exception hierarchy."""
import json
import logging
from collections import Counter
from pathlib import Path
from typing import Dict, Any
from .cache import cache_metrics
# Module logger for routine diagnostics (e.g. analytics read/write failures).
logger = logging.getLogger(__name__)

# Dedicated error logger with its own file handler.
# ``delay=True`` postpones opening (and therefore creating) the log file until
# the first record is emitted, so importing this module has no filesystem
# side effect and does not fail just because the directory is not writable
# at import time.
_error_log_path = Path(__file__).resolve().parent.parent / "search_engine_error.log"
_error_handler = logging.FileHandler(_error_log_path, encoding="utf-8", delay=True)
_error_handler.setLevel(logging.WARNING)  # handler drops records below WARNING
_error_handler.setFormatter(
    logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s")
)
error_logger = logging.getLogger("search_engine_error")
error_logger.addHandler(_error_handler)
# Do not hand these records on to ancestor loggers' handlers.
error_logger.propagate = False

# Analytics file (JSON), stored next to the error log.
ANALYTICS_FILE = Path(__file__).resolve().parent.parent / "search_analytics.json"
# ----------------------------------------------------------------------
# Custom exception hierarchy
# ----------------------------------------------------------------------
class SearchEngineError(Exception):
    """Root of the search-engine exception hierarchy.

    The other error types defined in this module derive from it, so
    catching this class handles all of them.
    """


class NetworkError(SearchEngineError):
    """A network request failed (for example a timeout or a DNS error)."""


class ParseError(SearchEngineError):
    """HTML or other fetched content could not be parsed."""


class RateLimitError(SearchEngineError):
    """The remote service answered with a rate-limit response (HTTP 429)."""
# ----------------------------------------------------------------------
# Analytics helpers
# ----------------------------------------------------------------------
def _default_analytics() -> Dict[str, Any]:
    """Return a fresh analytics record with every counter at zero."""
    return {
        "total_queries": 0,
        "successful_queries": 0,
        "failed_queries": 0,
        "cache_hits": 0,
        "cache_misses": 0,
        "query_patterns": {},
    }


def _load_analytics() -> Dict[str, Any]:
    """Load analytics data from the JSON file, creating defaults if missing.

    Returns:
        A dict that always contains the keys ``total_queries``,
        ``successful_queries``, ``failed_queries``, ``cache_hits``,
        ``cache_misses`` and ``query_patterns`` (a dict). Keys absent from
        the stored file are filled in with their zero defaults; any extra
        keys found in the file are kept.

    Never raises for an unreadable or malformed file: the problem is logged
    as a warning and a default record is returned instead.
    """
    if not ANALYTICS_FILE.exists():
        default = _default_analytics()
        # First use: write the defaults so the file exists from now on.
        _save_analytics(default)
        return default

    try:
        with open(ANALYTICS_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except Exception as e:
        # Deliberately broad: analytics are best-effort and must not break
        # the caller, whatever went wrong while reading or decoding.
        logger.warning("Failed to load analytics file: %s", e)
        return _default_analytics()

    if not isinstance(loaded, dict):
        # Valid JSON but not an object (e.g. a list): unusable as a record.
        logger.warning(
            "Failed to load analytics file: expected a JSON object, got %s",
            type(loaded).__name__,
        )
        return _default_analytics()

    # Start from the defaults so a partial file still yields every key.
    analytics = _default_analytics()
    analytics.update(loaded)
    if not isinstance(analytics["query_patterns"], dict):
        analytics["query_patterns"] = {}
    return analytics
def _save_analytics(data: Dict[str, Any]) -> None:
    """Persist analytics data to the JSON file.

    The record is serialized completely before anything is written, then
    written to a sibling ``.tmp`` file and renamed over the real file. A
    serialization error or an interrupted write therefore leaves the
    previously stored analytics untouched instead of a truncated file.

    Args:
        data: The analytics record to store; must be JSON-serializable.

    Best-effort: any failure is logged as a warning and not raised.
    """
    target = Path(ANALYTICS_FILE)
    scratch = target.with_name(target.name + ".tmp")
    try:
        # Serialize first so bad data fails before the disk is touched.
        payload = json.dumps(data, indent=2)
        with open(scratch, "w", encoding="utf-8") as f:
            f.write(payload)
        # Swap the finished file into place in one step.
        scratch.replace(target)
    except Exception as e:
        logger.warning("Failed to write analytics file: %s", e)
        # Do not leave a half-written scratch file behind.
        try:
            scratch.unlink()
        except OSError:
            pass
def _record_query(query: str, success: bool, cache_hit: bool) -> None:
    """Update analytics for a single query execution.

    Loads the stored record, bumps the global counters and the per-query
    entry under ``query_patterns``, mirrors the cache outcome into the
    in-process ``cache_metrics`` counters, and saves the record back.

    Counters missing from the loaded record are treated as zero, matching
    how ``get_search_stats`` reads them, so an incomplete analytics file
    cannot raise ``KeyError`` here.

    Args:
        query: The query string, used verbatim as the pattern key.
        success: Whether the query succeeded.
        cache_hit: Whether the result was served from the cache.
    """
    analytics = _load_analytics()

    def _bump(counter_key: str) -> None:
        # Tolerant increment: an absent counter starts from zero.
        analytics[counter_key] = analytics.get(counter_key, 0) + 1

    _bump("total_queries")
    _bump("successful_queries" if success else "failed_queries")

    if cache_hit:
        _bump("cache_hits")
        cache_metrics["hits"] += 1
    else:
        _bump("cache_misses")
        cache_metrics["misses"] += 1

    patterns = analytics.setdefault("query_patterns", {})
    entry = patterns.get(query, {"count": 0, "successes": 0})
    entry["count"] = entry.get("count", 0) + 1
    if success:
        entry["successes"] = entry.get("successes", 0) + 1
    patterns[query] = entry

    _save_analytics(analytics)
def get_search_stats() -> Dict[str, Any]:
    """Return aggregated search analytics.

    Returns:
        A dict with:

        * ``most_common_queries``: up to five query strings, most frequent
          first.
        * ``success_rate``: successful queries divided by total queries.
        * ``cache_hit_rate``: cache hits divided by hits plus misses.
        * ``total_queries``, ``successful_queries``, ``failed_queries``,
          ``cache_hits``, ``cache_misses``: the persisted counters.
        * ``cache_evictions``, ``runtime_cache_hits``,
          ``runtime_cache_misses``: the in-process ``cache_metrics`` values.

        Both rates are 0 when nothing has been recorded. Counters missing
        from the stored record, and pattern entries without a ``count``,
        are treated as zero.
    """
    analytics = _load_analytics()

    total_queries = analytics.get("total_queries", 0)
    successful_queries = analytics.get("successful_queries", 0)
    failed_queries = analytics.get("failed_queries", 0)
    cache_hits = analytics.get("cache_hits", 0)
    cache_misses = analytics.get("cache_misses", 0)

    # ``or 1`` keeps the divisions defined when no data has been recorded.
    success_rate = successful_queries / (total_queries or 1)
    cache_hit_rate = cache_hits / ((cache_hits + cache_misses) or 1)

    pattern_counter = Counter({
        q: entry.get("count", 0)
        for q, entry in analytics.get("query_patterns", {}).items()
    })
    most_common = [q for q, _ in pattern_counter.most_common(5)]

    return {
        "most_common_queries": most_common,
        "success_rate": success_rate,
        "cache_hit_rate": cache_hit_rate,
        "total_queries": total_queries,
        "successful_queries": successful_queries,
        "failed_queries": failed_queries,
        "cache_hits": cache_hits,
        "cache_misses": cache_misses,
        "cache_evictions": cache_metrics["evictions"],
        "runtime_cache_hits": cache_metrics["hits"],
        "runtime_cache_misses": cache_metrics["misses"],
    }