Odysseus v1.0

This commit is contained in:
pewdiepie-archdaemon
2026-05-31 23:58:26 +09:00
commit e5c99a5eee
421 changed files with 271349 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
# services/memory/__init__.py
"""Memory service — persistent memory storage and retrieval."""
from .service import MemoryService, Memory, MemorySearchResult
from .memory import MemoryManager
from .memory_vector import MemoryVectorStore
__all__ = [
"MemoryService",
"Memory",
"MemorySearchResult",
"MemoryManager",
"MemoryVectorStore",
]

359
services/memory/memory.py Normal file
View File

@@ -0,0 +1,359 @@
import json
import logging
import os
import time
import uuid
import re
from typing import List, Dict, Tuple
from datetime import datetime
logger = logging.getLogger(__name__)
def tokenize(text: str) -> List[str]:
    """Split *text* on whitespace and trim common punctuation from each word.

    Only the characters ``. , ! ? " ;`` are trimmed, and only from the two
    ends of a word; punctuation inside a word (``don't``, ``a.b``) is kept.
    Words made up solely of those characters are dropped, so a stray ``"!"``
    or ``"..."`` never turns into an empty-string token.

    Args:
        text: The text to tokenize. Case is preserved.

    Returns:
        The non-empty tokens in their original order.
    """
    trimmed_words = (word.strip('.,!?";') for word in text.split())
    # An all-punctuation word strips down to "", which would otherwise be
    # counted as a token shared by two unrelated texts.
    return [word for word in trimmed_words if word]
def get_text_similarity(text1: str, text2: str) -> float:
    """Return the Jaccard similarity of the two texts' token sets.

    Both texts are lower-cased and tokenized with ``tokenize`` before the
    comparison.

    Returns:
        ``0.0`` when either text is empty/falsy or only one of them yields
        tokens, ``1.0`` when neither yields any token, otherwise
        ``|intersection| / |union|`` of the two token sets.
    """
    if not (text1 and text2):
        return 0.0

    left = set(tokenize(text1.lower()))
    right = set(tokenize(text2.lower()))

    if not left or not right:
        # Two token-less texts count as identical; a single token-less one
        # shares nothing with the other.
        return 1.0 if not left and not right else 0.0

    shared = left & right
    combined = left | right
    return len(shared) / len(combined)
class MemoryManager:
    """JSON-file-backed store of memory entries plus keyword relevance helpers.

    Entries are plain dicts kept as a JSON list in ``memory.json`` inside
    ``data_dir``. On load and on save every entry is given an ``id``,
    ``timestamp``, ``source`` and ``category`` if it lacks one; ``text`` and
    the optional ``owner`` are supplied by callers.
    """

    # Keyword groups used to classify a query. They are checked in this
    # order and the first group with a hit decides the query type.
    _QUERY_KEYWORDS = (
        ("identity", ("name", "who", "i", "am", "called", "identity", "myself", "me", "my")),
        ("contact", ("phone", "email", "address", "contact", "number", "where", "located", "reach")),
        ("preference", ("like", "prefer", "favorite", "want", "love", "hate", "dislike", "enjoy", "interested")),
        ("task", ("todo", "task", "remind", "meeting", "appointment", "schedule", "deadline")),
        ("fact", ("what", "when", "where", "how", "why", "explain", "describe", "information", "know")),
    )

    # query type -> (substrings that mark a matching memory, score multiplier)
    _SCORE_BOOSTS = {
        "contact": (("@gmail.com", "@", ".com", "phone", "number", "address", "http", "www", "tel:"), 1.4),
        "preference": (("like", "love", "hate", "dislike", "prefer", "favorite", "enjoy", "interested"), 1.3),
        "task": (("todo", "task", "remind", "meeting", "appointment", "schedule", "deadline", "need to"), 1.3),
    }

    # Substrings that mark a memory as describing who someone is.
    _IDENTITY_PHRASES = ("name is", "i'm", "i am", "called", "my name", "named", "call me")

    def __init__(self, data_dir: str):
        """Point the manager at ``<data_dir>/memory.json``, creating it if absent."""
        self.memory_file = os.path.join(data_dir, "memory.json")
        self.ensure_file_exists()

    def extract_memory_from_chat(self, chat_history: List[Dict], session_id: str = None) -> List[Dict]:
        """
        Extract memory entries from chat history as a fallback when LLM fails.

        Only assistant messages are scanned. Every line that is a list item
        (``-``, ``*`` or ``•`` bullet, or ``N.`` number, followed by
        whitespace) contributes the text after the marker. Requiring the
        whitespace keeps separators (``---``), bold markers (``**x**``) and
        decimals (``3.14``) from being mistaken for list items.

        Args:
            chat_history: List of chat messages with 'role' and 'content' keys
            session_id: Optional session ID to associate with extracted memories
        Returns:
            List of memory entries with text, timestamp, and optional session_id
        """
        memories = []
        for msg in chat_history:
            if msg.get("role") != "assistant":
                continue
            content = str(msg.get("content", ""))
            for raw_line in content.split('\n'):
                # The alternation must be grouped: un-grouped, a bullet line
                # matches without capturing anything and group(1) is None.
                item = re.match(r'^(?:[-*•]|\d+\.)\s+(.*)', raw_line.strip())
                if not item:
                    continue
                text = item.group(1).strip()
                if text:
                    memories.append({
                        "text": text,
                        "timestamp": int(datetime.now().timestamp()),
                        "session_id": session_id
                    })
        return memories

    def process_inline_memory_command(self, message: str) -> Tuple[bool, str]:
        """
        Check if a message is an inline memory command (e.g. "remember: X").

        The verb (remember / memorize / save / note / store) must start the
        message; the ``:`` or ``-`` after it is optional, so "remember X"
        matches as well. Matching is case-insensitive.

        Args:
            message: The user message to check
        Returns:
            Tuple of (is_command, extracted_text) where is_command is True if
            the message matches the memory command pattern; extracted_text is
            "" when it does not.
        """
        pattern = r'^(?:remember|memorize|save|note|store)[:\-]?\s+(.+)$'
        match = re.match(pattern, message.strip(), re.IGNORECASE)
        if match is None:
            return False, ""
        return True, match.group(1).strip()

    def ensure_file_exists(self):
        """Create memory file (and its directory) if it doesn't exist."""
        if os.path.exists(self.memory_file):
            return
        parent = os.path.dirname(self.memory_file)
        if parent:
            # A fresh install may not have the data directory yet; without
            # this the open() below raises FileNotFoundError.
            os.makedirs(parent, exist_ok=True)
        with open(self.memory_file, 'w', encoding='utf-8') as f:
            json.dump([], f, ensure_ascii=False, indent=2)

    def load_all(self) -> List[Dict]:
        """Load all memory entries from JSON file (unfiltered).

        Returns an empty list when the file is missing or does not hold a
        JSON list. If the file cannot be parsed or read (JSON decode error or
        permission error) the legacy ``memory.txt`` migration is attempted
        instead.
        """
        if not os.path.exists(self.memory_file):
            return []
        try:
            with open(self.memory_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, PermissionError) as e:
            logger.error("Error loading memory.json: %s", e)
            return self._migrate_from_legacy()
        if isinstance(data, list):
            return self._validate_entries(data)
        return []

    def load(self, owner: str = None) -> List[Dict]:
        """Load memory entries, filtered by owner.

        ``owner=None`` returns every entry; any other value returns only the
        entries whose ``owner`` field equals it.
        """
        entries = self.load_all()
        if owner is None:
            return entries
        return [e for e in entries if e.get("owner") == owner]

    def claim_ownerless(self, owner: str):
        """Assign all ownerless memory entries to the given owner. Run once to migrate."""
        entries = self.load_all()
        claimed = 0
        for e in entries:
            if not e.get("owner"):
                e["owner"] = owner
                claimed += 1
        if claimed:
            self.save(entries)
            # Report only what this call changed, not everything the owner
            # already had.
            logger.info("Claimed %d ownerless memories for %s", claimed, owner)

    @staticmethod
    def _fill_defaults(entry: Dict, default_source: str) -> Dict:
        """Add the required bookkeeping fields to *entry* in place if missing."""
        if "id" not in entry:
            entry["id"] = str(uuid.uuid4())
        if "timestamp" not in entry:
            entry["timestamp"] = int(time.time())
        if "source" not in entry:
            entry["source"] = default_source
        if "category" not in entry:
            entry["category"] = "fact"
        return entry

    @staticmethod
    def _entry_text(entry: Dict) -> str:
        """Return the entry's text, or "" when it is missing or not a string."""
        text = entry.get("text")
        return text if isinstance(text, str) else ""

    def _validate_entries(self, entries: List[Dict]) -> List[Dict]:
        """Ensure all entries have required fields.

        Items that are not dicts cannot be memory entries and are skipped
        (with a warning) instead of crashing the load. Missing ``source``
        defaults to "unknown" here, since the origin of a loaded entry is not
        known.
        """
        validated = []
        for entry in entries:
            if not isinstance(entry, dict):
                logger.warning("Skipping malformed memory entry: %r", entry)
                continue
            validated.append(self._fill_defaults(entry, "unknown"))
        return validated

    def _migrate_from_legacy(self) -> List[Dict]:
        """Migrate from old text format to JSON if needed.

        Each non-blank line of ``memory.txt`` (next to ``memory.json``)
        becomes one entry; the result is saved and returned. Returns an empty
        list when there is no legacy file or the conversion fails.
        """
        legacy_path = os.path.join(os.path.dirname(self.memory_file), "memory.txt")
        if not os.path.exists(legacy_path):
            return []
        logger.info("Converting legacy memory.txt to new JSON format")
        try:
            with open(legacy_path, "r", encoding="utf-8") as f:
                lines = [ln.strip() for ln in f if ln.strip()]
            entries = [
                {
                    "id": str(uuid.uuid4()),
                    "text": line,
                    "timestamp": int(time.time()),
                    "source": "user",
                    "category": "fact"
                }
                for line in lines
            ]
            self.save(entries)
            return entries
        except Exception as e:
            logger.error("Failed to convert legacy memory: %s", e)
            return []

    def save(self, entries: List[Dict]):
        """Save memory entries to JSON file.

        Missing bookkeeping fields are filled in place first (``source``
        defaults to "user"). The list replaces the whole file.
        """
        for entry in entries:
            self._fill_defaults(entry, "user")
        # Write to a sibling temp file and swap it in, so a crash mid-write
        # cannot leave a truncated memory.json behind.
        tmp_file = self.memory_file + ".tmp"
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(entries, f, ensure_ascii=False, indent=2)
        os.replace(tmp_file, self.memory_file)

    def add_entry(self, text: str, source: str = "user", category: str = "fact", owner: str = None) -> Dict:
        """Build a new memory entry.

        The entry is only constructed and returned; it is NOT written to
        disk. Callers append it to their list and call ``save``.

        Raises:
            ValueError: if *text* is empty or whitespace only.
        """
        if not text.strip():
            raise ValueError("Memory text cannot be empty")
        entry = {
            "id": str(uuid.uuid4()),
            "text": text.strip(),
            "timestamp": int(time.time()),
            "source": source,
            "category": category
        }
        if owner:
            entry["owner"] = owner
        return entry

    def find_duplicates(self, text: str, entries: List[Dict] = None) -> List[Dict]:
        """Find duplicate memory entries based on text content.

        An entry is a duplicate when its text equals the stripped *text*,
        ignoring case. Entries without a string ``text`` are never duplicates.
        When *entries* is None, all stored entries are searched.
        """
        if entries is None:
            entries = self.load()
        needle = text.strip().lower()
        return [
            entry for entry in entries
            if isinstance(entry.get("text"), str) and entry["text"].lower() == needle
        ]

    def categorize_memory_by_relevance(self, message: str, memories: list):
        """Categorize memories by type and relevance

        A memory is first typed by keywords in its own text (contact,
        preference, task, otherwise general fact). It is kept only if the
        message contains a keyword of the same type or, for general facts,
        if its Jaccard similarity to the message exceeds 0.4.

        Returns:
            Dict with the keys "contacts", "preferences", "facts" and
            "tasks", each a list of the memories kept for that type.
        """
        categories = {
            "contacts": [],
            "preferences": [],
            "facts": [],
            "tasks": []
        }
        # (bucket, keywords looked for in the memory, keywords looked for in
        # the message); the first rule whose memory keywords hit is the only
        # one considered for that memory.
        rules = (
            ("contacts", ("phone", "email", "address", "lives", "works"),
             ("contact", "phone", "address", "email")),
            ("preferences", ("likes", "dislikes", "prefers", "favorite"),
             ("like", "prefer", "favorite", "want")),
            ("tasks", ("todo", "task", "remind", "meeting"),
             ("todo", "task", "schedule", "remind")),
        )
        msg_lower = message.lower()
        for mem in memories:
            text = self._entry_text(mem)
            text_lower = text.lower()
            for bucket, memory_words, message_words in rules:
                if any(word in text_lower for word in memory_words):
                    if any(word in msg_lower for word in message_words):
                        categories[bucket].append(mem)
                    break
            else:
                # General facts - only if very relevant
                if get_text_similarity(message, text) > 0.4:
                    categories["facts"].append(mem)
        return categories

    @classmethod
    def _classify_query(cls, query_lower: str):
        """Return the query type ("identity", "contact", ...) or None.

        Keywords are compared against whole words of the query. Keywords of
        four or more letters also match as a word prefix so inflections such
        as "likes" or "preferences" still count. A plain substring test would
        make the one-letter keyword "i" match almost every query.
        """
        words = set(re.findall(r"[a-z0-9]+", query_lower))
        for label, keywords in cls._QUERY_KEYWORDS:
            for keyword in keywords:
                if keyword in words:
                    return label
                if len(keyword) >= 4 and any(word.startswith(keyword) for word in words):
                    return label
        return None

    @classmethod
    def _is_identity_memory(cls, memory: Dict) -> bool:
        """True if the memory holds a "First Last"-style name or an identity phrase."""
        text = cls._entry_text(memory)
        if re.search(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', text):
            return True
        text_lower = text.lower()
        return any(phrase in text_lower for phrase in cls._IDENTITY_PHRASES)

    def get_relevant_memories(self, query: str, memories: list, threshold: float = 0.05, max_items: int = 8):
        """Get memories that are relevant to the query based on text similarity and semantic keyword matching.

        The query is classified by keyword (identity, contact, preference,
        task, fact). For identity queries every identity memory is included
        with a fixed score of 0.9. All other candidates are scored by Jaccard
        token similarity, multiplied by a boost when the memory looks like
        the query's type, and raised to at least 0.8 when the whole query
        appears verbatim in the memory.

        For non-identity queries, identity-looking memories are scored by
        similarity like any other memory rather than being left out.

        Args:
            query: The user's query text.
            memories: Candidate memory entries (dicts with a "text" key).
            threshold: Minimum final score for a similarity-scored memory.
            max_items: Maximum number of memories returned.
        Returns:
            Up to *max_items* memories, highest score first; ties keep their
            input order.
        """
        if not memories or not query.strip():
            return []

        query_lower = query.lower()
        query_type = self._classify_query(query_lower)
        query_tokens = set(tokenize(query_lower))

        scored = []
        if query_type == "identity":
            candidates = []
            for memory in memories:
                if self._is_identity_memory(memory):
                    # High fixed score so identity memories come first.
                    scored.append((0.9, memory))
                else:
                    candidates.append(memory)
        else:
            candidates = memories

        boost_words, boost_factor = self._SCORE_BOOSTS.get(query_type, ((), 1.0))
        for memory in candidates:
            memory_text = self._entry_text(memory).lower()
            memory_tokens = set(tokenize(memory_text))
            if not query_tokens or not memory_tokens:
                continue
            score = len(query_tokens & memory_tokens) / len(query_tokens | memory_tokens)
            if any(word in memory_text for word in boost_words):
                score *= boost_factor
            # Always consider exact phrase matches as highly relevant
            if query_lower in memory_text:
                score = max(score, 0.8)
            if score >= threshold:
                scored.append((score, memory))

        # list.sort is stable, so equal scores keep insertion order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [memory for _, memory in scored[:max_items]]

View File

@@ -0,0 +1,533 @@
"""
memory_extractor.py
Background auto-extraction of facts from chat conversations.
After each LLM response, this module sends the last few messages to the LLM
asking it to extract memorable facts, then stores them in both memory.json
and the vector index (the ChromaDB-backed MemoryVectorStore in memory_vector.py).
Periodically audits all memories via LLM to consolidate duplicates,
rewrite vague entries, and remove junk.
"""
import hashlib
import json
import logging
import os
import re
from typing import Optional
logger = logging.getLogger(__name__)
def _tidy_state_path(memory_manager) -> str:
"""Sidecar JSON next to memory.json that remembers the fingerprint of
the last successfully-audited state per owner. Lets the audit short-
circuit when nothing has changed since the previous tidy — running
the LLM again on an already-clean list was wasting 30-120s per call
and occasionally timing out on the second pass."""
return os.path.join(os.path.dirname(memory_manager.memory_file), "memory_tidy_state.json")
def _fingerprint_entries(entries) -> str:
"""Stable hash of an owner's memories — order-independent, depends
only on id+text+category. Any add/edit/delete invalidates it."""
items = sorted(
(str(e.get("id", "")), e.get("text", ""), e.get("category", ""))
for e in entries
)
h = hashlib.sha256()
for triple in items:
h.update(("\x1f".join(triple) + "\x1e").encode("utf-8"))
return h.hexdigest()
def _load_tidy_state(memory_manager) -> dict:
    """Read the tidy-state sidecar as a dict.

    The state is only a cache of audit fingerprints, so any failure to read
    it means "no state" rather than an error: a missing or unreadable file,
    invalid JSON, undecodable bytes, or a top-level value that is not an
    object all yield ``{}``.
    """
    path = _tidy_state_path(memory_manager)
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers missing/unreadable files; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return data if isinstance(data, dict) else {}
def _save_tidy_state(memory_manager, owner: Optional[str], fingerprint: str) -> None:
    """Record *fingerprint* as the last audited state for *owner*.

    The sidecar holds one entry per owner (a missing owner is stored under
    the key ``""``); other owners' entries are kept. A failure to write is
    logged and otherwise ignored, because the state is only an optimisation.
    """
    path = _tidy_state_path(memory_manager)
    state = _load_tidy_state(memory_manager)
    state[owner or ""] = {"fingerprint": fingerprint}
    # Write to a temp file and swap it in, so an interrupted write cannot
    # truncate the file that holds every owner's fingerprint.
    tmp_path = path + ".tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(state, f, indent=2)
        os.replace(tmp_path, path)
    except OSError as e:
        logger.warning(f"Could not persist tidy fingerprint: {e}")
# System prompt for the per-conversation extraction call in
# extract_and_store. The reply is parsed there with json.loads and is
# expected to be a list of {'text', 'category'} objects (bare strings are
# accepted too).
EXTRACT_SYSTEM_PROMPT = (
    "You are a memory extraction assistant. Analyze the conversation and extract ONLY "
    "durable personal facts about the user that would be useful across many future conversations.\n\n"
    "Good examples: name, job title, city, family members, long-term projects, strong preferences.\n"
    "Bad examples: what they asked about today, temporary moods, generic statements, "
    "things the assistant said, one-off tasks, opinions on the current topic.\n\n"
    "Rules:\n"
    "- MAX 2 facts per conversation — only the most important\n"
    "- Only extract facts the USER stated or clearly implied\n"
    "- Each fact must be a single short sentence (under 15 words)\n"
    "- If a fact is similar to something likely already known, skip it\n"
    "- If nothing durable was revealed, return []\n\n"
    "Return a JSON array of objects with 'text' and 'category' fields.\n"
    "Categories: 'identity', 'preference', 'fact', 'contact', 'project', 'goal'\n\n"
    "Return ONLY valid JSON, no markdown fences."
)
# Number of most recent session messages passed to the extraction LLM call
# (the regex fallback scans the same slice).
CONTEXT_WINDOW = 6
# System prompt for audit_memories. The user message sent with it is a JSON
# list of {id, text, category}; the reply is matched back to stored entries
# by 'id', and items with an unknown id are discarded.
AUDIT_SYSTEM_PROMPT = (
    "You are a memory database curator. Be CONSERVATIVE: remove only TRUE "
    "duplicates and clearly useless entries. Every distinct fact must survive. "
    "When in doubt, KEEP the entry. Return the cleaned list.\n\n"
    "Rules:\n"
    "1. MERGE only entries that state the SAME fact in different words. If you "
    "are not sure two entries are the same fact, KEEP BOTH.\n"
    " Merge: 'User's name is Sam' + 'The user is called Sam' -> one.\n"
    " Do NOT merge related-but-distinct facts: 'Likes Python' and 'Uses "
    "Python at work' are DIFFERENT — keep both.\n"
    "2. REMOVE only entries that are genuinely worthless: about what the AI did "
    "(not the user), empty, or meaningless. Do NOT drop a real fact just "
    "because it seems minor or niche.\n"
    "3. Keep the original wording. Only lightly trim obvious redundancy — do "
    "NOT aggressively rewrite or shorten.\n"
    "4. Preserve the 'id' of the entry you keep when merging.\n"
    "5. Never invent facts. When unsure, KEEP.\n\n"
    "Return a JSON array of objects with fields: id, text, category.\n"
    "Return ONLY valid JSON, no markdown fences."
)
# An audit is triggered once this many memories have been auto-added since
# the previous automatic audit.
AUDIT_INTERVAL = 5  # audit every N new memories added
# Running count of auto-added memories since the last automatic audit. It is
# a single module-level counter: extract_and_store increments it by the
# number of memories added (not per call) regardless of owner, and resets it
# to 0 when the audit is triggered.
_extractions_since_audit = 0
def _message_text(message) -> str:
content = getattr(message, "content", None)
if content is None and isinstance(message, dict):
content = message.get("content")
if isinstance(content, str):
return content.strip()
if isinstance(content, list):
parts = []
for item in content:
if isinstance(item, dict):
parts.append(str(item.get("text") or item.get("content") or ""))
else:
parts.append(str(item))
return " ".join(p for p in parts if p).strip()
return ""
def _message_role(message) -> str:
role = getattr(message, "role", None)
if role is None and isinstance(message, dict):
role = message.get("role")
return str(role or "").lower()
def _clean_memory_value(value: str, max_len: int = 80) -> str:
value = re.sub(r"\s+", " ", value or "").strip(" .,!?:;\"'`“”‘’")
value = re.sub(r"^(?:the|a|an)\s+", "", value, flags=re.I)
if not value or len(value) > max_len:
return ""
if re.search(r"https?://|@|[{}<>]", value):
return ""
return value
def _fallback_memory_candidates(messages) -> list[dict]:
    """Extract obvious durable facts without relying on the LLM.

    This is deliberately narrow. The LLM remains the main extractor, but
    simple identity/preference/goal statements should not silently vanish just
    because the background model judged them too conversational.

    Only user messages are scanned. Recognised statements:

    - "my name is X" / "call me X"   -> identity
    - "I live in X"                  -> identity ("User lives in X")
    - "I am from X" / "I'm from X"   -> identity ("User is from X")
    - "I prefer/like/love X"         -> preference ("User prefers X")
    - "I hate / don't like X"        -> preference ("User dislikes X")
    - "I want to go/travel/move/visit to X" -> goal

    Negative statements are recorded as dislikes rather than folded into
    "prefers", and origin ("from") is kept apart from residence ("live in").

    Args:
        messages: Chat messages (objects or dicts with role and content).

    Returns:
        At most two ``{"text", "category"}`` dicts, de-duplicated
        case-insensitively, in order of discovery.
    """
    candidates = []
    seen = set()

    def add(text: str, category: str):
        # The same cleaner that vets captured phrases also vets the final
        # sentence (it trims the trailing period and rejects unsafe content).
        text = _clean_memory_value(text, 120)
        if not text:
            return
        key = text.lower()
        if key in seen:
            return
        seen.add(key)
        candidates.append({"text": text, "category": category})

    negative_verbs = {"hate", "do not like", "don't like", "don’t like"}

    for msg in messages:
        if _message_role(msg) != "user":
            continue
        text = _message_text(msg)
        if not text:
            continue

        m = re.search(r"\bmy name is\s+([A-Za-z][A-Za-z0-9 .'\-]{1,50})\b", text, re.I)
        if m:
            name = _clean_memory_value(m.group(1), 50)
            if name:
                add(f"User's name is {name}.", "identity")

        m = re.search(r"\bcall me\s+([A-Za-z][A-Za-z0-9 .'\-]{1,50})\b", text, re.I)
        if m:
            name = _clean_memory_value(m.group(1), 50)
            if name:
                add(f"User wants to be called {name}.", "identity")

        m = re.search(r"\bi live in\s+([^.!?\n]{2,80})", text, re.I)
        if m:
            place = _clean_memory_value(m.group(1), 80)
            if place:
                add(f"User lives in {place}.", "identity")

        # "I'm" has no space before the apostrophe, so the contraction needs
        # its own alternative directly after the "i".
        m = re.search(r"\bi(?: am|['’]m) from\s+([^.!?\n]{2,80})", text, re.I)
        if m:
            place = _clean_memory_value(m.group(1), 80)
            if place:
                add(f"User is from {place}.", "identity")

        m = re.search(
            r"\bi (prefer|like|love|hate|do not like|don['’]t like)\s+([^.!?\n]{4,100})",
            text,
            re.I,
        )
        if m:
            verb = m.group(1).lower()
            preference = _clean_memory_value(m.group(2), 100)
            if preference:
                if verb in negative_verbs:
                    add(f"User dislikes {preference}.", "preference")
                else:
                    add(f"User prefers {preference}.", "preference")

        m = re.search(
            r"\bi (?:(?:want|would like|plan|hope) to|wanna) "
            r"(?:go|travel|move|visit) to\s+([^.!?\n]{2,80})",
            text,
            re.I,
        )
        if m:
            destination = _clean_memory_value(m.group(1), 80)
            if destination:
                add(f"User wants to visit {destination}.", "goal")

    return candidates[:2]
def _is_text_duplicate(new_text: str, existing: list, threshold: float = 0.6) -> bool:
"""Check if new_text is too similar to any existing memory (Jaccard similarity)."""
new_tokens = set(new_text.lower().split())
if not new_tokens:
return False
for entry in existing:
old_tokens = set(entry.get("text", "").lower().split())
if not old_tokens:
continue
intersection = new_tokens & old_tokens
union = new_tokens | old_tokens
if len(intersection) / len(union) >= threshold:
return True
return False
async def extract_and_store(
    session,
    memory_manager,
    memory_vector,
    endpoint_url: str,
    model: str,
    headers: Optional[dict] = None,
):
    """Extract facts from recent conversation and store them.

    Designed to run as a background task (asyncio.create_task).
    Errors are logged, never raised.

    Steps:
      1. Take the last CONTEXT_WINDOW messages of the session (at least two
         are required).
      2. Ask the LLM for facts and add the regex fallback candidates.
      3. Drop candidates that duplicate an existing memory (vector match,
         exact text, then fuzzy token overlap).
      4. Append the survivors to memory.json and the vector index, fire one
         "memory_added" event per new entry, and run an audit once
         AUDIT_INTERVAL memories have been added since the last one.

    Candidates come from an LLM and are not trusted: an item whose text is
    not a string is skipped on its own. Letting it raise would abort the
    batch after earlier facts had already been added to the vector index but
    before anything was saved.

    Args:
        session: Object providing ``get_context_messages()`` and optionally
            ``owner``, ``session_id`` or ``name``.
        memory_manager: Store providing load_all/find_duplicates/add_entry/save.
        memory_vector: Vector store, or a falsy value when unavailable.
        endpoint_url: LLM endpoint passed through to ``llm_call_async``.
        model: Model name passed through to ``llm_call_async``.
        headers: Optional HTTP headers for the LLM call.
    """
    global _extractions_since_audit
    try:
        from src.llm_core import llm_call_async

        # Get last N messages from session
        messages = session.get_context_messages()
        recent = messages[-CONTEXT_WINDOW:] if len(messages) > CONTEXT_WINDOW else messages
        if len(recent) < 2:
            return  # Need at least a user message and assistant response

        fallback_facts = _fallback_memory_candidates(recent)
        extraction_messages = [
            {"role": "system", "content": EXTRACT_SYSTEM_PROMPT},
        ] + recent

        facts = []
        try:
            raw = await llm_call_async(
                endpoint_url,
                model,
                extraction_messages,
                temperature=0.1,
                max_tokens=500,
                headers=headers,
            )
            # Parse JSON from response (handle markdown fences if model wraps them)
            text = raw.strip()
            if text.startswith("```"):
                text = text.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
            try:
                facts = json.loads(text)
            except json.JSONDecodeError:
                logger.debug("Memory extraction returned non-JSON")
        except Exception as e:
            # Best effort: the regex fallback below can still yield facts.
            logger.warning(f"LLM memory extraction failed; using fallback candidates if available: {e}")

        if not isinstance(facts, list):
            facts = []
        if fallback_facts:
            facts = list(facts) + fallback_facts
        if not facts:
            logger.info("Auto memory extraction ran: 0 candidates")
            return

        # Get owner from session
        _owner = getattr(session, 'owner', None)
        existing = memory_manager.load_all()
        added = 0

        for fact in facts:
            if isinstance(fact, str):
                fact_text = fact.strip()
                category = "fact"
            elif isinstance(fact, dict):
                raw_text = fact.get("text")
                if not isinstance(raw_text, str):
                    # e.g. {"text": null}: skip this item, keep the rest.
                    continue
                fact_text = raw_text.strip()
                raw_category = fact.get("category")
                if isinstance(raw_category, str) and raw_category.strip():
                    category = raw_category.strip()
                else:
                    category = "fact"
            else:
                continue

            if not fact_text or len(fact_text) < 5:
                continue

            # Dedup: check vector similarity first (fast), then exact text match
            # NOTE(review): find_similar takes no owner argument, so this
            # check presumably spans every owner's vectors; confirm that
            # suppressing a fact another owner already has is intended.
            if memory_vector and memory_vector.healthy:
                existing_id = memory_vector.find_similar(fact_text, threshold=0.72)
                if existing_id:
                    logger.debug(f"Memory dedup (vector): '{fact_text[:50]}' matches {existing_id}")
                    continue

            # Text dedup fallback: exact match + fuzzy similarity. Recomputed
            # per fact because `existing` grows as entries are accepted.
            if _owner:
                user_existing = [
                    e for e in existing
                    if e.get("owner") == _owner or e.get("owner") is None
                ]
            else:
                user_existing = existing
            if memory_manager.find_duplicates(fact_text, user_existing):
                continue
            # Fuzzy text similarity check (catches rephrased duplicates when vector index is unavailable)
            if _is_text_duplicate(fact_text, user_existing):
                logger.debug(f"Memory dedup (fuzzy): '{fact_text[:50]}' too similar to existing")
                continue

            entry = memory_manager.add_entry(fact_text, source="auto", category=category, owner=_owner)
            # Auto-pin identity facts (name, job, location) — core context
            if category == "identity":
                entry["pinned"] = True
            if hasattr(session, "session_id"):
                entry["session_id"] = session.session_id
            elif hasattr(session, "name"):
                entry["session_id"] = session.name
            existing.append(entry)

            # Add to vector index
            if memory_vector and memory_vector.healthy:
                memory_vector.add(entry["id"], fact_text)
            added += 1

        if added > 0:
            memory_manager.save(existing)
            try:
                from src.event_bus import fire_event
                for _ in range(added):
                    fire_event("memory_added", _owner)
            except Exception:
                logger.debug("memory_added event dispatch failed", exc_info=True)
            logger.info(f"Auto-extracted {added} memories from session")

            _extractions_since_audit += added
            if _extractions_since_audit >= AUDIT_INTERVAL:
                _extractions_since_audit = 0
                logger.info("Audit threshold reached, running memory audit")
                await audit_memories(
                    memory_manager, memory_vector, endpoint_url, model, headers, owner=_owner
                )
        else:
            logger.info("Auto memory extraction ran: 0 added")
    except Exception as e:
        # Background task: log with traceback and swallow, never raise.
        logger.error(f"Memory extraction failed: {e}", exc_info=True)
def _strip_llm_reasoning(raw) -> str:
    """Return the LLM reply without <think>/<thinking> blocks, stripped."""
    text = (raw or "").strip()
    return re.sub(r'<think(?:ing)?>[\s\S]*?</think(?:ing)?>', '', text, flags=re.I).strip()


def _parse_llm_json_list(text: str) -> Optional[list]:
    """Parse a JSON array out of an LLM reply, tolerating common noise.

    Tried in order: the whole text, the body of the first markdown fence,
    and the span from the first ``[`` to the last ``]``. Each attempt is also
    retried with trailing commas removed. Returns None if nothing parses to
    a list.
    """
    def _loads_list(candidate):
        if not candidate:
            return None
        for variant in (candidate, re.sub(r',(\s*[}\]])', r'\1', candidate)):
            try:
                value = json.loads(variant)
            except (ValueError, RecursionError):
                continue
            if isinstance(value, list):
                return value
        return None

    parsed = _loads_list(text)
    if parsed is None:
        fenced = re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
        if fenced:
            parsed = _loads_list(fenced.group(1).strip())
    if parsed is None:
        start, end = text.find('['), text.rfind(']')
        if start >= 0 and end > start:
            parsed = _loads_list(text[start:end + 1])
    return parsed


async def audit_memories(
    memory_manager,
    memory_vector,
    endpoint_url: str,
    model: str,
    headers: Optional[dict] = None,
    owner: Optional[str] = None,
):
    """Send an owner's memories to the LLM for conservative de-duplication.

    - Merges entries that state the same fact
    - Removes junk / non-personal entries
    - Keeps original wording apart from light trimming (see
      AUDIT_SYSTEM_PROMPT)
    - Rebuilds the vector index afterwards

    Safe to call manually or from the automatic trigger in extract_and_store.
    Errors are logged, never raised.

    Args:
        memory_manager: Store providing load/load_all/save.
        memory_vector: Vector store, or a falsy value when unavailable.
        endpoint_url: LLM endpoint passed through to ``llm_call_async``.
        model: Model name passed through to ``llm_call_async``.
        headers: Optional HTTP headers for the LLM call.
        owner: Audit only this owner's memories; ``None`` audits everything.

    Returns:
        ``{"before": int, "after": int}``, plus ``"already_tidy": True``
        when nothing changed since the last audit, or ``"error"`` set to
        "bad_json" / "unsafe_removal" when the result was rejected. On an
        unexpected exception only ``{"error": str}`` is returned.
    """
    try:
        from src.llm_core import llm_call_async

        existing = memory_manager.load(owner=owner)
        if not existing:
            logger.info("Memory audit: nothing to audit")
            return {"before": 0, "after": 0}
        before_count = len(existing)

        # Skip the LLM call entirely when this exact set of memories was
        # already audited — the previous tidy left them in a clean state
        # and nothing has changed since. Returns instantly so the UI shows
        # "Already clean" without spending 30-120s on a wasted LLM round.
        # The fingerprint includes id+text+category; any add/edit/delete
        # invalidates it and the audit runs normally.
        current_fp = _fingerprint_entries(existing)
        last_state = _load_tidy_state(memory_manager).get(owner or "") or {}
        if last_state.get("fingerprint") == current_fp:
            logger.info("Memory audit: state unchanged since last tidy — skipping LLM")
            return {
                "before": before_count,
                "after": before_count,
                "already_tidy": True,
            }

        # Build payload: list of {id, text, category} for the LLM
        memory_payload = [
            {"id": m["id"], "text": m.get("text", ""), "category": m.get("category", "fact")}
            for m in existing
        ]
        audit_messages = [
            {"role": "system", "content": AUDIT_SYSTEM_PROMPT},
            {"role": "user", "content": json.dumps(memory_payload, ensure_ascii=False)},
        ]
        raw = await llm_call_async(
            endpoint_url,
            model,
            audit_messages,
            temperature=0.1,
            # 16384 (was 2000): the deduped list of all memories can be large,
            # and a reasoning model spends tokens thinking first — 2000 truncated
            # the JSON so it never parsed ("bad_json").
            max_tokens=16384,
            headers=headers,
            # Bound the call so the Tidy whirlpool can't spin indefinitely on a
            # slow/large generation.
            timeout=120,
        )

        # Parse the JSON list, tolerating reasoning-model noise: <think> blocks,
        # markdown fences, leading prose, and trailing commas.
        text = _strip_llm_reasoning(raw)
        cleaned = _parse_llm_json_list(text)
        if cleaned is None:
            logger.error(f"Memory audit returned non-JSON: {text[:300]}")
            return {"before": before_count, "after": before_count, "error": "bad_json"}

        # Build lookup of original entries by ID so we can preserve metadata
        originals = {m["id"]: m for m in existing}
        final_entries = []
        kept_ids = set()
        for item in cleaned:
            if not isinstance(item, dict):
                continue
            mid = item.get("id", "")
            new_text = item.get("text", "")
            if not isinstance(new_text, str) or not new_text.strip():
                continue
            if not isinstance(mid, (str, int)) or mid not in originals:
                # ID not found — skip to avoid inventing entries
                logger.debug(f"Audit returned unknown id {mid}, skipping")
                continue
            if mid in kept_ids:
                # The same id twice would store the memory twice and put a
                # duplicate id into the vector index rebuild.
                logger.debug(f"Audit returned id {mid} more than once, keeping the first")
                continue
            kept_ids.add(mid)
            # Preserve original metadata, update text + category
            entry = originals[mid].copy()
            entry["text"] = new_text.strip()
            new_category = item.get("category")
            if isinstance(new_category, str) and new_category.strip():
                entry["category"] = new_category
            final_entries.append(entry)
        after_count = len(final_entries)

        # Safety net against catastrophic over-deletion. A conservative tidy
        # should never wipe out half the store in one pass — if the model
        # returned far fewer entries than it was given (over-consolidation, a
        # dropped/truncated list, or it ignored ids), treat it as a misfire and
        # DON'T save. Better to no-op than to silently lose memories.
        if before_count >= 8 and after_count < before_count * 0.5:
            logger.warning(
                f"Memory audit would cut {before_count} -> {after_count} "
                f"(>50% removed) — refusing as unsafe, keeping originals"
            )
            return {"before": before_count, "after": before_count, "error": "unsafe_removal"}

        # Merge audited entries back with the entries that were not part of
        # this audit. `owner is not None` mirrors MemoryManager.load(), which
        # filters for any owner other than None (including "").
        if owner is not None:
            all_entries = memory_manager.load_all()
            audited_ids = {e["id"] for e in final_entries}
            other_entries = [
                e for e in all_entries
                if e.get("owner") is not None and e.get("owner") != owner
            ]
            # Also keep legacy (ownerless) entries that weren't part of this audit
            seen_ids = {e["id"] for e in other_entries}
            for e in all_entries:
                if e.get("owner") is None and e["id"] not in audited_ids and e["id"] not in seen_ids:
                    other_entries.append(e)
                    seen_ids.add(e["id"])
            saved_entries = final_entries + other_entries
        else:
            saved_entries = final_entries
        memory_manager.save(saved_entries)

        logger.info(
            f"Memory audit complete: {before_count} -> {after_count} entries "
            f"({before_count - after_count} removed/merged)"
        )

        # Rebuild the vector index from everything that was saved. rebuild()
        # replaces the index with exactly the list it is given, so passing
        # only the audited owner's entries would drop all other memories.
        if memory_vector and memory_vector.healthy:
            memory_vector.rebuild(saved_entries)

        # Persist the post-tidy fingerprint so the next call short-circuits
        # if nothing has changed in the meantime.
        _save_tidy_state(memory_manager, owner, _fingerprint_entries(final_entries))
        return {"before": before_count, "after": after_count}
    except Exception as e:
        logger.error(f"Memory audit failed: {e}")
        return {"error": str(e)}

View File

@@ -0,0 +1,175 @@
"""
memory_vector.py
ChromaDB-backed vector store for memory entries.
Shares the EmbeddingClient with RAG to save memory.
Stores pre-computed embeddings (ChromaDB does not manage embedding).
"""
import logging
from typing import List, Dict, Optional
logger = logging.getLogger(__name__)
class MemoryVectorStore:
    """Vector index over memory entries for semantic retrieval.

    Wraps one ChromaDB collection that uses cosine distance. Embeddings are
    computed here and passed to the collection explicitly. If initialisation
    fails the store stays "unhealthy": reads return empty results and writes
    are no-ops.
    """

    COLLECTION_NAME = "odysseus_memories"

    def __init__(self, data_dir: str, embedding_model=None):
        """Open (or create) the collection.

        Args:
            data_dir: Not used by this class; the Chroma client comes from
                ``src.chroma_client``.
            embedding_model: Object with an ``encode(texts,
                normalize_embeddings=True)`` method. When None, the shared
                client from ``src.embeddings`` is used.
        """
        self._model = embedding_model
        self._collection = None
        self._healthy = False
        self._initialize()

    def _open_collection(self, client):
        """Get or create the memories collection on *client*."""
        return client.get_or_create_collection(
            name=self.COLLECTION_NAME,
            metadata={"hnsw:space": "cosine"},
        )

    def _initialize(self):
        """Resolve the embedding model and open the collection.

        Any failure is logged and leaves the store unhealthy.
        """
        try:
            from src.chroma_client import get_chroma_client
            if self._model is None:
                from src.embeddings import get_embedding_client
                self._model = get_embedding_client()
                if self._model is None:
                    raise RuntimeError("No embedding backend available")
                logger.info(f"MemoryVectorStore using embeddings: {self._model.url}")
            client = get_chroma_client()
            collection = self._open_collection(client)
            # Probe the collection before declaring the store healthy, so a
            # failing count() cannot leave it marked usable.
            count = collection.count()
            self._collection = collection
            self._healthy = True
            logger.info(f"MemoryVectorStore ready (entries={count})")
        except Exception as e:
            logger.error(f"MemoryVectorStore init failed: {e}")

    @property
    def healthy(self) -> bool:
        """True once the collection has been opened successfully."""
        return self._healthy

    def _embed(self, texts: List[str]) -> List[List[float]]:
        """Embed *texts* with normalised vectors and return them as lists."""
        vecs = self._model.encode(texts, normalize_embeddings=True)
        return vecs.tolist()

    def count(self) -> int:
        """Return the number of stored vectors."""
        if not self._healthy:
            return 0
        return self._collection.count()

    def add(self, memory_id: str, text: str):
        """Add a single memory entry to the vector index.

        Does nothing when the store is unhealthy or the id is already
        indexed.
        """
        if not self._healthy:
            return
        # Skip if already exists
        existing = self._collection.get(ids=[memory_id])
        if existing["ids"]:
            return
        embeddings = self._embed([text])
        self._collection.add(
            ids=[memory_id],
            embeddings=embeddings,
            documents=[text],
            metadatas=[{"source": "memory"}],
        )

    def remove(self, memory_id: str):
        """Remove a memory entry from the index; failures are logged only."""
        if not self._healthy:
            return
        try:
            self._collection.delete(ids=[memory_id])
        except Exception as e:
            logger.warning(f"memory remove {memory_id}: {e}")

    def search(self, query: str, k: int = 8) -> List[Dict]:
        """Search for the most relevant memory IDs by semantic similarity.

        Returns list of {"memory_id": str, "score": float}.
        ChromaDB cosine distance = 1 - cosine_similarity.
        We convert back: similarity = 1.0 - distance.
        """
        if not self._healthy:
            return []
        total = self._collection.count()
        if total == 0:
            return []
        embeddings = self._embed([query])
        results = self._collection.query(
            query_embeddings=embeddings,
            # Never ask for more neighbours than there are entries.
            n_results=min(k, total),
        )
        return [
            {"memory_id": mid, "score": round(1.0 - distance, 4)}
            for mid, distance in zip(results["ids"][0], results["distances"][0])
        ]

    def find_similar(self, text: str, threshold: float = 0.92) -> Optional[str]:
        """Check if a near-duplicate exists. Returns memory_id if found, else None.

        Only the single nearest entry is considered; it counts as a
        near-duplicate when ``1 - distance`` is at least *threshold*.
        """
        if not self._healthy or self._collection.count() == 0:
            return None
        embeddings = self._embed([text])
        results = self._collection.query(
            query_embeddings=embeddings,
            n_results=1,
        )
        if results["ids"][0]:
            similarity = 1.0 - results["distances"][0][0]
            if similarity >= threshold:
                return results["ids"][0][0]
        return None

    def rebuild(self, memories: List[Dict]):
        """Rebuild the entire index from a list of memory entries.

        Each entry must have 'id' and 'text' keys; entries missing either
        are skipped, and only the first entry for a given id is indexed. The
        previous contents of the collection are discarded, so *memories*
        must be the complete set to index.
        """
        if not self._healthy:
            return
        from src.chroma_client import get_chroma_client
        # Delete and recreate collection for a clean rebuild
        client = get_chroma_client()
        try:
            client.delete_collection(self.COLLECTION_NAME)
        except Exception as e:
            # Usually just "collection does not exist"; logged rather than
            # hidden, since a real failure means old entries survive.
            logger.debug(f"MemoryVectorStore rebuild: delete_collection failed: {e}")
        self._collection = self._open_collection(client)

        texts = []
        ids = []
        seen_ids = set()
        for mem in memories:
            text = mem.get("text", "").strip()
            mid = mem.get("id", "")
            if not text or not mid or mid in seen_ids:
                continue
            seen_ids.add(mid)
            texts.append(text)
            ids.append(mid)

        # Batch in chunks of 100 to avoid oversized requests
        for start in range(0, len(texts), 100):
            batch_texts = texts[start:start + 100]
            batch_ids = ids[start:start + 100]
            embeddings = self._embed(batch_texts)
            self._collection.add(
                ids=batch_ids,
                embeddings=embeddings,
                documents=batch_texts,
                metadatas=[{"source": "memory"}] * len(batch_ids),
            )
        logger.info(f"MemoryVectorStore rebuilt with {len(ids)} entries")

137
services/memory/service.py Normal file
View File

@@ -0,0 +1,137 @@
# services/memory/service.py
"""Memory service — persistent memory storage and retrieval."""
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
import os
from .memory import MemoryManager
from .memory_vector import MemoryVectorStore
@dataclass
class Memory:
    """A single stored memory entry as exposed by the memory service."""

    # Identifier of the entry.
    id: str
    # The remembered content.
    text: str
    # Creation time as an integer timestamp.
    timestamp: int
    # Session the memory is associated with, when there is one.
    session_id: Optional[str] = None
    # Free-form extra data; every instance gets its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)
@dataclass
class MemorySearchResult:
    """Outcome of one memory search."""

    # The matching memories.
    memories: List[Memory]
    # The query string the search was run with.
    query: str
    # Number of memories reported for this search.
    total: int
class MemoryService:
    """
    Memory storage and retrieval service.

    Entries are persisted through ``MemoryManager``; when a
    ``memory_vectors`` directory exists under the data directory, a
    ``MemoryVectorStore`` is kept in sync and used for semantic recall.

    Usage:
        service = MemoryService()
        await service.remember("User prefers dark mode")
        results = await service.recall("preferences")
    """

    # Upper bound on stored entries read back when resolving vector hits.
    _LOOKUP_LIMIT = 100_000

    def __init__(self, data_dir: str = "data"):
        self.manager = MemoryManager(data_dir)
        vectors_dir = os.path.join(data_dir, "memory_vectors")
        # The vector store is optional: only enabled when its directory exists.
        self.vector_store = MemoryVectorStore(data_dir) if os.path.exists(vectors_dir) else None

    @staticmethod
    def _to_memory(entry: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None) -> Memory:
        """Build a ``Memory`` from a stored entry dict."""
        return Memory(
            id=entry.get("id", ""),
            text=entry.get("text", ""),
            timestamp=entry.get("timestamp", 0),
            session_id=entry.get("session_id"),
            metadata=dict(metadata or {}),
        )

    def _resolve_hits(self, hits: List[Dict[str, Any]]) -> List[Memory]:
        """Turn vector hits (``memory_id`` + ``score``) into full memories.

        The vector index only returns ids and scores, so the text, timestamp
        and session come from the persistent store. Hits whose id is no
        longer stored (stale index) are dropped.
        """
        if not hits:
            return []
        # NOTE(review): assumes get_memories(limit=N) returns up to N stored
        # entry dicts carrying an 'id' key — confirm against MemoryManager.
        stored = self.manager.get_memories(limit=self._LOOKUP_LIMIT) or []
        by_id = {e.get("id"): e for e in stored if isinstance(e, dict)}
        resolved = []
        for hit in hits:
            entry = by_id.get(hit.get("memory_id"))
            if entry is not None:
                resolved.append(self._to_memory(entry, {"score": hit.get("score")}))
        return resolved

    async def remember(self, text: str, session_id: Optional[str] = None) -> Memory:
        """
        Store a new memory.

        Args:
            text: Memory content
            session_id: Optional session association

        Returns:
            Created Memory object
        """
        import uuid
        import time

        memory_id = str(uuid.uuid4())[:8]
        timestamp = int(time.time())
        entry = {
            "id": memory_id,
            "text": text,
            "timestamp": timestamp,
            "session_id": session_id,
        }
        self.manager.add_memory(entry)
        # Also index it for semantic search. The vector store's signature is
        # add(memory_id, text) — id first, then the text to embed.
        if self.vector_store:
            self.vector_store.add(memory_id, text)
        return Memory(
            id=memory_id,
            text=text,
            timestamp=timestamp,
            session_id=session_id,
        )

    async def recall(self, query: str, top_k: int = 5) -> MemorySearchResult:
        """
        Search memories.

        Semantic (vector) search is tried first; its hits are resolved to
        stored entries and carry the similarity in ``metadata["score"]``.
        When the vector store is absent or yields nothing usable, keyword
        search is used instead.

        Args:
            query: Search query
            top_k: Max results

        Returns:
            MemorySearchResult with matching memories
        """
        if self.vector_store:
            hits = self.vector_store.search(query, k=top_k)
            memories = self._resolve_hits(hits)
            if memories:
                return MemorySearchResult(memories=memories, query=query, total=len(memories))
        # Fallback to keyword search
        rows = self.manager.search_memories(query, limit=top_k)
        memories = [self._to_memory(row) for row in rows]
        return MemorySearchResult(memories=memories, query=query, total=len(memories))

    def get_all(self, limit: int = 100) -> List[Memory]:
        """Get stored memories, at most ``limit`` of them."""
        return [self._to_memory(row) for row in self.manager.get_memories(limit=limit)]

    def delete(self, memory_id: str) -> bool:
        """Delete a memory by ID; returns the manager's result.

        After a successful delete the entry is also removed from the vector
        index so it can no longer surface in semantic recall.
        """
        deleted = self.manager.delete_memory(memory_id)
        if deleted and self.vector_store:
            self.vector_store.remove(memory_id)
        return deleted

View File

@@ -0,0 +1,209 @@
"""
skill_extractor.py
Background auto-extraction of skills from complex agent runs.
When the agent takes >= 2 rounds or >= 2 tool calls to complete a task,
we ask the LLM to distill the approach into a reusable skill.
"""
import json
import logging
from typing import Optional
logger = logging.getLogger(__name__)
# System prompt for the extraction call. It is a `str.format` template:
# `{rounds}` and `{tool_count}` are filled in by maybe_extract_skill, so any
# literal brace added to this text must be doubled.
SKILL_EXTRACT_PROMPT = (
    "You are analyzing an AI agent's work session. The agent took {rounds} rounds "
    "and {tool_count} tool calls to complete the task.\n\n"
    "Extract a reusable 'skill' ONLY IF the session contains a concrete, "
    "repeatable procedure the agent could follow to solve a similar problem "
    "ON THE COMPUTER next time (e.g. a sequence of shell commands, code, file "
    "edits, API calls, or tool usage).\n\n"
    "Return null (the bare word, no JSON) when the session is NOT a reusable "
    "computer procedure, including:\n"
    "- The real work happened OUTSIDE the computer (the user did something "
    "physically, in person, on another device, or by hand) and the agent only "
    "discussed or advised it.\n"
    "- A one-off, personal, or context-specific task that won't recur "
    "(personal errands, a specific person/place/date, casual conversation).\n"
    "- A pure question/answer or explanation with no transferable method.\n"
    "- The agent failed, gave up, or the approach is not worth repeating.\n\n"
    "When (and only when) a genuine reusable procedure exists, return a JSON "
    "object with:\n"
    '- "title": short name (under 10 words)\n'
    '- "problem": what was the challenge (1-2 sentences)\n'
    '- "solution": what worked (1-2 sentences)\n'
    '- "steps": array of step-by-step instructions (3-7 short steps)\n'
    '- "tags": array of relevant keywords (3-5 tags)\n'
    '- "confidence": 0.0-1.0 how reliable AND reusable this procedure is\n\n'
    "Be conservative: if in doubt, return null.\n"
    "Return ONLY valid JSON (or the bare word null), no markdown fences."
)
# Skills the model is unsure about (or that read as one-offs) add clutter —
# drop anything below this confidence. Compared against the "confidence"
# value the model returns in its JSON answer.
MIN_CONFIDENCE = 0.6
# How many recent messages to include in the conversation excerpt sent to
# the model (the last N entries of the session's context messages).
CONTEXT_WINDOW = 12
async def maybe_extract_skill(
    session,
    skills_manager,
    endpoint_url: str,
    model: str,
    headers: dict,
    round_count: int,
    tool_count: int,
    owner: Optional[str] = None,
):
    """Extract a skill if the agent run was complex enough.

    The run qualifies when it took at least 2 rounds or at least 2 tool
    calls. The last ``CONTEXT_WINDOW`` context messages are summarised
    (each truncated to 500 characters) and sent to the LLM together with
    ``SKILL_EXTRACT_PROMPT``; a JSON answer is validated and stored through
    ``skills_manager.add_skill``.

    Args:
        session: Object providing ``get_context_messages()`` and optionally
            a ``session_id`` attribute.
        skills_manager: Object providing ``load(owner=...)`` and
            ``add_skill(...)``.
        endpoint_url: LLM endpoint passed to ``llm_call_async``.
        model: Model name passed to ``llm_call_async``.
        headers: Request headers passed to ``llm_call_async``.
        round_count: Number of rounds the agent run took.
        tool_count: Number of tool calls the agent run made.
        owner: Owner the skill is stored under and checked against.

    Returns:
        The entry returned by ``add_skill``, or ``None`` when the run is
        below the threshold, the model declines, the answer is unusable, the
        confidence is below ``MIN_CONFIDENCE``, a skill with the same title
        exists, or any error occurs (errors are logged, never raised).
    """
    # Quiet by default; flip to DEBUG when chasing extractor issues.
    logger.debug(
        "[skill-extract] start: rounds=%d tools=%d model=%s owner=%s",
        round_count, tool_count, model, owner,
    )
    if round_count < 2 and tool_count < 2:
        logger.debug("[skill-extract] BELOW threshold (need rounds>=2 or tools>=2)")
        return None
    try:
        from src.llm_core import llm_call_async

        # Get recent messages (slicing already copes with short histories).
        history = session.get_context_messages() or []
        recent = history[-CONTEXT_WINDOW:]
        if not recent:
            logger.debug("[skill-extract] no recent messages, skipping")
            return None

        # Build conversation summary for extraction
        conv_lines = []
        for msg in recent:
            role = msg.get("role", "?")
            # Tool-call messages commonly carry `content: None`; flatten to
            # text first so the length check below cannot fail.
            content = _message_text(msg.get("content", ""))
            # Truncate long messages
            if len(content) > 500:
                content = content[:500] + "..."
            conv_lines.append(f"[{role}] {content}")
        conversation = "\n".join(conv_lines)
        prompt = SKILL_EXTRACT_PROMPT.format(rounds=round_count, tool_count=tool_count)

        import time as _time
        _t0 = _time.monotonic()
        logger.debug(
            "[skill-extract] calling LLM (endpoint=%s, ctx=%d msgs, timeout=30s)",
            endpoint_url, len(recent),
        )
        response = await llm_call_async(
            endpoint_url,
            model,
            [
                {"role": "system", "content": prompt},
                {"role": "user", "content": f"Conversation:\n{conversation}"},
            ],
            headers=headers,
            timeout=30,
        )
        logger.debug(
            "[skill-extract] LLM returned in %.1fs (len=%d, head=%r)",
            _time.monotonic() - _t0, len(response or ""), (response or "")[:80],
        )
        if not response or response.strip().lower() == "null":
            logger.debug(
                "[skill-extract] LLM declined (returned null/empty) — "
                "session deemed not a reusable procedure"
            )
            return None

        # Some models (MiniMax, Qwen-Thinker, DeepSeek-R1) emit their
        # chain-of-thought BEFORE the JSON output even when asked for
        # raw JSON. `strip_think(prose=True, prompt_echo=True)` removes
        # <think>…</think> tags AND prose-style "Let me analyze this…"
        # preambles. Without it, json.loads bombed on character 0 every
        # time and the silent-bail looked like "extractor doesn't work".
        try:
            from src.text_helpers import strip_think as _strip_think
            response = _strip_think(response, prose=True, prompt_echo=True)
        except Exception:
            # Best effort: fall through and try to parse the raw response.
            pass

        # Parse JSON
        data = json.loads(_isolate_json(response or ""))
        if not data or not isinstance(data, dict):
            logger.debug("[skill-extract] parsed JSON not a dict, dropping")
            return None
        # The model may return null or a non-string title; coerce before use.
        title = str(data.get("title") or "").strip()
        if not title:
            logger.debug("[skill-extract] LLM returned object with no title, dropping")
            return None

        # Honour the model's own reliability/reusability estimate — low-
        # confidence extractions are usually one-offs or shaky procedures.
        try:
            _conf = float(data.get("confidence", 0.7))
        except (TypeError, ValueError):
            _conf = 0.7
        if _conf < MIN_CONFIDENCE:
            logger.debug(
                "[skill-extract] '%s' below confidence floor (%.2f < %.2f) — dropped",
                title, _conf, MIN_CONFIDENCE,
            )
            return None

        # Check for duplicate skills
        wanted = title.lower()
        for sk in skills_manager.load(owner=owner):
            if str(sk.get("title") or "").lower() == wanted:
                logger.debug("[skill-extract] '%s' already exists — dropped as duplicate", title)
                return None

        entry = skills_manager.add_skill(
            title=title,
            problem=str(data.get("problem") or ""),
            solution=str(data.get("solution") or ""),
            steps=_string_list(data.get("steps")),
            tags=_string_list(data.get("tags")),
            source="learned",
            # Pass the validated float, not the raw (possibly non-numeric)
            # value the model returned.
            confidence=_conf,
            session_id=getattr(session, "session_id", None),
            owner=owner,
        )
        if isinstance(entry, dict) and entry.get("_deduped"):
            # add_skill matched a near-identical existing skill and returned
            # it instead of creating one — nothing was added, so no event.
            logger.debug(
                "[skill-extract] '%s' near-duplicate of existing skill %s — not added",
                title, entry.get("_duplicate_of"),
            )
            return entry
        try:
            from src.event_bus import fire_event
            fire_event("skill_added", owner)
        except Exception:
            logger.debug("skill_added event dispatch failed", exc_info=True)
        logger.info(
            "Auto-extracted skill: %s (id=%s)",
            title, entry.get("id") if isinstance(entry, dict) else None,
        )
        return entry
    except json.JSONDecodeError as e:
        logger.debug("[skill-extract] non-JSON LLM response, dropping: %s", e)
        return None
    except Exception as e:
        # Real exceptions stay INFO+warning so they don't get lost when
        # users only have default log level. `exc_info=True` ships the
        # full traceback so timeouts vs auth vs import errors are
        # distinguishable from outside.
        logger.warning("[skill-extract] FAILED: %s", e, exc_info=True)
        return None


def _message_text(content) -> str:
    """Flatten a chat message's ``content`` (None, str or block list) to text."""
    if content is None:
        return ""
    if isinstance(content, list):
        return " ".join(
            str(b.get("text") or "")
            for b in content
            if isinstance(b, dict) and b.get("type") == "text"
        )
    return content if isinstance(content, str) else str(content)


def _isolate_json(raw: str) -> str:
    """Strip a markdown fence and surrounding commentary around a JSON object."""
    text = raw.strip()
    if text.startswith("```"):
        text = text.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
    # The JSON may still be embedded inside surrounding commentary — slice
    # from the first '{' to the last '}'.
    if text and text[0] != "{":
        start = text.find("{")
        end = text.rfind("}")
        if 0 <= start < end:
            text = text[start:end + 1]
    return text


def _string_list(value) -> list:
    """Coerce a model-supplied value to a list of non-empty strings."""
    if isinstance(value, str):
        value = [value]
    if not isinstance(value, (list, tuple)):
        return []
    return [str(item).strip() for item in value if item is not None and str(item).strip()]

View File

@@ -0,0 +1,444 @@
"""SKILL.md parser & writer.
Reads/writes a single skill from a `SKILL.md` file with YAML frontmatter
and a structured markdown body. Inspired by Hermes' skills format
(https://hermes-agent.nousresearch.com/docs/user-guide/features/skills).
Frontmatter shape (YAML):
---
name: open-pr-from-branch
description: One-line summary surfaced in the skills index.
version: 1.0.0
category: dev
tags: [git, github]
platforms: [linux, macos] # optional
requires_toolsets: [] # optional
fallback_for_toolsets: [] # optional
status: published # draft | published
confidence: 0.8 # 0..1
source: learned # learned | taught | imported
teacher_model: claude-opus-4-7 # optional
created: 2026-05-09T21:43:00Z
---
Body sections (any subset; rendered as headings):
## When to Use
Trigger conditions in plain English.
## Procedure
1. First step
2. Second step
## Pitfalls
- Common failure mode + how to recover
## Verification
- How to confirm success
Anything else (raw paragraphs after the last known section) is preserved
in `body_extra` and round-trips on save.
Usage counters (`uses`, `last_used`) live in a sidecar `_usage.json` keyed
by skill name, so the SKILL.md file doesn't churn on every retrieval.
"""
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Slugify
# ---------------------------------------------------------------------------
_SLUG_RE = re.compile(r"[^a-z0-9]+")


def slugify(text: str, fallback: str = "skill") -> str:
    """Convert a free-form title to a kebab-case slug suitable for a directory
    name.

    Lower-cases the text, replaces every run of characters outside
    ``[a-z0-9]`` with a single dash, trims leading/trailing dashes and caps
    the result at 60 characters. ``fallback`` is used when nothing is left.

    The cap is applied before a final dash trim, so a cut that lands right
    after a dash cannot leave a trailing ``-``. That keeps the function
    idempotent: ``slugify(slugify(x)) == slugify(x)``.
    """
    s = str(text or "").strip().lower()
    s = _SLUG_RE.sub("-", s).strip("-")
    capped = (s or fallback)[:60].rstrip("-")
    return capped or fallback
# ---------------------------------------------------------------------------
# Frontmatter (minimal YAML — we don't pull in PyYAML for one feature)
# ---------------------------------------------------------------------------
# We accept a tiny subset of YAML: scalar `key: value`, inline lists `[a, b]`,
# and block lists with `-`. That covers everything in our schema and avoids
# a new dependency.
_FM_KEY_RE = re.compile(r"^([a-z_][a-z0-9_]*):\s*(.*)$", re.IGNORECASE)
_FM_BLOCK_LIST_RE = re.compile(r"^\s*-\s*(.*)$")


def _parse_scalar(raw: str) -> Any:
    """Parse one frontmatter value.

    Handles inline lists (``[a, b]``, recursively), booleans
    (``true``/``yes``/``false``/``no``), nulls (``null``/``none``/``~``),
    quoted strings and numbers; anything else is returned as the stripped
    string. Double-quoted strings are decoded as JSON strings so values
    written with ``json.dumps`` (escaped quotes, ``\\n``, ``\\uXXXX``) read
    back unchanged; if that fails the quotes are simply removed.
    """
    raw = raw.strip()
    if raw == "":
        return ""
    if raw.startswith("[") and raw.endswith("]"):
        inner = raw[1:-1].strip()
        if not inner:
            return []
        return [_parse_scalar(p) for p in _split_top_level(inner, ",")]
    lowered = raw.lower()
    if lowered in ("true", "yes"):
        return True
    if lowered in ("false", "no"):
        return False
    if lowered in ("null", "none", "~"):
        return None
    if (raw[0] == raw[-1]) and raw[0] in ("'", '"'):
        if raw[0] == '"':
            try:
                decoded = json.loads(raw)
            except ValueError:
                decoded = None
            if isinstance(decoded, str):
                return decoded
        return raw[1:-1]
    # Try number
    try:
        if "." in raw:
            return float(raw)
        return int(raw)
    except ValueError:
        pass
    return raw


def _split_top_level(s: str, sep: str) -> List[str]:
    """Split `s` on `sep` ignoring separators inside [] or quotes.

    Inside a double-quoted string a backslash escapes the next character,
    so an escaped quote does not end the string.
    """
    out, buf, depth, quote = [], [], 0, None
    escaped = False
    for ch in s:
        if quote:
            buf.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\" and quote == '"':
                escaped = True
            elif ch == quote:
                quote = None
            continue
        if ch in ("'", '"'):
            quote = ch
            buf.append(ch)
            continue
        if ch == "[":
            depth += 1
        elif ch == "]":
            depth = max(0, depth - 1)
        if ch == sep and depth == 0:
            out.append("".join(buf).strip())
            buf = []
            continue
        buf.append(ch)
    if buf:
        out.append("".join(buf).strip())
    return out


def _strip_inline_comment(value: str) -> str:
    """Remove a trailing ``# comment`` from a frontmatter value.

    A ``#`` starts a comment only at the beginning of the value or after
    whitespace, and never inside a quoted string.
    """
    quote = None
    escaped = False
    for i, ch in enumerate(value):
        if quote:
            if escaped:
                escaped = False
            elif ch == "\\" and quote == '"':
                escaped = True
            elif ch == quote:
                quote = None
            continue
        if ch in ("'", '"'):
            quote = ch
        elif ch == "#" and (i == 0 or value[i - 1].isspace()):
            return value[:i].rstrip()
    return value


def parse_frontmatter(text: str) -> tuple[Dict[str, Any], str]:
    """Pull the YAML frontmatter out of a SKILL.md and return (fm, body).

    Text that does not start with ``---`` or has no closing ``---`` line is
    returned unchanged as the body with an empty mapping. Supported inside
    the frontmatter: ``key: value`` scalars, inline lists, block lists
    (``key:`` followed by ``- item`` lines), full-line ``#`` comments and
    trailing ``# comments`` after a value. A key with no value yields an
    empty list.
    """
    if not text.startswith("---"):
        return {}, text
    end = text.find("\n---", 3)
    if end < 0:
        return {}, text
    fm_text = text[3:end].lstrip("\n")
    # Also drop carriage returns so CRLF files do not leave a stray "\r".
    body = text[end + 4:].lstrip("\r\n")
    fm: Dict[str, Any] = {}
    pending_key: Optional[str] = None
    for line in fm_text.splitlines():
        if not line.strip() or line.lstrip().startswith("#"):
            continue
        m = _FM_KEY_RE.match(line)
        if m:
            key = m.group(1)
            val = _strip_inline_comment(m.group(2))
            if val.strip() == "":
                # `key:` with nothing after it opens a block list.
                pending_key = key
                fm[key] = []
            else:
                fm[key] = _parse_scalar(val)
                pending_key = None
            continue
        m2 = _FM_BLOCK_LIST_RE.match(line)
        if m2 and pending_key:
            existing = fm.get(pending_key)
            if not isinstance(existing, list):
                fm[pending_key] = []
            fm[pending_key].append(_parse_scalar(_strip_inline_comment(m2.group(1))))
    return fm, body
def _emit_scalar(v: Any) -> str:
    """Render one frontmatter value as text.

    ``None`` becomes ``null``, booleans ``true``/``false``, numbers their
    ``str()``, lists an inline ``[a, b]`` list. Strings are written bare
    unless they would be misread when parsed back, in which case they are
    quoted with ``json.dumps``.
    """
    if v is None:
        return "null"
    if isinstance(v, bool):
        return "true" if v else "false"
    if isinstance(v, (int, float)):
        return str(v)
    if isinstance(v, list):
        return "[" + ", ".join(_emit_scalar(x) for x in v) + "]"
    s = str(v)
    if _needs_quoting(s):
        return json.dumps(s)
    return s


def _needs_quoting(s: str) -> bool:
    """Tell whether a bare string would not read back as the same string."""
    # Empty or whitespace-padded strings lose content when stripped on read.
    if s == "" or s != s.strip():
        return True
    if any(c in s for c in (":", "#", "\n", "[", "]", "{", "}", ",", "&", "*", "!", "|", ">", "'", '"', "%", "@")):
        return True
    # Words the parser turns into booleans / None.
    if s.lower() in ("true", "yes", "false", "no", "null", "none", "~"):
        return True
    # Strings the parser would turn into a number (e.g. "007" -> 7).
    try:
        if "." in s:
            float(s)
        else:
            int(s)
    except ValueError:
        return False
    return True
def _as_list(v: Any) -> List[str]:
    """Coerce a frontmatter value to a list of strings.

    A list keeps its items (stringified) except ``None`` and ``""``;
    ``None`` gives an empty list; any other value becomes a one-item list.
    """
    if isinstance(v, list):
        return [str(item) for item in v if item not in (None, "")]
    return [] if v is None else [str(v)]
def _as_float(v: Any, default: float = 0.8) -> float:
    """Return ``float(v)``, or ``default`` when ``v`` cannot be converted."""
    try:
        number = float(v)
    except (TypeError, ValueError):
        number = default
    return number
def emit_frontmatter(fm: Dict[str, Any]) -> str:
    """Render a frontmatter mapping as ``key: value`` lines.

    Keys are written in the mapping's order; entries whose value is
    ``None``, an empty list or an empty string are left out. The result has
    no surrounding ``---`` fences and no trailing newline.
    """
    return "\n".join(
        f"{k}: {_emit_scalar(v)}"
        for k, v in fm.items()
        if not (v is None or v == [] or v == "")
    )
# ---------------------------------------------------------------------------
# Skill body sections
# ---------------------------------------------------------------------------
_KNOWN_SECTIONS = ("when_to_use", "procedure", "pitfalls", "verification")
# Lower-cased `## Heading` text -> section key ("steps" is an alias).
_HEADING_TO_KEY = {
    "when to use": "when_to_use",
    "procedure": "procedure",
    "steps": "procedure",
    "pitfalls": "pitfalls",
    "verification": "verification",
}
# Section key -> heading text used when writing.
_KEY_TO_HEADING = {
    "when_to_use": "When to Use",
    "procedure": "Procedure",
    "pitfalls": "Pitfalls",
    "verification": "Verification",
}
_SECTION_HEADING_RE = re.compile(r"^##\s+(.*?)\s*$")


def parse_body(body: str) -> Dict[str, Any]:
    """Split a SKILL.md body into known sections.

    Text before the first heading and every section under an unrecognised
    ``## Heading`` go to ``body_extra``; an unrecognised heading line is kept
    with its text so it survives a save. A known heading that occurs more
    than once is merged rather than overwritten.

    Returns:
        {
            "when_to_use": str,
            "procedure": list[str],     # numbered/bulleted lines
            "pitfalls": list[str],
            "verification": list[str],
            "body_extra": str,          # anything not under a known heading
        }
    """
    out = {k: ([] if k != "when_to_use" else "") for k in _KNOWN_SECTIONS}
    out["body_extra"] = ""
    if not body or not body.strip():
        return out
    sections: List[tuple[Optional[str], List[str]]] = [(None, [])]
    for line in body.splitlines():
        m = _SECTION_HEADING_RE.match(line)
        if m:
            key = _HEADING_TO_KEY.get(m.group(1).strip().lower())
            if key:
                sections.append((key, []))
            else:
                # Unknown heading: keep the heading line itself as extra text.
                sections.append((None, [line]))
            continue
        sections[-1][1].append(line)
    for key, lines in sections:
        text = "\n".join(lines).strip("\n")
        if key is None:
            extras = text.strip()
            if extras:
                out["body_extra"] = (out["body_extra"] + "\n\n" + extras).strip()
        elif key == "when_to_use":
            out["when_to_use"] = "\n\n".join(
                part for part in (out["when_to_use"], text.strip()) if part
            )
        else:
            out[key].extend(_parse_list_lines(text))
    return out


def _parse_list_lines(text: str) -> List[str]:
    """Pull bullet/numbered lines out of a section body. Plain paragraphs are
    treated as a single entry; a non-bullet line after a bullet continues
    that bullet."""
    items: List[str] = []
    for line in (text or "").splitlines():
        s = line.strip()
        if not s:
            continue
        m = re.match(r"^(?:[-*]|\d+[.)])\s+(.*)$", s)
        if m:
            items.append(m.group(1).strip())
        elif items:
            # continuation of previous bullet
            items[-1] = items[-1] + " " + s
        else:
            items.append(s)
    return items


def emit_body(sections: Dict[str, Any]) -> str:
    """Render body sections as markdown.

    ``body_extra`` is written first, then When to Use, Procedure (numbered),
    Pitfalls and Verification (bulleted); empty sections are omitted. The
    extra text goes first because free text placed after a list section
    would be read back as a continuation of that list's last item.
    """
    parts: List[str] = []
    extra = (sections.get("body_extra") or "").strip()
    if extra:
        parts.append(extra)
    when = (sections.get("when_to_use") or "").strip()
    if when:
        parts.append(f"## {_KEY_TO_HEADING['when_to_use']}\n\n{when}")
    for key in ("procedure", "pitfalls", "verification"):
        items = sections.get(key) or []
        if not items:
            continue
        heading = _KEY_TO_HEADING[key]
        if key == "procedure":
            body = "\n".join(f"{i + 1}. {x}" for i, x in enumerate(items))
        else:
            body = "\n".join(f"- {x}" for x in items)
        parts.append(f"## {heading}\n\n{body}")
    return "\n\n".join(parts) + ("\n" if parts else "")
# ---------------------------------------------------------------------------
# Skill record
# ---------------------------------------------------------------------------
@dataclass
class Skill:
    """One skill: SKILL.md frontmatter fields, parsed body sections and the
    sidecar usage counters, with conversion to and from markdown."""

    name: str  # slug, dir name
    description: str = ""
    version: str = "1.0.0"
    category: str = "general"
    tags: List[str] = field(default_factory=list)
    platforms: List[str] = field(default_factory=list)
    requires_toolsets: List[str] = field(default_factory=list)
    fallback_for_toolsets: List[str] = field(default_factory=list)
    status: str = "draft"  # draft | published
    confidence: float = 0.8
    source: str = "learned"
    teacher_model: Optional[str] = None
    owner: Optional[str] = None
    created: str = ""  # ISO8601
    when_to_use: str = ""
    procedure: List[str] = field(default_factory=list)
    pitfalls: List[str] = field(default_factory=list)
    verification: List[str] = field(default_factory=list)
    body_extra: str = ""
    # Sidecar (not persisted in SKILL.md)
    uses: int = 0
    last_used: Optional[int] = None
    # File path on disk (set when read)
    path: Optional[str] = None

    # ----------------------------------------------------------------------
    # Serialization
    # ----------------------------------------------------------------------
    def to_frontmatter(self) -> Dict[str, Any]:
        """Build the frontmatter mapping in the order it is written.

        Empty list fields and unset ``teacher_model``/``owner`` are left
        out. A confidence that is not numeric falls back to 0.8 instead of
        raising, so a skill can always be saved.
        """
        fm: Dict[str, Any] = {
            "name": self.name,
            "description": self.description,
            "version": self.version,
            "category": self.category,
        }
        if self.tags:
            fm["tags"] = list(self.tags)
        if self.platforms:
            fm["platforms"] = list(self.platforms)
        if self.requires_toolsets:
            fm["requires_toolsets"] = list(self.requires_toolsets)
        if self.fallback_for_toolsets:
            fm["fallback_for_toolsets"] = list(self.fallback_for_toolsets)
        fm["status"] = self.status
        fm["confidence"] = round(_as_float(self.confidence, 0.8), 3)
        fm["source"] = self.source
        if self.teacher_model:
            fm["teacher_model"] = self.teacher_model
        if self.owner:
            fm["owner"] = self.owner
        fm["created"] = self.created or _now_iso()
        return fm

    def to_dict(self) -> Dict[str, Any]:
        """Return the skill as a plain dict, including the legacy aliases
        ``title``, ``problem``, ``solution`` and ``steps``."""
        d = {
            "id": self.name,  # slug doubles as id
            "name": self.name,
            "description": self.description,
            "version": self.version,
            "category": self.category,
            "tags": list(self.tags),
            "platforms": list(self.platforms),
            "requires_toolsets": list(self.requires_toolsets),
            "fallback_for_toolsets": list(self.fallback_for_toolsets),
            "status": self.status,
            # Tolerate a non-numeric confidence rather than failing the listing.
            "confidence": round(_as_float(self.confidence, 0.8), 3),
            "source": self.source,
            "teacher_model": self.teacher_model,
            "owner": self.owner,
            "created": self.created,
            "when_to_use": self.when_to_use,
            "procedure": list(self.procedure),
            "pitfalls": list(self.pitfalls),
            "verification": list(self.verification),
            "body_extra": self.body_extra,
            "uses": int(self.uses or 0),
            "last_used": self.last_used,
            "path": self.path,
        }
        # Back-compat aliases for the old API/UI
        d["title"] = self.description or self.name.replace("-", " ").title()
        d["problem"] = self.when_to_use
        # Prefer the free-form extra text; otherwise the first procedure step.
        d["solution"] = self.body_extra or (self.procedure[0] if self.procedure else "")
        d["steps"] = list(self.procedure)
        return d

    @classmethod
    def from_markdown(cls, text: str, *, path: Optional[str] = None) -> "Skill":
        """Parse SKILL.md text into a ``Skill``.

        The name is the slugified ``name`` field; when that is missing or
        blank (a blank ``name:`` parses as an empty list) the description is
        slugified instead. Missing fields take the dataclass defaults and
        ``created`` defaults to the current UTC time.
        """
        fm, body = parse_frontmatter(text)
        sections = parse_body(body)
        raw_name = fm.get("name")
        name_source = raw_name if raw_name not in (None, "", []) else fm.get("description", "")
        name = slugify(name_source, fallback="skill")
        return cls(
            name=name,
            description=str(fm.get("description", "") or ""),
            version=str(fm.get("version", "1.0.0") or "1.0.0"),
            category=str(fm.get("category", "general") or "general"),
            tags=_as_list(fm.get("tags")),
            platforms=_as_list(fm.get("platforms")),
            requires_toolsets=_as_list(fm.get("requires_toolsets")),
            fallback_for_toolsets=_as_list(fm.get("fallback_for_toolsets")),
            status=str(fm.get("status", "draft") or "draft"),
            confidence=_as_float(fm.get("confidence", 0.8), 0.8),
            source=str(fm.get("source", "learned") or "learned"),
            teacher_model=str(fm.get("teacher_model")) if fm.get("teacher_model") else None,
            owner=str(fm.get("owner")) if fm.get("owner") else None,
            created=str(fm.get("created") or _now_iso()),
            when_to_use=sections["when_to_use"],
            procedure=list(sections["procedure"]),
            pitfalls=list(sections["pitfalls"]),
            verification=list(sections["verification"]),
            body_extra=sections["body_extra"],
            path=path,
        )

    def to_markdown(self) -> str:
        """Render the skill as SKILL.md text: fenced frontmatter, then body."""
        fm = emit_frontmatter(self.to_frontmatter())
        body = emit_body({
            "when_to_use": self.when_to_use,
            "procedure": self.procedure,
            "pitfalls": self.pitfalls,
            "verification": self.verification,
            "body_extra": self.body_extra,
        })
        return f"---\n{fm}\n---\n\n{body}"
def _now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
    # datetime.utcnow() is deprecated; take an aware UTC "now" instead. The
    # formatted output is unchanged.
    from datetime import timezone

    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

610
services/memory/skills.py Normal file
View File

@@ -0,0 +1,610 @@
# services/memory/skills.py
"""Skills storage layer.
Skills live on disk as `data/skills/<category>/<name>/SKILL.md` files with
YAML frontmatter and a structured markdown body (When to Use / Procedure /
Pitfalls / Verification). See `skill_format.py` for the format.
Usage counters (`uses`, `last_used`) live in a sidecar
`data/skills/_usage.json` keyed by skill name so the SKILL.md content
doesn't churn on every retrieval.
Ownership: skills declare `owner: <username>` in frontmatter. Single-user
deployments can leave that blank.
This module also retains a JSON fallback for any legacy `data/skills.json`
entries — they're surfaced as read-only `Skill` objects so old data still
loads while a user migrates them to disk.
"""
from __future__ import annotations
import json
import logging
import os
import time
from typing import Dict, Iterable, List, Optional
from .skill_format import Skill, slugify
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Token / similarity helpers (kept for the relevance fallback)
# ---------------------------------------------------------------------------
def _tokenize(text: str) -> set:
    """Return the set of lower-cased words in ``text``.

    Surrounding punctuation is stripped first and the length filter is
    applied afterwards, so punctuation-only words (``...``, ``()``) and
    one-character words (``a.``) cannot enter the set as ``""`` or a single
    letter and inflate similarity scores.
    """
    stripped = (w.strip('.,!?";:()[]') for w in (text or "").lower().split())
    return {w for w in stripped if len(w) > 1}
def _jaccard(a: set, b: set) -> float:
    """Jaccard similarity of two sets: shared items over all items.

    Returns 0.0 when either set is empty.
    """
    if a and b:
        return len(a & b) / len(a | b)
    return 0.0
def _to_float(x, default: float = 0.0) -> float:
    """Convert ``x`` to float, returning ``default`` instead of raising.

    Frontmatter can be hand-edited, so a blank or non-numeric
    `confidence:` in a SKILL.md must not break retrieval or eviction.
    """
    try:
        value = float(x)
    except (TypeError, ValueError):
        value = default
    return value
# ---------------------------------------------------------------------------
# SkillsManager
# ---------------------------------------------------------------------------
class SkillsManager:
"""Read/write SKILL.md files under <data_dir>/skills/."""
def __init__(self, data_dir: str):
self.data_dir = data_dir
self.skills_root = os.path.join(data_dir, "skills")
self.usage_file = os.path.join(self.skills_root, "_usage.json")
self.legacy_file = os.path.join(data_dir, "skills.json") # back-compat
os.makedirs(self.skills_root, exist_ok=True)
# ----------------------------------------------------------------------
# Path helpers
# ----------------------------------------------------------------------
def _skill_dir(self, category: str, name: str) -> str:
    """Directory of a skill: ``<skills_root>/<category slug>/<name slug>``.

    An empty category maps to ``general``; an empty name slug to ``skill``.
    """
    return os.path.join(
        self.skills_root,
        slugify(category or "general", fallback="general"),
        slugify(name, fallback="skill"),
    )
def _skill_file(self, category: str, name: str) -> str:
    """Path of the ``SKILL.md`` file inside the skill's directory."""
    skill_dir = self._skill_dir(category, name)
    return os.path.join(skill_dir, "SKILL.md")
# ----------------------------------------------------------------------
# Usage sidecar
# ----------------------------------------------------------------------
def _load_usage(self) -> Dict[str, Dict]:
if not os.path.exists(self.usage_file):
return {}
try:
with open(self.usage_file) as f:
d = json.load(f)
return d if isinstance(d, dict) else {}
except Exception:
return {}
def _save_usage(self, usage: Dict[str, Dict]) -> None:
try:
from core.atomic_io import atomic_write_json
atomic_write_json(self.usage_file, usage, indent=2)
except Exception:
tmp = self.usage_file + ".tmp"
with open(tmp, "w") as f:
json.dump(usage, f, indent=2)
os.replace(tmp, self.usage_file)
def set_audit(self, name: str, verdict: str, by_teacher: bool = False,
worker_model: str = "", teacher_model: str = "") -> None:
"""Record the last test/audit result for a skill in the usage sidecar
(so it surfaces in load() without touching SKILL.md). Drives the
'verified' check + teacher mark on the card."""
import time as _t
usage = self._load_usage()
e = usage.setdefault(name, {"uses": 0, "last_used": None})
e["audit_verdict"] = verdict
e["audit_by_teacher"] = bool(by_teacher)
if worker_model:
e["audit_worker_model"] = worker_model
if teacher_model:
e["audit_teacher_model"] = teacher_model
e["audited_at"] = _t.time()
self._save_usage(usage)
def set_necessity(self, name: str, necessary: bool,
redundant_with=None, reason: str = "") -> None:
"""Record the advisory 'is this skill necessary?' judgment in the usage
sidecar. Surfaced on the card as a flag; never acts on the skill."""
usage = self._load_usage()
e = usage.setdefault(name, {"uses": 0, "last_used": None})
e["necessity"] = {
"necessary": bool(necessary),
"redundant_with": list(redundant_with or []),
"reason": str(reason or ""),
}
self._save_usage(usage)
# ----------------------------------------------------------------------
# Disk scan
# ----------------------------------------------------------------------
def _iter_skill_files(self) -> Iterable[str]:
if not os.path.isdir(self.skills_root):
return
for root, _dirs, files in os.walk(self.skills_root, followlinks=False):
if "SKILL.md" in files:
yield os.path.join(root, "SKILL.md")
def _read_skill(self, path: str) -> Optional[Skill]:
    """Load and parse one SKILL.md file.

    Returns ``None`` (after logging a warning) when the file cannot be
    read or parsed.
    """
    try:
        with open(path) as handle:
            return Skill.from_markdown(handle.read(), path=path)
    except Exception as e:
        logger.warning(f"Failed to parse {path}: {e}")
    return None
def _write_skill(self, sk: Skill) -> str:
    """Write a skill to its SKILL.md file and return the file path.

    The target is derived from the skill's category (``general`` when
    empty) and name; the directory is created if needed and ``sk.path`` is
    updated to the written path.
    """
    target = self._skill_file(sk.category or "general", sk.name)
    parent_dir = os.path.dirname(target)
    os.makedirs(parent_dir, exist_ok=True)
    from core.atomic_io import atomic_write_text
    atomic_write_text(target, sk.to_markdown())
    sk.path = target
    return target
def backfill_owner(self, primary_owner: str, valid_owners: Optional[set[str]] = None) -> int:
"""Assign legacy/unclaimed skill files to the primary owner.
Skills are disk-backed, so the DB legacy-owner migration cannot fix
them. If strict owner filtering is enabled and SKILL.md files have no
owner or an owner from a deleted/test account, the UI appears empty even
though files still exist. This mirrors the DB legacy-owner sweep.
"""
primary_owner = (primary_owner or "").strip()
if not primary_owner:
return 0
valid_owners = set(valid_owners or [])
changed = 0
for path in self._iter_skill_files():
sk = self._read_skill(path)
if not sk:
continue
owner = (sk.owner or "").strip()
if owner == primary_owner:
continue
if owner and owner in valid_owners:
continue
sk.owner = primary_owner
try:
self._write_skill(sk)
changed += 1
except Exception as e:
logger.warning("Failed to backfill owner for skill %s: %s", sk.name, e)
return changed
# ----------------------------------------------------------------------
# Public API — keeps the old method names so callers don't break
# ----------------------------------------------------------------------
def load_all(self) -> List[Dict]:
"""Return every skill as a plain dict, plus any legacy JSON entries."""
usage = self._load_usage()
out: List[Dict] = []
seen_names: set[str] = set()
for path in self._iter_skill_files():
sk = self._read_skill(path)
if not sk:
continue
d = sk.to_dict()
u = usage.get(sk.name) or {}
d["uses"] = int(u.get("uses", 0))
d["last_used"] = u.get("last_used")
d["audit_verdict"] = u.get("audit_verdict")
d["audit_by_teacher"] = bool(u.get("audit_by_teacher"))
d["audit_worker_model"] = u.get("audit_worker_model")
d["audit_teacher_model"] = u.get("audit_teacher_model")
d["audited_at"] = u.get("audited_at")
d["necessity"] = u.get("necessity")
out.append(d)
seen_names.add(sk.name)
# Legacy JSON entries — surfaced as draft, not editable from new flow
if os.path.exists(self.legacy_file):
try:
with open(self.legacy_file) as f:
legacy = json.load(f)
if isinstance(legacy, list):
for row in legacy:
if not isinstance(row, dict):
continue
name = slugify(row.get("title") or row.get("id") or "skill")
if name in seen_names:
continue
out.append({
"id": row.get("id") or name,
"name": name,
"description": row.get("title", ""),
"version": "0.0.1",
"category": "legacy",
"tags": row.get("tags") or [],
"status": row.get("status") or "draft",
"confidence": row.get("confidence", 0.5),
"source": row.get("source", "imported"),
"owner": row.get("owner"),
"when_to_use": row.get("problem", ""),
"procedure": row.get("steps") or [],
"pitfalls": [],
"verification": [],
"body_extra": row.get("solution", ""),
"title": row.get("title", ""),
"problem": row.get("problem", ""),
"solution": row.get("solution", ""),
"steps": row.get("steps") or [],
"uses": row.get("uses", 0),
"last_used": row.get("last_used"),
"_legacy": True,
})
except Exception:
pass
return out
def load(self, owner: Optional[str] = None) -> List[Dict]:
entries = self.load_all()
if owner is None:
return entries
# SECURITY: strict ownership filter. The previous predicate also
# included skills with NO owner field (`not s.get("owner")`), which
# leaked legacy / un-stamped skills to every authenticated user.
# Hide them now; the owner needs to be backfilled on disk if those
# skills should be visible to a specific user.
return [s for s in entries if s.get("owner") == owner]
# ----------------------------------------------------------------------
# CRUD — disk-backed
# ----------------------------------------------------------------------
def add_skill(
    self,
    title: str = "",
    problem: str = "",
    solution: str = "",
    steps: Optional[List[str]] = None,
    tags: Optional[List[str]] = None,
    source: str = "learned",
    teacher_model: Optional[str] = None,
    confidence: float = 0.8,
    session_id: Optional[str] = None,
    owner: Optional[str] = None,
    # New-schema fields (optional; fall back to old shape if absent)
    name: Optional[str] = None,
    description: Optional[str] = None,
    category: str = "general",
    when_to_use: Optional[str] = None,
    procedure: Optional[List[str]] = None,
    pitfalls: Optional[List[str]] = None,
    verification: Optional[List[str]] = None,
    platforms: Optional[List[str]] = None,
    requires_toolsets: Optional[List[str]] = None,
    fallback_for_toolsets: Optional[List[str]] = None,
    status: str = "draft",
    version: str = "1.0.0",
) -> Dict:
    """Create a skill, write it to disk and return it as a dict.

    Old-schema arguments are mapped onto the new fields when the new ones
    are absent: ``title`` -> ``description``, ``problem`` -> ``when_to_use``,
    ``steps`` -> ``procedure``; ``solution`` is stored as ``body_extra``
    only when no ``procedure`` argument was given. ``session_id`` is
    accepted but not used by this method.

    For any ``source`` other than ``"user"`` a cheap dedup runs first: if an
    existing skill's name+description+when_to_use+procedure tokens reach a
    Jaccard score of 0.82 against the candidate, no new skill is written.
    Instead the existing skill's usage is bumped and its dict is returned
    with ``"_deduped": True`` and ``"_duplicate_of"`` set to its name.
    When ``owner`` is given, only skills with that same owner are dedup
    candidates.

    If the slugified name is already taken (by any skill), ``-2``, ``-3``,
    ... is appended until it is unique.

    Returns:
        ``sk.to_dict()`` for the newly written skill, or the annotated
        dict of the existing near-duplicate.
    """
    # Normalize name
    nm = slugify(name or title or description or "skill")
    effective_when = when_to_use if when_to_use is not None else (problem or "")
    effective_steps = list(procedure if procedure is not None else (steps or []))

    # Free dedup-at-creation (always, no API): for LLM-authored skills,
    # skip if a near-identical skill already exists. User-authored skills
    # are never auto-skipped — a human asked for it. The every-X AI audit
    # handles the fuzzier near-duplicates this cheap check won't catch.
    _all = self.load_all()
    if source != "user":
        cand = _tokenize(" ".join([
            nm,
            (description or title or ""),
            effective_when,
            " ".join(str(p) for p in effective_steps),
        ]))
        if cand:
            for s in _all:
                # Stay inside the caller's ownership boundary: returning
                # (and usage-bumping) another owner's skill would expose
                # its content, and the caller would never see it through
                # the owner-filtered `load()` anyway.
                if owner is not None and s.get("owner") != owner:
                    continue
                # `or ""` rather than a .get default: legacy rows can
                # carry explicit nulls, which would break str.join.
                ex = _tokenize(" ".join([
                    s.get("name") or "",
                    s.get("description") or "",
                    s.get("when_to_use") or "",
                    " ".join(str(p) for p in (s.get("procedure") or [])),
                ]))
                if _jaccard(cand, ex) >= 0.82:
                    # Near-identical — don't grow the library; bump the
                    # existing skill's usage and return it so the caller
                    # knows it already exists.
                    try:
                        self.record_use(s["name"])
                    except Exception as e:
                        # Best-effort bookkeeping; never block the return.
                        logger.debug(f"Usage bump failed for deduped skill {s.get('name')}: {e}")
                    return {**s, "_deduped": True, "_duplicate_of": s.get("name")}

    # Avoid clobbering an existing skill with the same name
    existing = {s["name"] for s in _all}
    base = nm
    i = 2
    while nm in existing:
        nm = f"{base}-{i}"
        i += 1

    sk = Skill(
        name=nm,
        description=(description or title or "").strip(),
        version=version,
        category=category or "general",
        tags=list(tags or []),
        platforms=list(platforms or []),
        requires_toolsets=list(requires_toolsets or []),
        fallback_for_toolsets=list(fallback_for_toolsets or []),
        status=status or "draft",
        confidence=float(confidence),
        source=source,
        teacher_model=teacher_model,
        owner=owner,
        when_to_use=effective_when,
        procedure=effective_steps,
        pitfalls=list(pitfalls or []),
        verification=list(verification or []),
        body_extra=(solution if solution and not procedure else ""),
    )
    self._write_skill(sk)
    return sk.to_dict()
def update_skill(self, skill_id: str, updates: Dict) -> bool:
    """Apply ``updates`` to the skill whose slug name is ``skill_id``.

    Allows updating any field plus renames if ``name`` changes (the skill
    directory is moved on disk, as it is when the category changes).
    Old-schema keys (``title``, ``problem``, ``solution``, ``steps``) are
    accepted as aliases when the matching new-schema key is absent.

    Returns:
        True when the skill was found and rewritten. False when no skill
        has that name, when the new name is already used by another skill,
        when the target directory already exists, or when moving the
        directory fails.
    """
    for path in self._iter_skill_files():
        sk = self._read_skill(path)
        if not sk or sk.name != skill_id:
            continue
        old_dir = os.path.dirname(path)

        # Apply updates in a Skill-shape friendly way
        scalar_keys = (
            "description", "version", "category", "status", "confidence",
            "source", "teacher_model", "owner", "when_to_use",
            "body_extra",
        )
        for k in scalar_keys:
            if k in updates:
                setattr(sk, k, updates[k])
        list_keys = ("tags", "procedure", "pitfalls", "verification",
                     "platforms", "requires_toolsets", "fallback_for_toolsets")
        for k in list_keys:
            if k in updates:
                setattr(sk, k, list(updates[k] or []))

        # Old-schema field aliases
        if "title" in updates and "description" not in updates:
            sk.description = updates["title"]
        if "problem" in updates and "when_to_use" not in updates:
            sk.when_to_use = updates["problem"]
        if "solution" in updates and "body_extra" not in updates and not sk.procedure:
            sk.body_extra = updates["solution"]
        if "steps" in updates and "procedure" not in updates:
            sk.procedure = list(updates["steps"] or [])

        # Rename
        new_name = slugify(updates.get("name") or sk.name)
        if new_name != sk.name:
            # Skills are looked up by name alone, so the name must stay
            # unique across ALL categories; the directory check below only
            # catches a clash inside the target category.
            for other_path in self._iter_skill_files():
                if other_path == path:
                    continue
                other = self._read_skill(other_path)
                if other and other.name == new_name:
                    logger.warning(f"Skill rename target name in use: {new_name}")
                    return False
            sk.name = new_name

        # Write to potentially new path
        new_path = self._skill_file(sk.category, sk.name)
        if new_path != path:
            # Move the whole skill directory if rename or recategorize
            new_dir = os.path.dirname(new_path)
            if os.path.isdir(new_dir):
                logger.warning(f"Skill rename target exists: {new_dir}")
                return False
            try:
                os.makedirs(os.path.dirname(new_dir), exist_ok=True)
                os.rename(old_dir, new_dir)
            except OSError as e:
                # Report failure through the bool contract instead of
                # letting a filesystem error escape to the caller.
                logger.warning(f"Failed to move skill dir {old_dir} to {new_dir}: {e}")
                return False
            # Also rename usage key
            usage = self._load_usage()
            if skill_id in usage:
                usage[sk.name] = usage.pop(skill_id)
                self._save_usage(usage)

        self._write_skill(sk)
        return True
    return False
def delete_skill(self, skill_id: str) -> bool:
    """Delete the skill named ``skill_id`` and its usage record.

    The directory containing the skill file is removed recursively, then
    the ``skill_id`` entry (if any) is dropped from the usage data.

    Returns:
        True when the skill was found and its directory removed; False
        when no skill has that name or the directory could not be removed.
    """
    # Function-scope import: the file's top-level import block is not
    # guaranteed to include shutil.
    import shutil

    for path in self._iter_skill_files():
        sk = self._read_skill(path)
        if not sk or sk.name != skill_id:
            continue
        skill_dir = os.path.dirname(path)
        try:
            # rmtree unlinks symlinks found inside the tree instead of
            # calling rmdir on them, so a skill dir that contains a linked
            # directory can still be deleted (the link target is untouched).
            shutil.rmtree(skill_dir)
        except OSError as e:
            logger.warning(f"Failed to remove skill dir {skill_dir}: {e}")
            return False
        usage = self._load_usage()
        if skill_id in usage:
            del usage[skill_id]
            self._save_usage(usage)
        return True
    return False
def record_use(self, skill_id: str) -> None:
    """Increment the use counter for ``skill_id`` and stamp the time.

    A fresh ``{"uses": 0, "last_used": None}`` record is created when the
    skill has no usage entry yet. ``last_used`` is stored as whole seconds
    from ``time.time()``. The updated usage data is saved afterwards.
    """
    counters = self._load_usage()
    if skill_id not in counters:
        counters[skill_id] = {"uses": 0, "last_used": None}
    record = counters[skill_id]
    previous = int(record.get("uses", 0))
    record["uses"] = previous + 1
    record["last_used"] = int(time.time())
    self._save_usage(counters)
# ----------------------------------------------------------------------
# Reading a single skill (used by the skill_view tool)
# ----------------------------------------------------------------------
def read_skill_md(self, name: str) -> Optional[str]:
    """Return the raw text of the skill file whose skill is named ``name``.

    Returns:
        The file contents, or ``None`` when no skill has that name or the
        matching file cannot be read.
    """
    for candidate in self._iter_skill_files():
        parsed = self._read_skill(candidate)
        if not parsed or parsed.name != name:
            continue
        # First match wins; a read failure ends the search with None.
        try:
            with open(candidate) as handle:
                contents = handle.read()
        except Exception:
            return None
        return contents
    return None
def read_skill_reference(self, name: str, ref_path: str) -> Optional[str]:
    """Read a sub-file under the skill's directory (references/, etc).

    Refuses path traversal: ``ref_path`` is resolved with
    ``os.path.realpath`` against the skill directory and must land
    strictly inside it.

    Args:
        name: Slug name of the skill.
        ref_path: Path of the sub-file, relative to the skill directory.

    Returns:
        The file contents, or ``None`` when the skill is not found, the
        path is malformed or escapes the skill directory, the target is
        not a regular file, or it cannot be read.
    """
    for path in self._iter_skill_files():
        sk = self._read_skill(path)
        if not sk or sk.name != name:
            continue
        try:
            base = os.path.realpath(os.path.dirname(path))
            target = os.path.realpath(os.path.join(base, ref_path))
            inside = os.path.commonpath([base, target]) == base
        except (TypeError, ValueError):
            # Malformed ref_path (e.g. None or an embedded NUL byte) or
            # paths with no common root: refuse rather than raise.
            return None
        # Compare against the canonical `base`, the same form `target` is in.
        if not inside or target == base:
            return None
        if not os.path.isfile(target):
            return None
        try:
            with open(target) as f:
                return f.read()
        except (OSError, ValueError):
            # ValueError covers UnicodeDecodeError on undecodable content.
            return None
    return None
# ----------------------------------------------------------------------
# Index — the lightweight summary injected into the system prompt
# ----------------------------------------------------------------------
def index_for(
    self,
    owner: Optional[str] = None,
    *,
    active_toolsets: Optional[List[str]] = None,
    platform: Optional[str] = None,
) -> List[Dict]:
    """Build the `[{name, description, category, status}]` summary list
    that the agent sees in its system prompt.

    Kept:
      - Skills whose status is "published", or missing (pre-status
        legacy, reported as "published").
      - Drafts whose source is "teacher-escalation": the teacher loop
        exists so the student finds the new procedure on the very next
        turn, without waiting for a manual publish click.

    Dropped:
      - Every other draft and every other status — work-in-progress
        entries would pollute the prompt with half-finished procedures.
      - Skills that list platforms, when `platform` is given and is not
        one of them.
      - Skills with a required toolset that is not in `active_toolsets`.
      - Skills that are a fallback for a toolset in `active_toolsets`.

    The result is ordered by (category, name).
    """
    enabled = active_toolsets or []
    summaries = []
    for skill in self.load(owner=owner):
        state = skill.get("status")
        always_listed = state in ("published", None)
        teacher_draft = (
            state == "draft" and skill.get("source") == "teacher-escalation"
        )
        if not (always_listed or teacher_draft):
            continue

        # Platform gating
        supported = skill.get("platforms")
        if platform and supported and platform not in supported:
            continue

        # requires_toolsets: every required toolset must be active
        required = skill.get("requires_toolsets") or []
        if any(toolset not in enabled for toolset in required):
            continue

        # fallback_for_toolsets: hidden once any of those toolsets is active
        replaces = skill.get("fallback_for_toolsets") or []
        if any(toolset in enabled for toolset in replaces):
            continue

        summary = {
            "name": skill["name"],
            "description": skill.get("description") or skill.get("title", ""),
            "category": skill.get("category", "general"),
            "status": state or "published",
        }
        summaries.append(summary)
    return sorted(summaries, key=lambda row: (row["category"], row["name"]))
# ----------------------------------------------------------------------
# Relevance search (kept for the existing /api/skills/search endpoint
# and the `manage_skills` action="search"). Now operates on the new
# field set.
# ----------------------------------------------------------------------
def get_relevant_skills(
    self,
    query: str,
    skills: Optional[List[Dict]] = None,
    threshold: float = 0.3,
    max_items: int = 5,
    min_confidence: float = 0.0,
) -> List[Dict]:
    """Rank skills by textual relevance to ``query``.

    Args:
        query: Free-text query. Empty, blank or ``None`` yields ``[]``.
        skills: Candidate skill dicts; defaults to ``self.load_all()``.
        threshold: Minimum final score for a skill to be returned.
        max_items: Maximum number of skills returned.
        min_confidence: When > 0, draft skills whose confidence is set
            and below this value are dropped before scoring.

    Returns:
        Up to ``max_items`` skill dicts, highest score first.

    Scoring: Jaccard overlap between the query tokens and the skill's
    name/description/when_to_use/tags/procedure tokens; each tag found in
    the query (case-insensitively) lifts the score to at least 0.3 and
    multiplies it by 1.3; a query contained in the description lifts it to
    at least 0.6; then small multipliers for confidence and prior use.
    """
    if skills is None:
        skills = self.load_all()
    if not skills or not query or not query.strip():
        return []

    # Consider published AND draft skills for relevance retrieval.
    # The teacher-escalation loop writes new skills as drafts; the
    # whole point is for the student to find them on the next try
    # without a manual publish click. The UI flags teacher-written
    # entries with a 🎓 badge so users can demote / delete bad
    # ones when they spot them.
    skills = [s for s in skills if s.get("status") in ("published", "draft")]

    # Confidence gate (used by prompt-injection, NOT by search): a DRAFT
    # skill must clear the bar to be injected. Published skills are already
    # vetted, so they always qualify. Missing confidence = treat as 1.0
    # (legacy skills shouldn't silently vanish). 0 disables the gate.
    if min_confidence > 0:
        def _passes(s):
            if s.get("status") == "published":
                return True
            c = s.get("confidence")
            if c is None:
                return True  # unset → don't filter (legacy)
            return _to_float(c, 1.0) >= min_confidence  # unparseable → pass
        skills = [s for s in skills if _passes(s)]
    if not skills:
        return []

    query_lower = query.lower()  # hoisted: invariant across skills
    query_tokens = _tokenize(query)
    scored = []
    for sk in skills:
        # `or` fallbacks and str() guard against explicit nulls and
        # non-string items in hand-edited / legacy entries.
        tags = [str(t) for t in (sk.get("tags") or []) if t]
        description = sk.get("description") or ""
        text = " ".join([
            sk.get("name") or "",
            description,
            sk.get("when_to_use") or "",
            " ".join(tags),
            " ".join(str(p) for p in (sk.get("procedure") or [])),
        ])
        score = _jaccard(query_tokens, _tokenize(text))
        for tag in tags:
            # Lower-case both sides; comparing the raw tag against the
            # lower-cased query never matched tags containing capitals.
            if tag.lower() in query_lower:
                score = max(score, 0.3) * 1.3
        if query_lower in description.lower():
            score = max(score, 0.6)
        score *= 1.0 + _to_float(sk.get("confidence"), 0.5) * 0.1
        if (sk.get("uses") or 0) > 0:
            score *= 1.05
        if score >= threshold:
            scored.append((score, sk))
    scored.sort(key=lambda x: x[0], reverse=True)
    return [sk for _, sk in scored[:max_items]]