Odysseus v1.0

2026-05-31 23:58:26 +09:00
commit e5c99a5eee
421 changed files with 271349 additions and 0 deletions
--- a/services/memory/__init__.py
+++ b/services/memory/__init__.py
@@ -0,0 +1,14 @@
+# services/memory/__init__.py
+"""Memory service — persistent memory storage and retrieval."""
+
+from .service import MemoryService, Memory, MemorySearchResult
+from .memory import MemoryManager
+from .memory_vector import MemoryVectorStore
+
+__all__ = [
+    "MemoryService",
+    "Memory",
+    "MemorySearchResult",
+    "MemoryManager",
+    "MemoryVectorStore",
+]
--- a/services/memory/memory.py
+++ b/services/memory/memory.py
@@ -0,0 +1,359 @@
+
+import json
+import logging
+import os
+import time
+import uuid
+import re
+from typing import List, Dict, Tuple
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+def tokenize(text: str) -> List[str]:
+    """Split *text* on whitespace and trim punctuation from each word's ends.
+
+    Only the characters ``. , ! ? " ;`` are trimmed, and only at the start
+    and end of a word; punctuation inside a word (``don't``) is kept.  Words
+    made up solely of those characters (``"..."``, ``"?!"``) are dropped
+    rather than returned as empty strings, so they can never count as a
+    token shared between two texts.
+
+    Args:
+        text: The text to tokenize.
+
+    Returns:
+        The non-empty trimmed words, in their original order.
+    """
+    trimmed = (word.strip('.,!?";') for word in text.split())
+    return [word for word in trimmed if word]
+
+def get_text_similarity(text1: str, text2: str) -> float:
+    """Return the Jaccard similarity of the lower-cased token sets of two texts.
+
+    Args:
+        text1: First text.
+        text2: Second text.
+
+    Returns:
+        ``0.0`` when either text is empty/``None`` or when exactly one of
+        them yields no tokens, ``1.0`` when both are non-empty but yield no
+        tokens, otherwise ``|A & B| / |A | B|`` for the two token sets.
+    """
+    if not (text1 and text2):
+        return 0.0
+
+    left = set(tokenize(text1.lower()))
+    right = set(tokenize(text2.lower()))
+
+    if not left or not right:
+        # Two token-less texts are treated as identical; one-sided emptiness
+        # means there is nothing in common.
+        return 0.0 if (left or right) else 1.0
+
+    return len(left & right) / len(left | right)
+
+class MemoryManager:
+    def __init__(self, data_dir: str):
+        """Bind the manager to ``<data_dir>/memory.json`` and make sure it exists.
+
+        Args:
+            data_dir: Directory that holds (or will hold) ``memory.json``.
+                A missing directory is created, so a fresh installation does
+                not fail with ``FileNotFoundError`` when the store is first
+                written.
+        """
+        # An empty string means "current directory"; os.makedirs("") would raise.
+        if data_dir:
+            os.makedirs(data_dir, exist_ok=True)
+        self.memory_file = os.path.join(data_dir, "memory.json")
+        self.ensure_file_exists()
+        
+    def extract_memory_from_chat(self, chat_history: List[Dict], session_id: str = None) -> List[Dict]:
+        """
+        Extract memory entries from chat history as a fallback when LLM fails.
+
+        Only assistant messages are scanned.  Every line that starts with a
+        bullet (``-``, ``*``, ``•``) or with a number followed by a dot
+        (``1.``) yields one memory holding the text after the marker.
+        Horizontal-rule lines such as ``---`` or ``***`` and markers with no
+        text after them are skipped.
+
+        Args:
+            chat_history: List of chat messages with 'role' and 'content' keys
+            session_id: Optional session ID to associate with extracted memories
+
+        Returns:
+            List of memory entries with text, timestamp, and optional session_id
+        """
+        # The marker alternatives must be grouped: without the group the
+        # pattern reads "bullet" OR "number + captured text", so a bullet
+        # line matched with an empty capture and the text was lost.
+        list_item = re.compile(r'^(?:[-*•]|\d+\.)\s*(.*)')
+        # Rules like "---" would otherwise be read as a "-" bullet with text "--".
+        separator = re.compile(r'^(?:={3,}|-{3,}|_{3,}|\*{3,})\s*$')
+
+        memories = []
+        for msg in chat_history:
+            if msg.get("role") != "assistant":
+                continue
+
+            for raw_line in str(msg.get("content", "")).split('\n'):
+                line = raw_line.strip()
+                if separator.match(line):
+                    continue
+
+                item = list_item.match(line)
+                if item is None:
+                    continue
+
+                text = item.group(1).strip()
+                if not text:
+                    continue
+
+                memories.append({
+                    "text": text,
+                    "timestamp": int(datetime.now().timestamp()),
+                    "session_id": session_id
+                })
+
+        return memories
+        
+    def process_inline_memory_command(self, message: str) -> Tuple[bool, str]:
+        """
+        Detect an inline memory command such as "remember: X".
+
+        The stripped message must start with one of the keywords
+        remember / memorize / save / note / store (any case), optionally
+        followed by ``:`` or ``-``, then whitespace, then the text to keep.
+
+        Args:
+            message: The user message to check
+
+        Returns:
+            ``(True, text)`` with the stripped text after the keyword when the
+            message is a command, otherwise ``(False, "")``
+        """
+        found = re.match(
+            r'^(?:remember|memorize|save|note|store)[:\-]?\s+(.+)$',
+            message.strip(),
+            re.IGNORECASE,
+        )
+        if found is None:
+            return False, ""
+        return True, found.group(1).strip()
+    
+    def ensure_file_exists(self):
+        """Write an empty JSON list to ``self.memory_file`` unless the path already exists."""
+        if os.path.exists(self.memory_file):
+            return
+        with open(self.memory_file, 'w', encoding='utf-8') as handle:
+            json.dump([], handle, ensure_ascii=False, indent=2)
+    
+    def load_all(self) -> List[Dict]:
+        """Read every memory entry from the JSON store, without owner filtering.
+
+        Returns:
+            The validated entries when the file holds a JSON list; ``[]`` when
+            the file is missing or holds some other JSON value.  When the file
+            cannot be parsed or read (``JSONDecodeError`` / ``PermissionError``)
+            the error is logged and the result of the legacy ``memory.txt``
+            migration is returned instead.
+        """
+        if not os.path.exists(self.memory_file):
+            return []
+
+        try:
+            with open(self.memory_file, "r", encoding="utf-8") as handle:
+                payload = json.load(handle)
+        except (json.JSONDecodeError, PermissionError) as e:
+            logger.error("Error loading memory.json: %s", e)
+            return self._migrate_from_legacy()
+
+        if not isinstance(payload, list):
+            return []
+        return self._validate_entries(payload)
+
+    def load(self, owner: str = None) -> List[Dict]:
+        """Load memory entries, optionally restricted to a single owner.
+
+        Args:
+            owner: When ``None`` every entry is returned; otherwise only
+                entries whose ``"owner"`` field equals this value.
+
+        Returns:
+            The matching entries, in stored order.
+        """
+        everything = self.load_all()
+        if owner is not None:
+            return [item for item in everything if item.get("owner") == owner]
+        return everything
+
+    def claim_ownerless(self, owner: str):
+        """Assign every entry without an owner to *owner* and persist the result.
+
+        Intended as a one-off migration.  Entries whose ``"owner"`` is
+        missing or falsy are claimed; entries that already have an owner are
+        left untouched.  Nothing is written when there is nothing to claim.
+
+        Args:
+            owner: The owner to store on previously ownerless entries.
+        """
+        entries = self.load_all()
+        claimed = 0
+        for entry in entries:
+            if not entry.get("owner"):
+                entry["owner"] = owner
+                claimed += 1
+
+        if claimed:
+            self.save(entries)
+            # Report only what this call changed, not everything the owner
+            # already had before the migration.
+            logger.info("Claimed %d ownerless memories for %s", claimed, owner)
+    
+    def _validate_entries(self, entries: List[Dict]) -> List[Dict]:
+        """Back-fill the bookkeeping fields every loaded entry is expected to carry.
+
+        Missing ``id``, ``timestamp``, ``source`` and ``category`` keys are
+        added in place (a fresh UUID, the current time, ``"unknown"`` and
+        ``"fact"`` respectively); existing values are never overwritten.
+
+        Args:
+            entries: Entries as read from the store.
+
+        Returns:
+            A new list containing the same (now completed) entry objects.
+        """
+        # Factories rather than values so a UUID/timestamp is only generated
+        # for entries that actually lack one.
+        fallbacks = (
+            ("id", lambda: str(uuid.uuid4())),
+            ("timestamp", lambda: int(time.time())),
+            ("source", lambda: "unknown"),
+            ("category", lambda: "fact"),
+        )
+        completed = []
+        for record in entries:
+            for field, make_default in fallbacks:
+                if field not in record:
+                    record[field] = make_default()
+            completed.append(record)
+        return completed
+    
+    def _migrate_from_legacy(self) -> List[Dict]:
+        """Convert a legacy ``memory.txt`` next to the JSON store into entries.
+
+        Each non-blank line of ``memory.txt`` becomes one entry with
+        ``source="user"`` and ``category="fact"``; the entries are saved to
+        the JSON store before being returned.
+
+        Returns:
+            The converted entries, or ``[]`` when there is no legacy file or
+            the conversion fails (the failure is logged).
+        """
+        legacy_path = os.path.join(os.path.dirname(self.memory_file), "memory.txt")
+        if not os.path.exists(legacy_path):
+            return []
+
+        logger.info("Converting legacy memory.txt to new JSON format")
+        try:
+            with open(legacy_path, "r", encoding="utf-8") as source_file:
+                stripped = (raw.strip() for raw in source_file.readlines())
+                texts = [line for line in stripped if line]
+
+            converted = [
+                {
+                    "id": str(uuid.uuid4()),
+                    "text": line,
+                    "timestamp": int(time.time()),
+                    "source": "user",
+                    "category": "fact"
+                }
+                for line in texts
+            ]
+            self.save(converted)
+        except Exception as e:
+            logger.error("Failed to convert legacy memory: %s", e)
+            return []
+        return converted
+    
+    def save(self, entries: List[Dict]):
+        """Complete missing bookkeeping fields, then write *entries* to the JSON store.
+
+        Entries lacking ``id``, ``timestamp``, ``source`` or ``category`` get
+        a fresh UUID, the current time, ``"user"`` and ``"fact"``
+        respectively; the dictionaries are modified in place.
+
+        Args:
+            entries: The full list of entries to persist.
+        """
+        fallbacks = (
+            ("id", lambda: str(uuid.uuid4())),
+            ("timestamp", lambda: int(time.time())),
+            ("source", lambda: "user"),
+            ("category", lambda: "fact"),
+        )
+        for record in entries:
+            for field, make_default in fallbacks:
+                if field not in record:
+                    record[field] = make_default()
+
+        # Write to a sibling temp file and swap it in, so readers never see a
+        # half-written store.
+        staging_path = self.memory_file + ".tmp"
+        with open(staging_path, "w", encoding="utf-8") as sink:
+            json.dump(entries, sink, ensure_ascii=False, indent=2)
+        os.replace(staging_path, self.memory_file)
+    
+    def add_entry(self, text: str, source: str = "user", category: str = "fact", owner: str = None) -> Dict:
+        """Build a new memory entry dictionary.
+
+        The entry is only constructed and returned; this method does not
+        write it to the store, so the caller is responsible for saving it.
+
+        Args:
+            text: The memory text; surrounding whitespace is removed.
+            source: Value stored under ``"source"``.
+            category: Value stored under ``"category"``.
+            owner: Stored under ``"owner"`` when truthy, otherwise the key is omitted.
+
+        Returns:
+            The new entry with a fresh UUID ``id`` and the current ``timestamp``.
+
+        Raises:
+            ValueError: If *text* is empty or whitespace only.
+        """
+        cleaned = text.strip()
+        if not cleaned:
+            raise ValueError("Memory text cannot be empty")
+
+        record = dict(
+            id=str(uuid.uuid4()),
+            text=cleaned,
+            timestamp=int(time.time()),
+            source=source,
+            category=category,
+        )
+        if owner:
+            record["owner"] = owner
+        return record
+    
+    def find_duplicates(self, text: str, entries: List[Dict] = None) -> List[Dict]:
+        """Return the entries whose text equals *text*, ignoring case and outer whitespace.
+
+        Both sides of the comparison are stripped and lower-cased, so a
+        stored ``" Hello "`` matches ``"hello"``.  Entries without a usable
+        ``"text"`` value are skipped instead of raising ``KeyError``.
+
+        Args:
+            text: The candidate memory text.
+            entries: Entries to search; loaded from the store when ``None``.
+
+        Returns:
+            The matching entries in their original order; always ``[]`` for
+            blank *text*.
+        """
+        wanted = text.strip().lower()
+        if not wanted:
+            # Blank text is never a valid memory, so it duplicates nothing.
+            return []
+
+        if entries is None:
+            entries = self.load()
+
+        return [
+            entry for entry in entries
+            if str(entry.get("text") or "").strip().lower() == wanted
+        ]
+            
+    def categorize_memory_by_relevance(self, message: str, memories: list):
+        """Sort memories into contact / preference / task / fact buckets for a message.
+
+        A memory is assigned to the first keyword family its text matches
+        (contacts, then preferences, then tasks) and is kept only when the
+        message also mentions that family; a memory that matches a family the
+        message does not mention is dropped.  Memories matching no family are
+        kept as facts when their text similarity to the message exceeds 0.4.
+        Keyword checks are case-insensitive substring tests.
+
+        Args:
+            message: The user message the memories are matched against.
+            memories: Entries with a ``"text"`` key.
+
+        Returns:
+            Dict with the keys ``contacts``, ``preferences``, ``facts`` and
+            ``tasks``, each holding the selected entries in input order.
+        """
+        # (bucket, keywords looked for in the memory, keywords looked for in the message)
+        families = (
+            ("contacts",
+             ["phone", "email", "address", "lives", "works"],
+             ["contact", "phone", "address", "email"]),
+            ("preferences",
+             ["likes", "dislikes", "prefers", "favorite"],
+             ["like", "prefer", "favorite", "want"]),
+            ("tasks",
+             ["todo", "task", "remind", "meeting"],
+             ["todo", "task", "schedule", "remind"]),
+        )
+        buckets = {"contacts": [], "preferences": [], "facts": [], "tasks": []}
+        msg_lower = message.lower()
+
+        for mem in memories:
+            text_lower = mem["text"].lower()
+            for bucket, memory_words, message_words in families:
+                if any(word in text_lower for word in memory_words):
+                    if any(word in msg_lower for word in message_words):
+                        buckets[bucket].append(mem)
+                    # The first matching family decides, relevant or not.
+                    break
+            else:
+                # General facts - only if very relevant
+                if get_text_similarity(message, mem["text"]) > 0.4:
+                    buckets["facts"].append(mem)
+
+        return buckets
+
+    def get_relevant_memories(self, query: str, memories: list, threshold: float = 0.05, max_items: int = 8):
+        """Return the memories most relevant to *query*, best match first.
+
+        The query is classified from its keywords as an identity, contact,
+        preference, task or fact question (first match in that order).
+        Identity keywords are short pronouns ("i", "me", "my", "am"), so they
+        are matched against whole query words; matching them as substrings
+        would classify almost every query as an identity question.  The other
+        keyword families are stems and are matched as substrings.
+
+        Scoring:
+          * For identity queries, every memory that looks like an identity
+            statement (a capitalised two-word name or a phrase such as
+            "my name") gets a fixed score of 0.9.
+          * Every other memory - including identity-looking memories when the
+            query is not an identity question - is scored by Jaccard
+            similarity of its tokens with the query tokens, multiplied by 1.4
+            (contact queries) or 1.3 (preference/task queries) when the memory
+            contains a marker of the query's family, and raised to at least
+            0.8 when the whole query occurs inside the memory text.
+
+        Args:
+            query: The user query.
+            memories: Entries with a ``"text"`` key.
+            threshold: Minimum score a similarity-scored memory needs.
+            max_items: Maximum number of memories returned.
+
+        Returns:
+            Up to *max_items* entries sorted by descending score; ``[]`` when
+            *memories* is empty or *query* is blank.
+        """
+        if not memories or not query.strip():
+            return []
+
+        # Keyword families used to classify the query.
+        identity_words = {"name", "who", "i", "am", "called", "identity", "myself", "me", "my"}
+        contact_words = ["phone", "email", "address", "contact", "number", "where", "located", "reach"]
+        preference_words = ["like", "prefer", "favorite", "want", "love", "hate", "dislike", "enjoy", "interested"]
+        task_words = ["todo", "task", "remind", "meeting", "appointment", "schedule", "deadline"]
+        fact_words = ["what", "when", "where", "how", "why", "explain", "describe", "information", "know"]
+
+        query_lower = query.lower()
+        # Whole words of the query ("I'm" -> "i", "m") for the pronoun check.
+        query_words = set(re.findall(r"[a-z0-9]+", query_lower))
+
+        query_type = None
+        if query_words & identity_words:
+            query_type = "identity"
+        elif any(word in query_lower for word in contact_words):
+            query_type = "contact"
+        elif any(word in query_lower for word in preference_words):
+            query_type = "preference"
+        elif any(word in query_lower for word in task_words):
+            query_type = "task"
+        elif any(word in query_lower for word in fact_words):
+            query_type = "fact"
+
+        # Split off memories that look like identity statements.
+        identity_markers = ["name is", "i'm", "i am", "called", "my name", "named", "call me"]
+        identity_memories = []
+        other_memories = []
+        for memory in memories:
+            memory_text = memory["text"].lower()
+            looks_like_identity = (
+                re.search(r'\b[A-Z][a-z]+ [A-Z][a-z]+\b', memory["text"]) is not None
+                or any(marker in memory_text for marker in identity_markers)
+            )
+            if looks_like_identity:
+                identity_memories.append(memory)
+            else:
+                other_memories.append(memory)
+
+        if query_type == "identity":
+            # Identity questions always surface identity memories first.
+            relevant = [(0.9, memory) for memory in identity_memories]
+            to_score = other_memories
+        else:
+            # For every other question an identity-looking memory (e.g. one
+            # that merely contains "New York") competes on similarity like
+            # any other entry instead of being silently discarded.
+            relevant = []
+            to_score = memories
+
+        # (multiplier, markers) applied when the memory matches the query's family.
+        boosts = {
+            "contact": (1.4, ["@gmail.com", "@", ".com",
+                              "phone", "number", "address",
+                              "http", "www", "tel:"]),
+            "preference": (1.3, ["like", "love", "hate", "dislike",
+                                 "prefer", "favorite", "enjoy", "interested"]),
+            "task": (1.3, ["todo", "task", "remind", "meeting",
+                           "appointment", "schedule", "deadline", "need to"]),
+        }
+        boost = boosts.get(query_type)
+        query_tokens = set(tokenize(query_lower))  # invariant across memories
+
+        for memory in to_score:
+            memory_text = memory["text"].lower()
+            memory_tokens = set(tokenize(memory_text))
+            if not query_tokens or not memory_tokens:
+                continue
+
+            # Base Jaccard similarity, optionally boosted by family markers.
+            final_score = len(query_tokens & memory_tokens) / len(query_tokens | memory_tokens)
+            if boost is not None:
+                multiplier, markers = boost
+                if any(marker in memory_text for marker in markers):
+                    final_score *= multiplier
+
+            # The whole query appearing verbatim in the memory is a strong hit.
+            if query_lower in memory_text:
+                final_score = max(final_score, 0.8)
+
+            if final_score >= threshold:
+                relevant.append((final_score, memory))
+
+        # Stable sort: equal scores keep their insertion order.
+        relevant.sort(key=lambda scored: scored[0], reverse=True)
+        return [memory for _, memory in relevant[:max_items]]
--- a/services/memory/memory_extractor.py
+++ b/services/memory/memory_extractor.py
@@ -0,0 +1,533 @@
+"""
+memory_extractor.py
+
+Background auto-extraction of facts from chat conversations.
+After each LLM response, this module sends the last few messages to the LLM
+asking it to extract memorable facts, then stores them in both memory.json
+and the FAISS vector index.
+
+Periodically audits all memories via LLM to consolidate duplicates,
+rewrite vague entries, and remove junk.
+"""
+
+import hashlib
+import json
+import logging
+import os
+import re
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+def _tidy_state_path(memory_manager) -> str:
+    """Return the path of ``memory_tidy_state.json``, the sidecar file kept in
+    the same directory as the manager's ``memory_file``.
+
+    The sidecar stores, per owner, the fingerprint of the memory list as it
+    was after the last completed audit, which lets a later audit detect that
+    nothing has changed and skip the LLM call."""
+    store_dir = os.path.dirname(memory_manager.memory_file)
+    return os.path.join(store_dir, "memory_tidy_state.json")
+
+
+def _fingerprint_entries(entries) -> str:
+    """Return a SHA-256 hex digest identifying a set of memories.
+
+    Only ``id`` (as a string), ``text`` and ``category`` of each entry take
+    part, and the triples are sorted before hashing, so the digest does not
+    depend on entry order but changes whenever an entry is added, removed or
+    has one of those fields edited."""
+    triples = [
+        (str(entry.get("id", "")), entry.get("text", ""), entry.get("category", ""))
+        for entry in entries
+    ]
+    triples.sort()
+    # Unit/record separator characters keep field and entry boundaries
+    # unambiguous in the hashed byte stream.
+    serialized = "".join("\x1f".join(triple) + "\x1e" for triple in triples)
+    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
+
+
+def _load_tidy_state(memory_manager) -> dict:
+    """Read the tidy-state sidecar file.
+
+    Returns the parsed JSON object, or ``{}`` when the file is missing, is
+    not valid JSON, or holds something other than a JSON object."""
+    state_file = _tidy_state_path(memory_manager)
+    try:
+        with open(state_file, "r") as handle:
+            loaded = json.load(handle)
+    except (FileNotFoundError, json.JSONDecodeError):
+        return {}
+    if isinstance(loaded, dict):
+        return loaded
+    return {}
+
+
+def _save_tidy_state(memory_manager, owner: Optional[str], fingerprint: str) -> None:
+    """Record *fingerprint* as the last audited state for *owner*.
+
+    The existing sidecar content is loaded, the owner's slot (keyed by the
+    owner name, or ``""`` when there is no owner) is replaced, and the whole
+    mapping is written back.  A failed write is logged as a warning and
+    otherwise ignored."""
+    target = _tidy_state_path(memory_manager)
+    snapshot = _load_tidy_state(memory_manager)
+    owner_key = owner or ""
+    snapshot[owner_key] = {"fingerprint": fingerprint}
+    try:
+        with open(target, "w") as sink:
+            json.dump(snapshot, sink, indent=2)
+    except OSError as e:
+        logger.warning(f"Could not persist tidy fingerprint: {e}")
+
+# System prompt placed first in the message list that extract_and_store()
+# sends to the LLM; it asks for a JSON array of {text, category} facts.
+EXTRACT_SYSTEM_PROMPT = (
+    "You are a memory extraction assistant. Analyze the conversation and extract ONLY "
+    "durable personal facts about the user that would be useful across many future conversations.\n\n"
+    "Good examples: name, job title, city, family members, long-term projects, strong preferences.\n"
+    "Bad examples: what they asked about today, temporary moods, generic statements, "
+    "things the assistant said, one-off tasks, opinions on the current topic.\n\n"
+    "Rules:\n"
+    "- MAX 2 facts per conversation — only the most important\n"
+    "- Only extract facts the USER stated or clearly implied\n"
+    "- Each fact must be a single short sentence (under 15 words)\n"
+    "- If a fact is similar to something likely already known, skip it\n"
+    "- If nothing durable was revealed, return []\n\n"
+    "Return a JSON array of objects with 'text' and 'category' fields.\n"
+    "Categories: 'identity', 'preference', 'fact', 'contact', 'project', 'goal'\n\n"
+    "Return ONLY valid JSON, no markdown fences."
+)
+
+# How many of the session's most recent messages extract_and_store() passes
+# to the LLM and to the regex fallback extractor.
+CONTEXT_WINDOW = 6
+
+# System prompt for audit_memories(): the user message that follows it is the
+# JSON list of {id, text, category} memories to be de-duplicated.
+AUDIT_SYSTEM_PROMPT = (
+    "You are a memory database curator. Be CONSERVATIVE: remove only TRUE "
+    "duplicates and clearly useless entries. Every distinct fact must survive. "
+    "When in doubt, KEEP the entry. Return the cleaned list.\n\n"
+    "Rules:\n"
+    "1. MERGE only entries that state the SAME fact in different words. If you "
+    "are not sure two entries are the same fact, KEEP BOTH.\n"
+    "   Merge: 'User's name is Sam' + 'The user is called Sam' -> one.\n"
+    "   Do NOT merge related-but-distinct facts: 'Likes Python' and 'Uses "
+    "Python at work' are DIFFERENT — keep both.\n"
+    "2. REMOVE only entries that are genuinely worthless: about what the AI did "
+    "(not the user), empty, or meaningless. Do NOT drop a real fact just "
+    "because it seems minor or niche.\n"
+    "3. Keep the original wording. Only lightly trim obvious redundancy — do "
+    "NOT aggressively rewrite or shorten.\n"
+    "4. Preserve the 'id' of the entry you keep when merging.\n"
+    "5. Never invent facts. When unsure, KEEP.\n\n"
+    "Return a JSON array of objects with fields: id, text, category.\n"
+    "Return ONLY valid JSON, no markdown fences."
+)
+
+# extract_and_store() triggers audit_memories() once at least this many
+# memories have been auto-added since the previous trigger.
+AUDIT_INTERVAL = 5  # audit every N new memories added
+# Running count of auto-added memories since the last triggered audit.
+# Module-level state: one counter for the whole process, shared by all
+# sessions and owners; reset to 0 by extract_and_store() when it starts an audit.
+_extractions_since_audit = 0
+
+
+def _message_text(message) -> str:
+    """Return the plain text of a chat message, or ``""`` if it has none.
+
+    The content is read from the ``content`` attribute, falling back to the
+    ``"content"`` key when the message is a dict.  String content is
+    stripped; list content is flattened by taking ``text`` (or ``content``)
+    of dict items and ``str()`` of anything else, then joining the non-empty
+    pieces with single spaces.  Any other content type yields ``""``."""
+    body = getattr(message, "content", None)
+    if body is None and isinstance(message, dict):
+        body = message.get("content")
+
+    if isinstance(body, str):
+        return body.strip()
+    if not isinstance(body, list):
+        return ""
+
+    pieces = [
+        str(chunk.get("text") or chunk.get("content") or "")
+        if isinstance(chunk, dict)
+        else str(chunk)
+        for chunk in body
+    ]
+    return " ".join(piece for piece in pieces if piece).strip()
+
+
+def _message_role(message) -> str:
+    """Return the message's role in lower case, or ``""`` when it has none.
+
+    The role is read from the ``role`` attribute, falling back to the
+    ``"role"`` key when the message is a dict."""
+    found = getattr(message, "role", None)
+    if found is None and isinstance(message, dict):
+        found = message.get("role")
+    if not found:
+        return ""
+    return str(found).lower()
+
+
+def _clean_memory_value(value: str, max_len: int = 80) -> str:
+    """Normalise a captured phrase, returning ``""`` when it is unusable.
+
+    Whitespace runs are collapsed to single spaces, surrounding punctuation
+    and quotes are trimmed, and one leading article (the / a / an) is
+    removed.  The result is rejected (``""``) when it is empty, longer than
+    *max_len*, or contains a URL scheme, ``@``, or any of ``{ } < >``."""
+    collapsed = re.sub(r"\s+", " ", value or "")
+    trimmed = collapsed.strip(" .,!?:;\"'`“”‘’")
+    phrase = re.sub(r"^(?:the|a|an)\s+", "", trimmed, flags=re.I)
+
+    if not phrase:
+        return ""
+    if len(phrase) > max_len:
+        return ""
+    if re.search(r"https?://|@|[{}<>]", phrase):
+        return ""
+    return phrase
+
+
+def _fallback_memory_candidates(messages) -> list[dict]:
+    """Extract obvious durable facts without relying on the LLM.
+
+    This is deliberately narrow. The LLM remains the main extractor, but
+    simple identity/preference/goal statements should not silently vanish just
+    because the background model judged them too conversational.
+
+    Only user messages are inspected.  Recognised statements:
+
+    * "my name is X"                      -> "User's name is X"
+    * "call me X"                         -> "User wants to be called X"
+    * "I live in X"                       -> "User lives in X"
+    * "I am from X" / "I'm from X"        -> "User is from X"
+    * "I prefer/like/love X"              -> "User prefers X"
+    * "I hate / do not like / don't like X" -> "User dislikes X"
+    * "I want/plan/hope to go/travel/move/visit to X" -> "User wants to visit X"
+
+    Args:
+        messages: Chat messages (objects or dicts with role/content).
+
+    Returns:
+        At most two ``{"text", "category"}`` candidates, de-duplicated
+        case-insensitively, in the order they were found.
+    """
+    candidates = []
+    seen = set()
+
+    def add(text: str, category: str):
+        # Final clean-up also strips the trailing period of the template.
+        text = _clean_memory_value(text, 120)
+        if not text:
+            return
+        key = text.lower()
+        if key in seen:
+            return
+        seen.add(key)
+        candidates.append({"text": text, "category": category})
+
+    for msg in messages:
+        if _message_role(msg) != "user":
+            continue
+        text = _message_text(msg)
+        if not text:
+            continue
+
+        m = re.search(r"\bmy name is\s+([A-Za-z][A-Za-z0-9 .'\-]{1,50})\b", text, re.I)
+        if m:
+            name = _clean_memory_value(m.group(1), 50)
+            if name:
+                add(f"User's name is {name}.", "identity")
+
+        m = re.search(r"\bcall me\s+([A-Za-z][A-Za-z0-9 .'\-]{1,50})\b", text, re.I)
+        if m:
+            name = _clean_memory_value(m.group(1), 50)
+            if name:
+                add(f"User wants to be called {name}.", "identity")
+
+        # The contraction has no space after "I" ("I'm from"), so the space
+        # belongs to the individual alternatives, not to the common prefix.
+        m = re.search(r"\bi( live in| am from|['’]m from)\s+([^.!?\n]{2,80})", text, re.I)
+        if m:
+            place = _clean_memory_value(m.group(2), 80)
+            if place:
+                if m.group(1).strip().lower() == "live in":
+                    add(f"User lives in {place}.", "identity")
+                else:
+                    # Being from somewhere does not mean living there now.
+                    add(f"User is from {place}.", "identity")
+
+        m = re.search(
+            r"\bi (prefer|like|love|hate|do not like|don['’]t like)\s+([^.!?\n]{4,100})",
+            text,
+            re.I,
+        )
+        if m:
+            preference = _clean_memory_value(m.group(2), 100)
+            if preference:
+                verb = m.group(1).lower()
+                # Negative statements must not be recorded as preferences.
+                if verb in ("prefer", "like", "love"):
+                    add(f"User prefers {preference}.", "preference")
+                else:
+                    add(f"User dislikes {preference}.", "preference")
+
+        m = re.search(
+            r"\bi (?:(?:want|would like|plan|hope) to|wanna) "
+            r"(?:go|travel|move|visit) to\s+([^.!?\n]{2,80})",
+            text,
+            re.I,
+        )
+        if m:
+            destination = _clean_memory_value(m.group(1), 80)
+            if destination:
+                add(f"User wants to visit {destination}.", "goal")
+
+    return candidates[:2]
+
+
+def _is_text_duplicate(new_text: str, existing: list, threshold: float = 0.6) -> bool:
+    """Tell whether *new_text* is a near-duplicate of any existing memory.
+
+    Texts are compared by Jaccard similarity of their lower-cased,
+    whitespace-separated word sets.  Returns ``True`` as soon as one entry
+    reaches *threshold*; entries without words are ignored, and a *new_text*
+    without words is never a duplicate."""
+    candidate = set(new_text.lower().split())
+    if not candidate:
+        return False
+
+    known_sets = (set(entry.get("text", "").lower().split()) for entry in existing)
+    return any(
+        len(candidate & known) / len(candidate | known) >= threshold
+        for known in known_sets
+        if known
+    )
+
+
+async def extract_and_store(
+    session,
+    memory_manager,
+    memory_vector,
+    endpoint_url: str,
+    model: str,
+    headers: Optional[dict] = None,
+):
+    """Extract facts from recent conversation and store them.
+
+    Designed to run as a background task (asyncio.create_task).
+    Errors are logged, never raised.
+
+    Flow: take the last CONTEXT_WINDOW messages of the session, collect
+    regex-based fallback candidates, ask the LLM for facts, merge both
+    candidate lists, drop anything that duplicates an existing memory, then
+    save the new entries, fire one "memory_added" event per entry and, once
+    AUDIT_INTERVAL additions have accumulated, run audit_memories().
+
+    Args:
+        session: Chat session; must provide get_context_messages(). Its
+            optional ``owner`` attribute scopes the new memories, and
+            ``session_id`` (or ``name``) is recorded on each entry.
+        memory_manager: Store offering load_all(), find_duplicates(),
+            add_entry() and save().
+        memory_vector: Optional vector index; only used while its
+            ``healthy`` attribute is truthy.
+        endpoint_url: LLM endpoint passed through to llm_call_async.
+        model: Model name passed through to llm_call_async.
+        headers: Optional HTTP headers passed through to llm_call_async.
+    """
+    try:
+        # Imported inside the function; NOTE(review): presumably to avoid an
+        # import cycle or a hard dependency at import time — confirm.
+        from src.llm_core import llm_call_async
+
+        # Get last N messages from session
+        messages = session.get_context_messages()
+        recent = messages[-CONTEXT_WINDOW:] if len(messages) > CONTEXT_WINDOW else messages
+
+        if len(recent) < 2:
+            return  # Need at least a user message and assistant response
+
+        # Computed before the LLM call so they survive an LLM failure.
+        fallback_facts = _fallback_memory_candidates(recent)
+
+        extraction_messages = [
+            {"role": "system", "content": EXTRACT_SYSTEM_PROMPT},
+        ] + recent
+
+        facts = []
+        try:
+            raw = await llm_call_async(
+                endpoint_url,
+                model,
+                extraction_messages,
+                temperature=0.1,
+                max_tokens=500,
+                headers=headers,
+            )
+
+            # Parse JSON from response (handle markdown fences if model wraps them)
+            text = raw.strip()
+            if text.startswith("```"):
+                text = text.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
+
+            try:
+                facts = json.loads(text)
+            except json.JSONDecodeError:
+                logger.debug("Memory extraction returned non-JSON")
+        except Exception as e:
+            # Any LLM/transport failure leaves `facts` empty; the fallback
+            # candidates below are still processed.
+            logger.warning(f"LLM memory extraction failed; using fallback candidates if available: {e}")
+
+        # The model may return valid JSON that is not an array.
+        if not isinstance(facts, list):
+            facts = []
+
+        # Fallback candidates go last; the dedup checks in the loop below
+        # drop those the LLM already produced.
+        if fallback_facts:
+            facts = list(facts) + fallback_facts
+
+        if not facts:
+            logger.info("Auto memory extraction ran: 0 candidates")
+            return
+
+        # Get owner from session
+        _owner = getattr(session, 'owner', None)
+
+        existing = memory_manager.load_all()
+        added = 0
+
+        for fact in facts:
+            # Accept both bare strings and {"text", "category"} objects.
+            if isinstance(fact, str):
+                fact_text = fact
+                category = "fact"
+            elif isinstance(fact, dict):
+                fact_text = fact.get("text", "").strip()
+                category = fact.get("category", "fact")
+            else:
+                continue
+
+            if not fact_text or len(fact_text) < 5:
+                continue
+
+            # Dedup: check vector similarity first (fast), then exact text match
+            # NOTE(review): find_similar() receives only the text and the
+            # threshold here, so this check is not restricted to _owner.
+            if memory_vector and memory_vector.healthy:
+                existing_id = memory_vector.find_similar(fact_text, threshold=0.72)
+                if existing_id:
+                    logger.debug(f"Memory dedup (vector): '{fact_text[:50]}' matches {existing_id}")
+                    continue
+
+            # Text dedup fallback: exact match + fuzzy similarity
+            # With an owner, compare against that owner's entries plus
+            # ownerless ones; without an owner, against everything.
+            user_existing = [e for e in existing if e.get("owner") == _owner or e.get("owner") is None] if _owner else existing
+            if memory_manager.find_duplicates(fact_text, user_existing):
+                continue
+            # Fuzzy text similarity check (catches rephrased duplicates when vector index is unavailable)
+            if _is_text_duplicate(fact_text, user_existing):
+                logger.debug(f"Memory dedup (fuzzy): '{fact_text[:50]}' too similar to existing")
+                continue
+
+            entry = memory_manager.add_entry(fact_text, source="auto", category=category, owner=_owner)
+            # Auto-pin identity facts (name, job, location) — core context
+            if category == "identity":
+                entry["pinned"] = True
+            if hasattr(session, "session_id"):
+                entry["session_id"] = session.session_id
+            elif hasattr(session, "name"):
+                entry["session_id"] = session.name
+
+            # Appending here makes later candidates in this same batch dedup
+            # against the entry just created.
+            existing.append(entry)
+
+            # Add to vector index
+            if memory_vector and memory_vector.healthy:
+                memory_vector.add(entry["id"], fact_text)
+
+            added += 1
+
+        if added > 0:
+            # One write for the whole batch.
+            memory_manager.save(existing)
+            try:
+                from src.event_bus import fire_event
+                for _ in range(added):
+                    fire_event("memory_added", _owner)
+            except Exception:
+                # Event delivery is best-effort; the memories are already saved.
+                logger.debug("memory_added event dispatch failed", exc_info=True)
+            logger.info(f"Auto-extracted {added} memories from session")
+
+            # Module-level counter shared by every session in this process.
+            global _extractions_since_audit
+            _extractions_since_audit += added
+            if _extractions_since_audit >= AUDIT_INTERVAL:
+                # Reset before awaiting the audit.
+                _extractions_since_audit = 0
+                logger.info("Audit threshold reached, running memory audit")
+                await audit_memories(
+                    memory_manager, memory_vector, endpoint_url, model, headers, owner=_owner
+                )
+        else:
+            logger.info("Auto memory extraction ran: 0 added")
+
+    except Exception as e:
+        # Background task: never let an error escape to the event loop.
+        logger.error(f"Memory extraction failed: {e}")
+
+
+async def audit_memories(
+    memory_manager,
+    memory_vector,
+    endpoint_url: str,
+    model: str,
+    headers: Optional[dict] = None,
+    owner: Optional[str] = None,
+):
+    """Send all memories to the LLM for deduplication and consolidation.
+
+    - Merges near-duplicate entries
+    - Rewrites vague entries to be concise
+    - Removes junk / non-personal entries
+    - Rebuilds the vector index afterwards
+
+    Safe to call manually or from the automatic trigger in extract_and_store.
+    Errors are logged, never raised.
+    """
+    try:
+        from src.llm_core import llm_call_async
+
+        existing = memory_manager.load(owner=owner)
+        if not existing:
+            logger.info("Memory audit: nothing to audit")
+            return {"before": 0, "after": 0}
+
+        before_count = len(existing)
+
+        # Skip the LLM call entirely when this exact set of memories was
+        # already audited — the previous tidy left them in a clean state
+        # and nothing has changed since. Returns instantly so the UI shows
+        # "Already clean" without spending 30-120s on a wasted LLM round.
+        # The fingerprint includes id+text+category; any add/edit/delete
+        # invalidates it and the audit runs normally.
+        current_fp = _fingerprint_entries(existing)
+        last_state = _load_tidy_state(memory_manager).get(owner or "") or {}
+        if last_state.get("fingerprint") == current_fp:
+            logger.info("Memory audit: state unchanged since last tidy — skipping LLM")
+            return {
+                "before": before_count,
+                "after": before_count,
+                "already_tidy": True,
+            }
+
+        # Build payload: list of {id, text, category} for the LLM
+        memory_payload = [
+            {"id": m["id"], "text": m["text"], "category": m.get("category", "fact")}
+            for m in existing
+        ]
+
+        audit_messages = [
+            {"role": "system", "content": AUDIT_SYSTEM_PROMPT},
+            {"role": "user", "content": json.dumps(memory_payload, ensure_ascii=False)},
+        ]
+
+        raw = await llm_call_async(
+            endpoint_url,
+            model,
+            audit_messages,
+            temperature=0.1,
+            # 16384 (was 2000): the deduped list of all memories can be large,
+            # and a reasoning model spends tokens thinking first — 2000 truncated
+            # the JSON so it never parsed ("bad_json").
+            max_tokens=16384,
+            headers=headers,
+            # Bound the call so the Tidy whirlpool can't spin indefinitely on a
+            # slow/large generation.
+            timeout=120,
+        )
+
+        # Parse the JSON list, tolerating reasoning-model noise: <think> blocks,
+        # markdown fences, leading prose, and trailing commas.
+        import re as _re
+        text = (raw or "").strip()
+        text = _re.sub(r'<think(?:ing)?>[\s\S]*?</think(?:ing)?>', '', text, flags=_re.I).strip()
+
+        def _loads_list(s):
+            if not s:
+                return None
+            for cand in (s, _re.sub(r',(\s*[}\]])', r'\1', s)):
+                try:
+                    v = json.loads(cand)
+                    if isinstance(v, list):
+                        return v
+                except Exception:
+                    continue
+            return None
+
+        cleaned = _loads_list(text)
+        if cleaned is None:
+            _m = _re.search(r'```(?:json)?\s*\n?([\s\S]*?)```', text)
+            if _m:
+                cleaned = _loads_list(_m.group(1).strip())
+        if cleaned is None:
+            _a, _b = text.find('['), text.rfind(']')
+            if _a >= 0 and _b > _a:
+                cleaned = _loads_list(text[_a:_b + 1])
+        if cleaned is None:
+            logger.error(f"Memory audit returned non-JSON: {text[:300]}")
+            return {"before": before_count, "after": before_count, "error": "bad_json"}
+
+        # Build lookup of original entries by ID so we can preserve metadata
+        originals = {m["id"]: m for m in existing}
+
+        final_entries = []
+        for item in cleaned:
+            if not isinstance(item, dict):
+                continue
+            mid = item.get("id", "")
+            new_text = item.get("text", "").strip()
+            if not new_text:
+                continue
+
+            if mid in originals:
+                # Preserve original metadata, update text + category
+                entry = originals[mid].copy()
+                entry["text"] = new_text
+                if item.get("category"):
+                    entry["category"] = item["category"]
+            else:
+                # ID not found — skip to avoid inventing entries
+                logger.debug(f"Audit returned unknown id {mid}, skipping")
+                continue
+
+            final_entries.append(entry)
+
+        after_count = len(final_entries)
+
+        # Safety net against catastrophic over-deletion. A conservative tidy
+        # should never wipe out half the store in one pass — if the model
+        # returned far fewer entries than it was given (over-consolidation, a
+        # dropped/truncated list, or it ignored ids), treat it as a misfire and
+        # DON'T save. Better to no-op than to silently lose memories.
+        if before_count >= 8 and after_count < before_count * 0.5:
+            logger.warning(
+                f"Memory audit would cut {before_count} -> {after_count} "
+                f"(>50% removed) — refusing as unsafe, keeping originals"
+            )
+            return {"before": before_count, "after": before_count, "error": "unsafe_removal"}
+
+        # Merge audited entries back with other users' entries
+        if owner:
+            all_entries = memory_manager.load_all()
+            audited_ids = {e["id"] for e in final_entries}
+            other_entries = [e for e in all_entries if e.get("owner") != owner and (e.get("owner") is not None)]
+            # Also keep legacy entries that weren't part of this audit
+            for e in all_entries:
+                if e.get("owner") is None and e["id"] not in audited_ids and e["id"] not in {o["id"] for o in other_entries}:
+                    other_entries.append(e)
+            memory_manager.save(final_entries + other_entries)
+        else:
+            memory_manager.save(final_entries)
+        logger.info(
+            f"Memory audit complete: {before_count} -> {after_count} entries "
+            f"({before_count - after_count} removed/merged)"
+        )
+
+        # Rebuild vector index
+        if memory_vector and memory_vector.healthy:
+            memory_vector.rebuild(final_entries)
+
+        # Persist the post-tidy fingerprint so the next call short-circuits
+        # if nothing has changed in the meantime.
+        _save_tidy_state(memory_manager, owner, _fingerprint_entries(final_entries))
+
+        return {"before": before_count, "after": after_count}
+
+    except Exception as e:
+        logger.error(f"Memory audit failed: {e}")
+        return {"error": str(e)}
--- a/services/memory/memory_vector.py
+++ b/services/memory/memory_vector.py
@@ -0,0 +1,175 @@
+"""
+memory_vector.py
+
+ChromaDB-backed vector store for memory entries.
+Shares the EmbeddingClient with RAG to save memory.
+Stores pre-computed embeddings (ChromaDB does not manage embedding).
+"""
+
+import logging
+from typing import List, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class MemoryVectorStore:
+    """Vector index over memory entries for semantic retrieval."""
+
+    COLLECTION_NAME = "odysseus_memories"
+
+    def __init__(self, data_dir: str, embedding_model=None):
+        self._model = embedding_model
+        self._collection = None
+        self._healthy = False
+
+        self._initialize()
+
+    def _initialize(self):
+        try:
+            from src.chroma_client import get_chroma_client
+
+            if self._model is None:
+                from src.embeddings import get_embedding_client
+                self._model = get_embedding_client()
+                if self._model is None:
+                    raise RuntimeError("No embedding backend available")
+                logger.info(f"MemoryVectorStore using embeddings: {self._model.url}")
+
+            client = get_chroma_client()
+            self._collection = client.get_or_create_collection(
+                name=self.COLLECTION_NAME,
+                metadata={"hnsw:space": "cosine"},
+            )
+
+            self._healthy = True
+            count = self._collection.count()
+            logger.info(f"MemoryVectorStore ready (entries={count})")
+
+        except Exception as e:
+            logger.error(f"MemoryVectorStore init failed: {e}")
+
+    @property
+    def healthy(self) -> bool:
+        return self._healthy
+
+    def _embed(self, texts: List[str]) -> List[List[float]]:
+        vecs = self._model.encode(texts, normalize_embeddings=True)
+        return vecs.tolist()
+
+    def count(self) -> int:
+        """Return the number of stored vectors."""
+        if not self._healthy:
+            return 0
+        return self._collection.count()
+
+    def add(self, memory_id: str, text: str):
+        """Add a single memory entry to the vector index."""
+        if not self._healthy:
+            return
+        # Skip if already exists
+        existing = self._collection.get(ids=[memory_id])
+        if existing["ids"]:
+            return
+        embeddings = self._embed([text])
+        self._collection.add(
+            ids=[memory_id],
+            embeddings=embeddings,
+            documents=[text],
+            metadatas=[{"source": "memory"}],
+        )
+
+    def remove(self, memory_id: str):
+        """Remove a memory entry. O(1) — no rebuild needed."""
+        if not self._healthy:
+            return
+        try:
+            self._collection.delete(ids=[memory_id])
+        except Exception as e:
+            logger.warning(f"memory remove {memory_id}: {e}")
+
+    def search(self, query: str, k: int = 8) -> List[Dict]:
+        """Search for the most relevant memory IDs by semantic similarity.
+        Returns list of {"memory_id": str, "score": float}.
+
+        ChromaDB cosine distance = 1 - cosine_similarity.
+        We convert back: similarity = 1.0 - distance.
+        """
+        if not self._healthy or self._collection.count() == 0:
+            return []
+
+        embeddings = self._embed([query])
+        actual_k = min(k, self._collection.count())
+        results = self._collection.query(
+            query_embeddings=embeddings,
+            n_results=actual_k,
+        )
+
+        out = []
+        for idx, mid in enumerate(results["ids"][0]):
+            distance = results["distances"][0][idx]
+            out.append({
+                "memory_id": mid,
+                "score": round(1.0 - distance, 4),
+            })
+        return out
+
+    def find_similar(self, text: str, threshold: float = 0.92) -> Optional[str]:
+        """Check if a near-duplicate exists. Returns memory_id if found, else None."""
+        if not self._healthy or self._collection.count() == 0:
+            return None
+
+        embeddings = self._embed([text])
+        results = self._collection.query(
+            query_embeddings=embeddings,
+            n_results=1,
+        )
+
+        if results["ids"][0]:
+            distance = results["distances"][0][0]
+            similarity = 1.0 - distance
+            if similarity >= threshold:
+                return results["ids"][0][0]
+        return None
+
+    def rebuild(self, memories: List[Dict]):
+        """Rebuild the entire index from a list of memory entries.
+        Each entry must have 'id' and 'text' keys."""
+        if not self._healthy:
+            return
+
+        from src.chroma_client import get_chroma_client
+
+        # Delete and recreate collection for a clean rebuild
+        client = get_chroma_client()
+        try:
+            client.delete_collection(self.COLLECTION_NAME)
+        except Exception:
+            pass
+        self._collection = client.get_or_create_collection(
+            name=self.COLLECTION_NAME,
+            metadata={"hnsw:space": "cosine"},
+        )
+
+        texts = []
+        ids = []
+        for mem in memories:
+            text = mem.get("text", "").strip()
+            mid = mem.get("id", "")
+            if text and mid:
+                texts.append(text)
+                ids.append(mid)
+
+        if texts:
+            # Batch in chunks of 100 to avoid oversized requests
+            for i in range(0, len(texts), 100):
+                batch_texts = texts[i:i + 100]
+                batch_ids = ids[i:i + 100]
+                embeddings = self._embed(batch_texts)
+                self._collection.add(
+                    ids=batch_ids,
+                    embeddings=embeddings,
+                    documents=batch_texts,
+                    metadatas=[{"source": "memory"}] * len(batch_ids),
+                )
+
+        logger.info(f"MemoryVectorStore rebuilt with {len(ids)} entries")
--- a/services/memory/service.py
+++ b/services/memory/service.py
@@ -0,0 +1,137 @@
+# services/memory/service.py
+"""Memory service — persistent memory storage and retrieval."""
+
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict, Any
+import os
+
+from .memory import MemoryManager
+from .memory_vector import MemoryVectorStore
+
+
+@dataclass
+class Memory:
+    """A single stored memory as handed to callers of MemoryService."""
+    # Identifier of the entry; MemoryService.remember uses the first
+    # 8 characters of a uuid4.
+    id: str
+    # The remembered content.
+    text: str
+    # Creation time; MemoryService.remember stores int(time.time()).
+    timestamp: int
+    # Session the memory was recorded in, when one was given.
+    session_id: Optional[str] = None
+    # Free-form extra data; each instance gets its own empty dict by default.
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class MemorySearchResult:
+    """Result of a memory search: the matches plus the query that produced them."""
+    # Matching memories, in the order the search backend returned them.
+    memories: List[Memory]
+    # The query string the search was run with.
+    query: str
+    # Number of entries in `memories` (MemoryService.recall sets len(memories)).
+    total: int
+
+
+class MemoryService:
+    """
+    Memory storage and retrieval service.
+
+    Usage:
+        service = MemoryService()
+        await service.remember("User prefers dark mode")
+        results = await service.recall("preferences")
+    """
+
+    def __init__(self, data_dir: str = "data"):
+        self.manager = MemoryManager(data_dir)
+        self.vector_store = MemoryVectorStore(data_dir) if os.path.exists(
+            os.path.join(data_dir, "memory_vectors")
+        ) else None
+
+    async def remember(self, text: str, session_id: Optional[str] = None) -> Memory:
+        """
+        Store a new memory.
+
+        Args:
+            text: Memory content
+            session_id: Optional session association
+
+        Returns:
+            Created Memory object
+        """
+        import uuid
+        import time
+
+        memory_id = str(uuid.uuid4())[:8]
+        timestamp = int(time.time())
+
+        entry = {
+            "id": memory_id,
+            "text": text,
+            "timestamp": timestamp,
+            "session_id": session_id,
+        }
+
+        self.manager.add_memory(entry)
+
+        # Also add to vector store if available
+        if self.vector_store:
+            self.vector_store.add(text, {"id": memory_id, "session_id": session_id})
+
+        return Memory(
+            id=memory_id,
+            text=text,
+            timestamp=timestamp,
+            session_id=session_id,
+        )
+
+    async def recall(self, query: str, top_k: int = 5) -> MemorySearchResult:
+        """
+        Search memories.
+
+        Args:
+            query: Search query
+            top_k: Max results
+
+        Returns:
+            MemorySearchResult with matching memories
+        """
+        # Try vector search first
+        if self.vector_store:
+            results = self.vector_store.search(query, k=top_k)
+            memories = [
+                Memory(
+                    id=r.get("id", ""),
+                    text=r.get("text", ""),
+                    timestamp=r.get("timestamp", 0),
+                    session_id=r.get("session_id"),
+                    metadata=r.get("metadata", {}),
+                )
+                for r in results
+            ]
+            return MemorySearchResult(memories=memories, query=query, total=len(memories))
+
+        # Fallback to keyword search
+        results = self.manager.search_memories(query, limit=top_k)
+        memories = [
+            Memory(
+                id=m.get("id", ""),
+                text=m.get("text", ""),
+                timestamp=m.get("timestamp", 0),
+                session_id=m.get("session_id"),
+            )
+            for m in results
+        ]
+        return MemorySearchResult(memories=memories, query=query, total=len(memories))
+
+    def get_all(self, limit: int = 100) -> List[Memory]:
+        """Get all memories."""
+        memories = self.manager.get_memories(limit=limit)
+        return [
+            Memory(
+                id=m.get("id", ""),
+                text=m.get("text", ""),
+                timestamp=m.get("timestamp", 0),
+                session_id=m.get("session_id"),
+            )
+            for m in memories
+        ]
+
+    def delete(self, memory_id: str) -> bool:
+        """Delete a memory by ID."""
+        return self.manager.delete_memory(memory_id)
--- a/services/memory/skill_extractor.py
+++ b/services/memory/skill_extractor.py
@@ -0,0 +1,209 @@
+"""
+skill_extractor.py
+
+Background auto-extraction of skills from complex agent runs.
+When the agent takes >= 2 rounds or >= 2 tool calls to complete a task,
+we ask the LLM to distill the approach into a reusable skill.
+"""
+
+import json
+import logging
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+# System prompt for the extraction call. maybe_extract_skill fills in
+# {rounds} and {tool_count} with str.format, so any other literal brace added
+# to this text would have to be doubled.
+SKILL_EXTRACT_PROMPT = (
+    "You are analyzing an AI agent's work session. The agent took {rounds} rounds "
+    "and {tool_count} tool calls to complete the task.\n\n"
+    "Extract a reusable 'skill' ONLY IF the session contains a concrete, "
+    "repeatable procedure the agent could follow to solve a similar problem "
+    "ON THE COMPUTER next time (e.g. a sequence of shell commands, code, file "
+    "edits, API calls, or tool usage).\n\n"
+    "Return null (the bare word, no JSON) when the session is NOT a reusable "
+    "computer procedure, including:\n"
+    "- The real work happened OUTSIDE the computer (the user did something "
+    "physically, in person, on another device, or by hand) and the agent only "
+    "discussed or advised it.\n"
+    "- A one-off, personal, or context-specific task that won't recur "
+    "(personal errands, a specific person/place/date, casual conversation).\n"
+    "- A pure question/answer or explanation with no transferable method.\n"
+    "- The agent failed, gave up, or the approach is not worth repeating.\n\n"
+    "When (and only when) a genuine reusable procedure exists, return a JSON "
+    "object with:\n"
+    '- "title": short name (under 10 words)\n'
+    '- "problem": what was the challenge (1-2 sentences)\n'
+    '- "solution": what worked (1-2 sentences)\n'
+    '- "steps": array of step-by-step instructions (3-7 short steps)\n'
+    '- "tags": array of relevant keywords (3-5 tags)\n'
+    '- "confidence": 0.0-1.0 how reliable AND reusable this procedure is\n\n'
+    "Be conservative: if in doubt, return null.\n"
+    "Return ONLY valid JSON (or the bare word null), no markdown fences."
+)
+
+# Skills the model is unsure about (or that read as one-offs) add clutter —
+# drop anything whose reported confidence is below this floor.
+MIN_CONFIDENCE = 0.6
+
+# How many of the most recent context messages are sent to the LLM.
+CONTEXT_WINDOW = 12
+
+
+async def maybe_extract_skill(
+    session,
+    skills_manager,
+    endpoint_url: str,
+    model: str,
+    headers: dict,
+    round_count: int,
+    tool_count: int,
+    owner: Optional[str] = None,
+):
+    """Extract a skill if the agent run was complex enough."""
+    # Quiet by default; flip to DEBUG when chasing extractor issues.
+    logger.debug(
+        "[skill-extract] start: rounds=%d tools=%d model=%s owner=%s",
+        round_count, tool_count, model, owner,
+    )
+    if round_count < 2 and tool_count < 2:
+        logger.debug("[skill-extract] BELOW threshold (need rounds>=2 or tools>=2)")
+        return None
+
+    try:
+        from src.llm_core import llm_call_async
+
+        # Get recent messages
+        history = session.get_context_messages()
+        recent = history[-CONTEXT_WINDOW:] if len(history) > CONTEXT_WINDOW else history
+        if not recent:
+            logger.debug("[skill-extract] no recent messages, skipping")
+            return None
+
+        # Build conversation summary for extraction
+        conv_lines = []
+        for msg in recent:
+            role = msg.get("role", "?")
+            content = msg.get("content", "")
+            if isinstance(content, list):
+                content = " ".join(
+                    b.get("text", "") for b in content if isinstance(b, dict) and b.get("type") == "text"
+                )
+            # Truncate long messages
+            if len(content) > 500:
+                content = content[:500] + "..."
+            conv_lines.append(f"[{role}] {content}")
+
+        conversation = "\n".join(conv_lines)
+
+        prompt = SKILL_EXTRACT_PROMPT.format(rounds=round_count, tool_count=tool_count)
+
+        import time as _time
+        _t0 = _time.monotonic()
+        logger.debug(
+            "[skill-extract] calling LLM (endpoint=%s, ctx=%d msgs, timeout=30s)",
+            endpoint_url, len(recent),
+        )
+        response = await llm_call_async(
+            endpoint_url,
+            model,
+            [
+                {"role": "system", "content": prompt},
+                {"role": "user", "content": f"Conversation:\n{conversation}"},
+            ],
+            headers=headers,
+            timeout=30,
+        )
+        logger.debug(
+            "[skill-extract] LLM returned in %.1fs (len=%d, head=%r)",
+            _time.monotonic() - _t0, len(response or ""), (response or "")[:80],
+        )
+
+        if not response or response.strip().lower() == "null":
+            logger.debug(
+                "[skill-extract] LLM declined (returned null/empty) — "
+                "session deemed not a reusable procedure"
+            )
+            return None
+
+        # Some models (MiniMax, Qwen-Thinker, DeepSeek-R1) emit their
+        # chain-of-thought BEFORE the JSON output even when asked for
+        # raw JSON. `strip_think(prose=True, prompt_echo=True)` removes
+        # <think>…</think> tags AND prose-style "Let me analyze this…"
+        # preambles. Without it, json.loads bombed on character 0 every
+        # time and the silent-bail looked like "extractor doesn't work".
+        try:
+            from src.text_helpers import strip_think as _strip_think
+            response = _strip_think(response, prose=True, prompt_echo=True)
+        except Exception:
+            pass
+
+        # Parse JSON
+        text = response.strip()
+        if text.startswith("```"):
+            text = text.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
+        # After strip_think, the JSON may still be embedded inside surrounding
+        # commentary — slice from the first '{' to the matching last '}'.
+        if text and text[0] != "{":
+            _start = text.find("{")
+            _end = text.rfind("}")
+            if 0 <= _start < _end:
+                text = text[_start : _end + 1]
+
+        data = json.loads(text)
+        if not data or not isinstance(data, dict):
+            logger.debug("[skill-extract] parsed JSON not a dict, dropping")
+            return None
+
+        title = data.get("title", "").strip()
+        if not title:
+            logger.debug("[skill-extract] LLM returned object with no title, dropping")
+            return None
+
+        # Honour the model's own reliability/reusability estimate — low-
+        # confidence extractions are usually one-offs or shaky procedures.
+        try:
+            _conf = float(data.get("confidence", 0.7))
+        except (TypeError, ValueError):
+            _conf = 0.7
+        if _conf < MIN_CONFIDENCE:
+            logger.debug(
+                "[skill-extract] '%s' below confidence floor (%.2f < %.2f) — dropped",
+                title, _conf, MIN_CONFIDENCE,
+            )
+            return None
+
+        # Check for duplicate skills
+        existing = skills_manager.load(owner=owner)
+        for sk in existing:
+            if sk.get("title", "").lower() == title.lower():
+                logger.debug("[skill-extract] '%s' already exists — dropped as duplicate", title)
+                return None
+
+        entry = skills_manager.add_skill(
+            title=title,
+            problem=data.get("problem", ""),
+            solution=data.get("solution", ""),
+            steps=data.get("steps", []),
+            tags=data.get("tags", []),
+            source="learned",
+            confidence=data.get("confidence", 0.7),
+            session_id=getattr(session, "session_id", None),
+            owner=owner,
+        )
+        try:
+            from src.event_bus import fire_event
+            fire_event("skill_added", owner)
+        except Exception:
+            logger.debug("skill_added event dispatch failed", exc_info=True)
+        logger.info("Auto-extracted skill: %s (id=%s)", title, entry["id"])
+        return entry
+
+    except json.JSONDecodeError as e:
+        logger.debug("[skill-extract] non-JSON LLM response, dropping: %s", e)
+        return None
+    except Exception as e:
+        # Real exceptions stay INFO+warning so they don't get lost when
+        # users only have default log level. `exc_info=True` ships the
+        # full traceback so timeouts vs auth vs import errors are
+        # distinguishable from outside.
+        logger.warning("[skill-extract] FAILED: %s", e, exc_info=True)
+        return None
--- a/services/memory/skill_format.py
+++ b/services/memory/skill_format.py
@@ -0,0 +1,444 @@
+"""SKILL.md parser & writer.
+
+Reads/writes a single skill from a `SKILL.md` file with YAML frontmatter
+and a structured markdown body. Inspired by Hermes' skills format
+(https://hermes-agent.nousresearch.com/docs/user-guide/features/skills).
+
+Frontmatter shape (YAML):
+
+    ---
+    name: open-pr-from-branch
+    description: One-line summary surfaced in the skills index.
+    version: 1.0.0
+    category: dev
+    tags: [git, github]
+    platforms: [linux, macos]            # optional
+    requires_toolsets: []                # optional
+    fallback_for_toolsets: []            # optional
+    status: published                    # draft | published
+    confidence: 0.8                      # 0..1
+    source: learned                      # learned | taught | imported
+    teacher_model: claude-opus-4-7       # optional
+    created: 2026-05-09T21:43:00Z
+    ---
+
+Body sections (any subset; rendered as headings):
+
+    ## When to Use
+    Trigger conditions in plain English.
+
+    ## Procedure
+    1. First step
+    2. Second step
+
+    ## Pitfalls
+    - Common failure mode + how to recover
+
+    ## Verification
+    - How to confirm success
+
+    Anything else (raw paragraphs after the last known section) is preserved
+    in `body_extra` and round-trips on save.
+
+Usage counters (`uses`, `last_used`) live in a sidecar `_usage.json` keyed
+by skill name, so the SKILL.md file doesn't churn on every retrieval.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Slugify
+# ---------------------------------------------------------------------------
+
+_SLUG_RE = re.compile(r"[^a-z0-9]+")
+
+
+def slugify(text: str, fallback: str = "skill") -> str:
+    """Convert a free-form title to a kebab-case slug suitable for a directory
+    name. Strips non-alphanumerics, collapses runs, trims leading/trailing
+    dashes. Caps at 60 chars."""
+    s = str(text or "").strip().lower()
+    s = _SLUG_RE.sub("-", s)
+    s = s.strip("-")
+    return (s or fallback)[:60]
+
+
+# ---------------------------------------------------------------------------
+# Frontmatter (minimal YAML — we don't pull in PyYAML for one feature)
+# ---------------------------------------------------------------------------
+
+# We accept a tiny subset of YAML: scalar `key: value`, inline lists `[a, b]`,
+# and block lists with `-`. That covers everything in our schema and avoids
+# a new dependency.
+
+_FM_KEY_RE = re.compile(r"^([a-z_][a-z0-9_]*):\s*(.*)$", re.IGNORECASE)
+_FM_BLOCK_LIST_RE = re.compile(r"^\s*-\s*(.*)$")
+
+
+def _parse_scalar(raw: str) -> Any:
+    raw = raw.strip()
+    if raw == "":
+        return ""
+    if raw.startswith("[") and raw.endswith("]"):
+        inner = raw[1:-1].strip()
+        if not inner:
+            return []
+        return [_parse_scalar(p) for p in _split_top_level(inner, ",")]
+    if raw.lower() in ("true", "yes"):
+        return True
+    if raw.lower() in ("false", "no"):
+        return False
+    if raw.lower() in ("null", "none", "~"):
+        return None
+    if (raw[0] == raw[-1]) and raw[0] in ("'", '"'):
+        return raw[1:-1]
+    # Try number
+    try:
+        if "." in raw:
+            return float(raw)
+        return int(raw)
+    except ValueError:
+        pass
+    return raw
+
+
+def _split_top_level(s: str, sep: str) -> List[str]:
+    """Split `s` on `sep` ignoring separators inside [] or quotes."""
+    out, buf, depth, quote = [], [], 0, None
+    for ch in s:
+        if quote:
+            buf.append(ch)
+            if ch == quote:
+                quote = None
+            continue
+        if ch in ("'", '"'):
+            quote = ch
+            buf.append(ch)
+            continue
+        if ch == "[":
+            depth += 1
+        elif ch == "]":
+            depth = max(0, depth - 1)
+        if ch == sep and depth == 0:
+            out.append("".join(buf).strip())
+            buf = []
+            continue
+        buf.append(ch)
+    if buf:
+        out.append("".join(buf).strip())
+    return out
+
+
+def parse_frontmatter(text: str) -> tuple[Dict[str, Any], str]:
+    """Pull the YAML frontmatter out of a SKILL.md and return (fm, body)."""
+    if not text.startswith("---"):
+        return {}, text
+    end = text.find("\n---", 3)
+    if end < 0:
+        return {}, text
+    fm_text = text[3:end].lstrip("\n")
+    body = text[end + 4:].lstrip("\n")
+    fm: Dict[str, Any] = {}
+    pending_key: Optional[str] = None
+    for line in fm_text.splitlines():
+        if not line.strip() or line.lstrip().startswith("#"):
+            continue
+        m = _FM_KEY_RE.match(line)
+        if m:
+            key, val = m.group(1), m.group(2)
+            if val.strip() == "":
+                pending_key = key
+                fm[key] = []
+            else:
+                fm[key] = _parse_scalar(val)
+                pending_key = None
+            continue
+        m2 = _FM_BLOCK_LIST_RE.match(line)
+        if m2 and pending_key:
+            existing = fm.get(pending_key)
+            if not isinstance(existing, list):
+                fm[pending_key] = []
+            fm[pending_key].append(_parse_scalar(m2.group(1)))
+    return fm, body
+
+
+def _emit_scalar(v: Any) -> str:
+    if v is None:
+        return "null"
+    if isinstance(v, bool):
+        return "true" if v else "false"
+    if isinstance(v, (int, float)):
+        return str(v)
+    if isinstance(v, list):
+        return "[" + ", ".join(_emit_scalar(x) for x in v) + "]"
+    s = str(v)
+    if any(c in s for c in (":", "#", "\n", "[", "]", "{", "}", ",", "&", "*", "!", "|", ">", "'", '"', "%", "@")):
+        return json.dumps(s)
+    return s
+
+
+def _as_list(v: Any) -> List[str]:
+    if v is None:
+        return []
+    if isinstance(v, list):
+        return [str(x) for x in v if x not in (None, "")]
+    return [str(v)]
+
+
+def _as_float(v: Any, default: float = 0.8) -> float:
+    try:
+        return float(v)
+    except (TypeError, ValueError):
+        return default
+
+
+def emit_frontmatter(fm: Dict[str, Any]) -> str:
+    lines = []
+    for k, v in fm.items():
+        if v is None or v == [] or v == "":
+            continue
+        lines.append(f"{k}: {_emit_scalar(v)}")
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Skill body sections
+# ---------------------------------------------------------------------------
+
+# Keys of the body sections parse_body always returns.
+_KNOWN_SECTIONS = ("when_to_use", "procedure", "pitfalls", "verification")
+# Lower-cased `## Heading` text -> section key. "steps" is accepted as an
+# alias for the procedure section.
+_HEADING_TO_KEY = {
+    "when to use": "when_to_use",
+    "procedure": "procedure",
+    "steps": "procedure",
+    "pitfalls": "pitfalls",
+    "verification": "verification",
+}
+# Section key -> heading text written by emit_body.
+_KEY_TO_HEADING = {
+    "when_to_use": "When to Use",
+    "procedure": "Procedure",
+    "pitfalls": "Pitfalls",
+    "verification": "Verification",
+}
+
+
+def parse_body(body: str) -> Dict[str, Any]:
+    """Split a SKILL.md body into known sections.
+
+    A line of the form ``## <heading>`` starts a new section. Headings are
+    matched case-insensitively against ``_HEADING_TO_KEY``. Text before the
+    first heading, and any section whose heading is not recognised, is
+    collected into ``body_extra``; an unrecognised section keeps its own
+    ``## `` heading line there so the heading is not lost when the skill is
+    read and written back. If a known heading occurs more than once, the
+    last occurrence wins.
+
+    Returns:
+        {
+            "when_to_use": str,
+            "procedure":   list[str],   # numbered/bulleted lines
+            "pitfalls":    list[str],
+            "verification": list[str],
+            "body_extra":  str,         # anything not under a known heading
+        }
+    """
+    out: Dict[str, Any] = {k: ([] if k != "when_to_use" else "") for k in _KNOWN_SECTIONS}
+    out["body_extra"] = ""
+    if not body or not body.strip():
+        return out
+
+    # Each entry is (section key or None, lines collected for it). The first
+    # entry holds whatever precedes the first heading.
+    sections: List[tuple[Optional[str], List[str]]] = [(None, [])]
+    for line in body.splitlines():
+        m = re.match(r"^##\s+(.*?)\s*$", line)
+        if m:
+            key = _HEADING_TO_KEY.get(m.group(1).strip().lower())
+            # Unknown heading: carry the heading line itself into the extras
+            # so "## Examples" and the like are preserved verbatim.
+            sections.append((key, [] if key is not None else [line]))
+            continue
+        sections[-1][1].append(line)
+
+    for key, lines in sections:
+        text = "\n".join(lines).strip("\n")
+        if key is None:
+            extras = text.strip()
+            if extras:
+                out["body_extra"] = (out["body_extra"] + "\n\n" + extras).strip()
+            continue
+        if key == "when_to_use":
+            out["when_to_use"] = text.strip()
+        else:
+            out[key] = _parse_list_lines(text)
+    return out
+
+
+def _parse_list_lines(text: str) -> List[str]:
+    """Collect the entries of a section body.
+
+    A line starting with ``-``, ``*``, ``N.`` or ``N)`` followed by
+    whitespace opens a new entry. Any other non-blank line is appended to
+    the previous entry (joined with a space), or becomes the first entry
+    when there is none yet. Blank lines are ignored.
+    """
+    entries: List[str] = []
+    stripped_lines = (raw.strip() for raw in (text or "").splitlines())
+    for content in filter(None, stripped_lines):
+        bullet = re.match(r"^(?:[-*]|\d+[.)])\s+(.*)$", content)
+        if bullet is not None:
+            entries.append(bullet.group(1).strip())
+            continue
+        if not entries:
+            entries.append(content)
+            continue
+        # continuation of the previous entry
+        entries[-1] = entries[-1] + " " + content
+    return entries
+
+
+def emit_body(sections: Dict[str, Any]) -> str:
+    """Render parsed sections back into a SKILL.md body.
+
+    Layout: ``body_extra`` first, then ``## When to Use``, ``## Procedure``
+    (numbered), ``## Pitfalls`` and ``## Verification`` (bulleted). Empty
+    sections are skipped. The result ends with a newline unless it is empty.
+
+    ``body_extra`` is written before the headed sections on purpose:
+    ``parse_body`` attributes every line that follows a ``## `` heading to
+    that section, so free text placed after the last section would be
+    absorbed into it on the next read instead of coming back as
+    ``body_extra``.
+
+    Args:
+        sections: Mapping with any of the keys ``when_to_use``,
+            ``procedure``, ``pitfalls``, ``verification``, ``body_extra``.
+    """
+    parts: List[str] = []
+    extra = (sections.get("body_extra") or "").strip()
+    if extra:
+        parts.append(extra)
+    when = (sections.get("when_to_use") or "").strip()
+    if when:
+        parts.append(f"## {_KEY_TO_HEADING['when_to_use']}\n\n{when}")
+    for key in ("procedure", "pitfalls", "verification"):
+        items = sections.get(key) or []
+        if not items:
+            continue
+        if key == "procedure":
+            body = "\n".join(f"{i}. {x}" for i, x in enumerate(items, start=1))
+        else:
+            body = "\n".join(f"- {x}" for x in items)
+        parts.append(f"## {_KEY_TO_HEADING[key]}\n\n{body}")
+    return "\n\n".join(parts) + ("\n" if parts else "")
+
+
+# ---------------------------------------------------------------------------
+# Skill record
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class Skill:
+    """In-memory form of one SKILL.md file.
+
+    Carries the frontmatter fields, the parsed body sections, the usage
+    counters and the on-disk path. ``from_markdown`` / ``to_markdown``
+    convert from and to the file text; ``to_dict`` gives the plain-dict
+    shape, including the aliases kept for the old API/UI.
+    """
+
+    name: str                                          # slug, dir name
+    description: str = ""
+    version: str = "1.0.0"
+    category: str = "general"
+    tags: List[str] = field(default_factory=list)
+    platforms: List[str] = field(default_factory=list)
+    requires_toolsets: List[str] = field(default_factory=list)
+    fallback_for_toolsets: List[str] = field(default_factory=list)
+    status: str = "draft"                              # draft | published
+    confidence: float = 0.8
+    source: str = "learned"
+    teacher_model: Optional[str] = None
+    owner: Optional[str] = None
+    created: str = ""                                  # ISO8601
+    when_to_use: str = ""
+    procedure: List[str] = field(default_factory=list)
+    pitfalls: List[str] = field(default_factory=list)
+    verification: List[str] = field(default_factory=list)
+    body_extra: str = ""
+    # Sidecar (not persisted in SKILL.md)
+    uses: int = 0
+    last_used: Optional[int] = None
+    # File path on disk (set when read)
+    path: Optional[str] = None
+
+    # ----------------------------------------------------------------------
+    # Serialization
+    # ----------------------------------------------------------------------
+
+    def to_frontmatter(self) -> Dict[str, Any]:
+        """Build the frontmatter mapping, in the order it is written out.
+
+        Empty list fields, ``teacher_model`` and ``owner`` are left out when
+        unset. ``created`` falls back to the current time when blank.
+        """
+        fm: Dict[str, Any] = {
+            "name": self.name,
+            "description": self.description,
+            "version": self.version,
+            "category": self.category,
+        }
+        optional_lists = (
+            ("tags", self.tags),
+            ("platforms", self.platforms),
+            ("requires_toolsets", self.requires_toolsets),
+            ("fallback_for_toolsets", self.fallback_for_toolsets),
+        )
+        for key, values in optional_lists:
+            if values:
+                fm[key] = list(values)
+        fm["status"] = self.status
+        # _as_float (also used by from_markdown) keeps a non-numeric
+        # confidence assigned to the instance from aborting serialization.
+        fm["confidence"] = round(_as_float(self.confidence), 3)
+        fm["source"] = self.source
+        if self.teacher_model:
+            fm["teacher_model"] = self.teacher_model
+        if self.owner:
+            fm["owner"] = self.owner
+        fm["created"] = self.created or _now_iso()
+        return fm
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the skill as a plain dict.
+
+        Besides the dataclass fields this adds ``id`` (same as ``name``) and
+        the old-schema aliases ``title``, ``problem``, ``solution`` and
+        ``steps``.
+        """
+        d = {
+            "id": self.name,        # slug doubles as id
+            "name": self.name,
+            "description": self.description,
+            "version": self.version,
+            "category": self.category,
+            "tags": list(self.tags),
+            "platforms": list(self.platforms),
+            "requires_toolsets": list(self.requires_toolsets),
+            "fallback_for_toolsets": list(self.fallback_for_toolsets),
+            "status": self.status,
+            # Same coercion as to_frontmatter: never raise on a bad value.
+            "confidence": round(_as_float(self.confidence), 3),
+            "source": self.source,
+            "teacher_model": self.teacher_model,
+            "owner": self.owner,
+            "created": self.created,
+            "when_to_use": self.when_to_use,
+            "procedure": list(self.procedure),
+            "pitfalls": list(self.pitfalls),
+            "verification": list(self.verification),
+            "body_extra": self.body_extra,
+            "uses": int(self.uses or 0),
+            "last_used": self.last_used,
+            "path": self.path,
+        }
+        # Back-compat aliases for the old API/UI
+        d["title"] = self.description or self.name.replace("-", " ").title()
+        d["problem"] = self.when_to_use
+        # "solution" is the free-form extra text when present, otherwise the
+        # first procedure step (or "" when there is neither).
+        if self.body_extra:
+            d["solution"] = self.body_extra
+        else:
+            d["solution"] = self.procedure[0] if self.procedure else ""
+        d["steps"] = list(self.procedure)
+        return d
+
+    @classmethod
+    def from_markdown(cls, text: str, *, path: Optional[str] = None) -> "Skill":
+        """Parse SKILL.md text into a ``Skill``.
+
+        Missing or blank frontmatter values fall back to the same defaults
+        as the dataclass fields. The name is slugified; when the frontmatter
+        has no ``name`` the description is slugified instead. A missing
+        ``created`` is filled with the current time.
+
+        Args:
+            text: Full file content (frontmatter plus body).
+            path: Stored on the instance as ``path``.
+        """
+        fm, body = parse_frontmatter(text)
+        sections = parse_body(body)
+        raw_name = fm.get("name")
+        name = slugify(raw_name if raw_name not in (None, "") else fm.get("description", ""), fallback="skill")
+        return cls(
+            name=name,
+            description=str(fm.get("description", "") or ""),
+            version=str(fm.get("version", "1.0.0") or "1.0.0"),
+            category=str(fm.get("category", "general") or "general"),
+            tags=_as_list(fm.get("tags")),
+            platforms=_as_list(fm.get("platforms")),
+            requires_toolsets=_as_list(fm.get("requires_toolsets")),
+            fallback_for_toolsets=_as_list(fm.get("fallback_for_toolsets")),
+            status=str(fm.get("status", "draft") or "draft"),
+            confidence=_as_float(fm.get("confidence", 0.8), 0.8),
+            source=str(fm.get("source", "learned") or "learned"),
+            teacher_model=str(fm.get("teacher_model")) if fm.get("teacher_model") else None,
+            owner=str(fm.get("owner")) if fm.get("owner") else None,
+            created=str(fm.get("created") or _now_iso()),
+            when_to_use=sections["when_to_use"],
+            procedure=list(sections["procedure"]),
+            pitfalls=list(sections["pitfalls"]),
+            verification=list(sections["verification"]),
+            body_extra=sections["body_extra"],
+            path=path,
+        )
+
+    def to_markdown(self) -> str:
+        """Render the skill as SKILL.md text: frontmatter between ``---``
+        fences, a blank line, then the body."""
+        fm = emit_frontmatter(self.to_frontmatter())
+        body = emit_body({
+            "when_to_use": self.when_to_use,
+            "procedure": self.procedure,
+            "pitfalls": self.pitfalls,
+            "verification": self.verification,
+            "body_extra": self.body_extra,
+        })
+        return f"---\n{fm}\n---\n\n{body}"
+
+
+def _now_iso() -> str:
+    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
+    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
+    # datetime produces the same formatted output.
+    from datetime import timezone
+
+    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
--- a/services/memory/skills.py
+++ b/services/memory/skills.py
@@ -0,0 +1,610 @@
+# services/memory/skills.py
+"""Skills storage layer.
+
+Skills live on disk as `data/skills/<category>/<name>/SKILL.md` files with
+YAML frontmatter and a structured markdown body (When to Use / Procedure /
+Pitfalls / Verification). See `skill_format.py` for the format.
+
+Usage counters (`uses`, `last_used`) live in a sidecar
+`data/skills/_usage.json` keyed by skill name so the SKILL.md content
+doesn't churn on every retrieval.
+
+Ownership: skills declare `owner: <username>` in frontmatter. Single-user
+deployments can leave that blank.
+
+This module also retains a JSON fallback for any legacy `data/skills.json`
+entries — they're surfaced as read-only `Skill` objects so old data still
+loads while a user migrates them to disk.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import time
+from typing import Dict, Iterable, List, Optional
+
+from .skill_format import Skill, slugify
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Token / similarity helpers (kept for the relevance fallback)
+# ---------------------------------------------------------------------------
+
+def _tokenize(text: str) -> set:
+    """Return the set of lower-cased word tokens in ``text``.
+
+    Words are split on whitespace and surrounding punctuation is stripped.
+    Tokens shorter than two characters after stripping are discarded, so
+    punctuation-only words such as ``()`` or ``...`` never produce an empty
+    token that two unrelated texts would appear to share.
+
+    Args:
+        text: Input text; ``None`` or ``""`` yields an empty set.
+    """
+    stripped = (w.strip('.,!?";:()[]') for w in (text or "").lower().split())
+    return {w for w in stripped if len(w) > 1}
+
+
+def _jaccard(a: set, b: set) -> float:
+    """Jaccard similarity of two sets: ``|a & b| / |a | b|``.
+
+    Returns 0.0 when either set is empty.
+    """
+    if a and b:
+        shared = a & b
+        combined = a | b
+        return len(shared) / len(combined)
+    return 0.0
+
+
+def _to_float(x, default: float = 0.0) -> float:
+    """Coerce a possibly hand-edited frontmatter value to float without
+    raising — a blank or non-numeric `confidence:` in a SKILL.md must not
+    blow up retrieval or eviction.
+
+    Args:
+        x: Value to convert.
+        default: Returned when ``x`` cannot be converted.
+    """
+    try:
+        return float(x)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: an integer too large to represent as a float.
+        return default
+
+
+# ---------------------------------------------------------------------------
+# SkillsManager
+# ---------------------------------------------------------------------------
+
+
+class SkillsManager:
+    """Read/write SKILL.md files under <data_dir>/skills/."""
+
+    def __init__(self, data_dir: str):
+        """Record the storage paths under ``data_dir`` and create the
+        ``skills`` directory if it does not exist yet."""
+        skills_root = os.path.join(data_dir, "skills")
+        self.data_dir = data_dir
+        self.skills_root = skills_root
+        self.usage_file = os.path.join(skills_root, "_usage.json")
+        self.legacy_file = os.path.join(data_dir, "skills.json")  # back-compat
+        os.makedirs(skills_root, exist_ok=True)
+
+    # ----------------------------------------------------------------------
+    # Path helpers
+    # ----------------------------------------------------------------------
+
+    def _skill_dir(self, category: str, name: str) -> str:
+        """Return ``<skills_root>/<category slug>/<name slug>``.
+
+        A blank category is treated as ``"general"``.
+        """
+        segments = (
+            slugify(category or "general", fallback="general"),
+            slugify(name, fallback="skill"),
+        )
+        return os.path.join(self.skills_root, *segments)
+
+    def _skill_file(self, category: str, name: str) -> str:
+        """Return the path of the ``SKILL.md`` file inside the skill's directory."""
+        skill_dir = self._skill_dir(category, name)
+        return os.path.join(skill_dir, "SKILL.md")
+
+    # ----------------------------------------------------------------------
+    # Usage sidecar
+    # ----------------------------------------------------------------------
+
+    def _load_usage(self) -> Dict[str, Dict]:
+        """Load the usage sidecar.
+
+        Returns:
+            The parsed JSON object, or ``{}`` when the file is missing,
+            unreadable, not valid JSON, or not a JSON object. Read and parse
+            failures are logged, because the next ``_save_usage`` call will
+            overwrite the file with whatever was returned here.
+        """
+        if not os.path.exists(self.usage_file):
+            return {}
+        try:
+            with open(self.usage_file, encoding="utf-8") as f:
+                d = json.load(f)
+        except (OSError, ValueError) as e:
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+            logger.warning("Failed to read usage sidecar %s: %s", self.usage_file, e)
+            return {}
+        return d if isinstance(d, dict) else {}
+
+    def _save_usage(self, usage: Dict[str, Dict]) -> None:
+        """Write the usage sidecar.
+
+        Tries ``core.atomic_io.atomic_write_json`` first. If importing or
+        calling it raises, the data is written to ``<usage_file>.tmp`` and
+        moved over the real file with ``os.replace``.
+        """
+        destination = self.usage_file
+        try:
+            from core.atomic_io import atomic_write_json
+            atomic_write_json(destination, usage, indent=2)
+        except Exception:
+            staging = destination + ".tmp"
+            with open(staging, "w") as handle:
+                json.dump(usage, handle, indent=2)
+            os.replace(staging, destination)
+
+    def set_audit(self, name: str, verdict: str, by_teacher: bool = False,
+                  worker_model: str = "", teacher_model: str = "") -> None:
+        """Store the latest test/audit outcome for a skill in the usage
+        sidecar, leaving SKILL.md untouched.
+
+        Always writes ``audit_verdict``, ``audit_by_teacher`` and
+        ``audited_at`` (current ``time.time()``). The worker/teacher model
+        names are written only when non-empty. A missing sidecar entry is
+        created with zeroed usage counters.
+        """
+        usage = self._load_usage()
+        record = usage.setdefault(name, {"uses": 0, "last_used": None})
+        record["audit_verdict"] = verdict
+        record["audit_by_teacher"] = bool(by_teacher)
+        optional_models = (
+            ("audit_worker_model", worker_model),
+            ("audit_teacher_model", teacher_model),
+        )
+        for key, model in optional_models:
+            if model:
+                record[key] = model
+        record["audited_at"] = time.time()
+        self._save_usage(usage)
+
+    def set_necessity(self, name: str, necessary: bool,
+                      redundant_with=None, reason: str = "") -> None:
+        """Store the advisory "is this skill necessary?" judgment under the
+        ``necessity`` key of the skill's usage-sidecar entry.
+
+        Only the sidecar is written; the skill file itself is not touched.
+        A missing sidecar entry is created with zeroed usage counters.
+        """
+        usage = self._load_usage()
+        judgment = {
+            "necessary": bool(necessary),
+            "redundant_with": list(redundant_with or []),
+            "reason": str(reason or ""),
+        }
+        record = usage.setdefault(name, {"uses": 0, "last_used": None})
+        record["necessity"] = judgment
+        self._save_usage(usage)
+
+    # ----------------------------------------------------------------------
+    # Disk scan
+    # ----------------------------------------------------------------------
+
+    def _iter_skill_files(self) -> Iterable[str]:
+        """Yield the path of every ``SKILL.md`` found under ``skills_root``.
+
+        Walks the tree without following symlinks; yields nothing when the
+        root directory does not exist.
+        """
+        root_dir = self.skills_root
+        if os.path.isdir(root_dir):
+            for dirpath, _subdirs, filenames in os.walk(root_dir, followlinks=False):
+                if "SKILL.md" in filenames:
+                    yield os.path.join(dirpath, "SKILL.md")
+
+    def _read_skill(self, path: str) -> Optional[Skill]:
+        """Read and parse one SKILL.md file.
+
+        The file is decoded as UTF-8 explicitly so the result does not
+        depend on the platform's default encoding.
+        NOTE(review): assumes `atomic_write_text` writes UTF-8 — confirm.
+
+        Returns:
+            The parsed ``Skill``, or ``None`` (after logging a warning) when
+            the file cannot be read or parsed.
+        """
+        try:
+            with open(path, encoding="utf-8") as f:
+                text = f.read()
+            return Skill.from_markdown(text, path=path)
+        except Exception as e:
+            logger.warning("Failed to parse %s: %s", path, e)
+            return None
+
+    def _write_skill(self, sk: Skill) -> str:
+        """Write ``sk`` to its SKILL.md location and return that path.
+
+        The parent directory is created when needed, the markdown is written
+        through ``core.atomic_io.atomic_write_text``, and ``sk.path`` is
+        updated to the written path.
+        """
+        target = self._skill_file(sk.category or "general", sk.name)
+        parent = os.path.dirname(target)
+        os.makedirs(parent, exist_ok=True)
+        from core.atomic_io import atomic_write_text
+        atomic_write_text(target, sk.to_markdown())
+        sk.path = target
+        return target
+
+    def backfill_owner(self, primary_owner: str, valid_owners: Optional[set[str]] = None) -> int:
+        """Assign legacy/unclaimed skill files to the primary owner.
+
+        Skills are disk-backed, so the DB legacy-owner migration cannot fix
+        them. If strict owner filtering is enabled and SKILL.md files have no
+        owner or an owner from a deleted/test account, the UI appears empty even
+        though files still exist. This mirrors the DB legacy-owner sweep.
+
+        A skill is left alone when its owner already equals the primary
+        owner or is listed in ``valid_owners``; every other readable skill is
+        rewritten with the primary owner.
+
+        Returns:
+            The number of skill files rewritten. 0 when ``primary_owner`` is
+            blank.
+        """
+        target_owner = (primary_owner or "").strip()
+        if not target_owner:
+            return 0
+        keep = set(valid_owners or [])
+        reassigned = 0
+        for skill_path in self._iter_skill_files():
+            skill = self._read_skill(skill_path)
+            if not skill:
+                continue
+            current = (skill.owner or "").strip()
+            if current == target_owner or (current and current in keep):
+                continue
+            skill.owner = target_owner
+            try:
+                self._write_skill(skill)
+            except Exception as exc:
+                logger.warning("Failed to backfill owner for skill %s: %s", skill.name, exc)
+            else:
+                reassigned += 1
+        return reassigned
+
+    # ----------------------------------------------------------------------
+    # Public API — keeps the old method names so callers don't break
+    # ----------------------------------------------------------------------
+
+    def load_all(self) -> List[Dict]:
+        """Return every skill as a plain dict, plus any legacy JSON entries.
+
+        Disk skills come from ``Skill.to_dict()`` with the usage-sidecar
+        fields (``uses``, ``last_used``, audit fields, ``necessity``) merged
+        in. Rows from the legacy ``skills.json`` are appended with
+        ``category="legacy"`` and ``_legacy=True`` unless a disk skill
+        already has the same slug. A legacy file that cannot be read or
+        parsed is logged and skipped; it never prevents the disk skills from
+        being returned.
+        """
+        usage = self._load_usage()
+        out: List[Dict] = []
+        seen_names: set[str] = set()
+        for path in self._iter_skill_files():
+            sk = self._read_skill(path)
+            if not sk:
+                continue
+            d = sk.to_dict()
+            u = usage.get(sk.name)
+            if not isinstance(u, dict):
+                # Tolerate a hand-edited sidecar whose entry is not an object.
+                u = {}
+            # `or 0` also covers an explicit null in the sidecar.
+            d["uses"] = int(u.get("uses") or 0)
+            d["last_used"] = u.get("last_used")
+            d["audit_verdict"] = u.get("audit_verdict")
+            d["audit_by_teacher"] = bool(u.get("audit_by_teacher"))
+            d["audit_worker_model"] = u.get("audit_worker_model")
+            d["audit_teacher_model"] = u.get("audit_teacher_model")
+            d["audited_at"] = u.get("audited_at")
+            d["necessity"] = u.get("necessity")
+            out.append(d)
+            seen_names.add(sk.name)
+        # Legacy JSON entries — surfaced as draft, not editable from new flow
+        if os.path.exists(self.legacy_file):
+            try:
+                with open(self.legacy_file, encoding="utf-8") as f:
+                    legacy = json.load(f)
+                if isinstance(legacy, list):
+                    for row in legacy:
+                        if not isinstance(row, dict):
+                            continue
+                        name = slugify(row.get("title") or row.get("id") or "skill")
+                        if name in seen_names:
+                            continue
+                        out.append({
+                            "id": row.get("id") or name,
+                            "name": name,
+                            "description": row.get("title", ""),
+                            "version": "0.0.1",
+                            "category": "legacy",
+                            "tags": row.get("tags") or [],
+                            "status": row.get("status") or "draft",
+                            "confidence": row.get("confidence", 0.5),
+                            "source": row.get("source", "imported"),
+                            "owner": row.get("owner"),
+                            "when_to_use": row.get("problem", ""),
+                            "procedure": row.get("steps") or [],
+                            "pitfalls": [],
+                            "verification": [],
+                            "body_extra": row.get("solution", ""),
+                            "title": row.get("title", ""),
+                            "problem": row.get("problem", ""),
+                            "solution": row.get("solution", ""),
+                            "steps": row.get("steps") or [],
+                            "uses": row.get("uses", 0),
+                            "last_used": row.get("last_used"),
+                            "_legacy": True,
+                        })
+            except Exception as e:
+                # Best effort: keep whatever was collected, but say why the
+                # legacy entries are missing instead of hiding the failure.
+                logger.warning("Failed to load legacy skills from %s: %s", self.legacy_file, e)
+        return out
+
+    def load(self, owner: Optional[str] = None) -> List[Dict]:
+        """Return all skills, or only those owned by ``owner`` when given."""
+        everything = self.load_all()
+        if owner is not None:
+            # SECURITY: strict ownership filter. The previous predicate also
+            # included skills with NO owner field (`not s.get("owner")`), which
+            # leaked legacy / un-stamped skills to every authenticated user.
+            # Hide them now; the owner needs to be backfilled on disk if those
+            # skills should be visible to a specific user.
+            return [entry for entry in everything if entry.get("owner") == owner]
+        return everything
+
+    # ----------------------------------------------------------------------
+    # CRUD — disk-backed
+    # ----------------------------------------------------------------------
+
+    def add_skill(
+        self,
+        title: str = "",
+        problem: str = "",
+        solution: str = "",
+        steps: Optional[List[str]] = None,
+        tags: Optional[List[str]] = None,
+        source: str = "learned",
+        teacher_model: Optional[str] = None,
+        confidence: float = 0.8,
+        session_id: Optional[str] = None,
+        owner: Optional[str] = None,
+        # New-schema fields (optional; fall back to old shape if absent)
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        category: str = "general",
+        when_to_use: Optional[str] = None,
+        procedure: Optional[List[str]] = None,
+        pitfalls: Optional[List[str]] = None,
+        verification: Optional[List[str]] = None,
+        platforms: Optional[List[str]] = None,
+        requires_toolsets: Optional[List[str]] = None,
+        fallback_for_toolsets: Optional[List[str]] = None,
+        status: str = "draft",
+        version: str = "1.0.0",
+    ) -> Dict:
+        """Create a skill on disk and return it as a dict.
+
+        Accepts both the old shape (``title`` / ``problem`` / ``solution`` /
+        ``steps``) and the new-schema fields; where both are supplied the
+        new-schema value is used. ``session_id`` is accepted but not used
+        here. If the slug is already taken, ``-2``, ``-3``, ... is appended.
+
+        For any ``source`` other than ``"user"``, a near-identical existing
+        skill (Jaccard >= 0.82 over name, description, when_to_use and
+        procedure) short-circuits creation: its usage counter is bumped and
+        its dict is returned with ``_deduped`` and ``_duplicate_of`` set.
+        When ``owner`` is given, only that owner's skills are dedup
+        candidates, so one user's new skill is never replaced by — and never
+        returns the content of — another user's skill.
+
+        Returns:
+            The new skill's ``to_dict()`` form, or the existing duplicate.
+        """
+        # Normalize name
+        nm = slugify(name or title or description or "skill")
+        desc = description or title or ""
+        when = when_to_use if when_to_use is not None else (problem or "")
+        proc = list(procedure if procedure is not None else (steps or []))
+
+        # Free dedup-at-creation (always, no API): for LLM-authored skills,
+        # skip if a near-identical skill already exists (Jaccard over
+        # name+description+when_to_use+procedure). User-authored skills are
+        # never auto-skipped — a human asked for it. The every-X AI audit
+        # handles the fuzzier near-duplicates this cheap check won't catch.
+        _all = self.load_all()
+        if source != "user":
+            cand = _tokenize(" ".join([nm, desc, when, " ".join(proc)]))
+            if cand:
+                for s in _all:
+                    # Respect the ownership boundary enforced by load():
+                    # never dedup against a skill the caller cannot see.
+                    if owner is not None and s.get("owner") != owner:
+                        continue
+                    # `or ""` guards against null fields in legacy rows.
+                    ex = _tokenize(" ".join([
+                        s.get("name") or "", s.get("description") or "",
+                        s.get("when_to_use") or "",
+                        " ".join(s.get("procedure") or []),
+                    ]))
+                    if _jaccard(cand, ex) >= 0.82:
+                        # Near-identical — don't grow the library; bump the
+                        # existing skill's usage and return it so the caller
+                        # knows it already exists.
+                        try:
+                            self.record_use(s["name"])
+                        except Exception as e:
+                            # Usage tracking is best-effort; the dedup result
+                            # is still valid without it.
+                            logger.debug("record_use failed for %s: %s", s.get("name"), e)
+                        return {**s, "_deduped": True, "_duplicate_of": s.get("name")}
+
+        # Avoid clobbering an existing skill with the same name. This check
+        # stays global (all owners) because names map to directories.
+        existing = {s["name"] for s in _all}
+        base = nm
+        i = 2
+        while nm in existing:
+            nm = f"{base}-{i}"
+            i += 1
+
+        sk = Skill(
+            name=nm,
+            description=desc.strip(),
+            version=version,
+            category=category or "general",
+            tags=list(tags or []),
+            platforms=list(platforms or []),
+            requires_toolsets=list(requires_toolsets or []),
+            fallback_for_toolsets=list(fallback_for_toolsets or []),
+            status=status or "draft",
+            # A None / non-numeric confidence falls back to the default
+            # instead of raising before anything is written.
+            confidence=_to_float(confidence, 0.8),
+            source=source,
+            teacher_model=teacher_model,
+            owner=owner,
+            when_to_use=when,
+            procedure=proc,
+            pitfalls=list(pitfalls or []),
+            verification=list(verification or []),
+            body_extra=(solution if solution and not procedure else ""),
+        )
+        self._write_skill(sk)
+
+        return sk.to_dict()
+
+    def update_skill(self, skill_id: str, updates: Dict) -> bool:
+        """`skill_id` is the slug name. Allows updating any field plus
+        renames if `name` changes (file is moved on disk).
+
+        Old-schema keys (``title``, ``problem``, ``solution``, ``steps``) are
+        applied only when the matching new-schema key is absent from
+        ``updates``. A ``confidence`` that cannot be converted to float is
+        ignored and the stored value kept, so a bad value cannot make the
+        write fail after the skill directory has already been moved.
+
+        Returns:
+            True when the skill was found and written; False when no skill
+            has that name or the rename/recategorize target already exists.
+        """
+        for path in self._iter_skill_files():
+            sk = self._read_skill(path)
+            if not sk or sk.name != skill_id:
+                continue
+            old_dir = os.path.dirname(path)
+
+            # Apply updates in a Skill-shape friendly way
+            scalar_keys = (
+                "description", "version", "category", "status",
+                "source", "teacher_model", "owner", "when_to_use",
+                "body_extra",
+            )
+            for k in scalar_keys:
+                if k in updates:
+                    setattr(sk, k, updates[k])
+            if "confidence" in updates:
+                sk.confidence = _to_float(updates["confidence"], sk.confidence)
+            list_keys = ("tags", "procedure", "pitfalls", "verification",
+                         "platforms", "requires_toolsets", "fallback_for_toolsets")
+            for k in list_keys:
+                if k in updates:
+                    setattr(sk, k, list(updates[k] or []))
+
+            # Old-schema field aliases
+            if "title" in updates and "description" not in updates:
+                sk.description = updates["title"]
+            if "problem" in updates and "when_to_use" not in updates:
+                sk.when_to_use = updates["problem"]
+            if "solution" in updates and "body_extra" not in updates and not sk.procedure:
+                sk.body_extra = updates["solution"]
+            if "steps" in updates and "procedure" not in updates:
+                sk.procedure = list(updates["steps"] or [])
+
+            # Rename
+            new_name = slugify(updates.get("name") or sk.name)
+            if new_name != sk.name:
+                sk.name = new_name
+
+            # Write to potentially new path
+            new_path = self._skill_file(sk.category, sk.name)
+            if new_path != path:
+                # Move the whole skill directory if rename or recategorize
+                new_dir = os.path.dirname(new_path)
+                if os.path.isdir(new_dir):
+                    logger.warning("Skill rename target exists: %s", new_dir)
+                    return False
+                os.makedirs(os.path.dirname(new_dir), exist_ok=True)
+                os.rename(old_dir, new_dir)
+                # Also rename usage key
+                usage = self._load_usage()
+                if skill_id in usage:
+                    usage[sk.name] = usage.pop(skill_id)
+                    self._save_usage(usage)
+            self._write_skill(sk)
+            return True
+        return False
+
+    def delete_skill(self, skill_id: str) -> bool:
+        """Delete the skill named ``skill_id`` together with its directory
+        and its usage-sidecar entry.
+
+        Returns:
+            True when the skill was found and removed; False when no skill
+            has that name or the directory could not be removed.
+        """
+        import shutil
+
+        for path in self._iter_skill_files():
+            sk = self._read_skill(path)
+            if not sk or sk.name != skill_id:
+                continue
+            skill_dir = os.path.dirname(path)
+            # A SKILL.md placed directly in the skills root would make the
+            # "skill directory" the root itself; never delete that.
+            if os.path.realpath(skill_dir) == os.path.realpath(self.skills_root):
+                logger.warning("Refusing to remove skills root for skill %s", skill_id)
+                return False
+            try:
+                # Remove the whole skill dir. rmtree also copes with
+                # symlinked sub-directories, which os.rmdir rejects.
+                shutil.rmtree(skill_dir)
+            except OSError as e:
+                logger.warning("Failed to remove skill dir %s: %s", skill_dir, e)
+                return False
+            usage = self._load_usage()
+            if skill_id in usage:
+                del usage[skill_id]
+                self._save_usage(usage)
+            return True
+        return False
+
+    def record_use(self, skill_id: str) -> None:
+        """Increment the skill's ``uses`` counter in the usage sidecar and
+        set ``last_used`` to the current time in whole seconds."""
+        counters = self._load_usage()
+        record = counters.setdefault(skill_id, {"uses": 0, "last_used": None})
+        previous = int(record.get("uses", 0))
+        record["uses"] = previous + 1
+        record["last_used"] = int(time.time())
+        self._save_usage(counters)
+
+    # ----------------------------------------------------------------------
+    # Reading a single skill (used by the skill_view tool)
+    # ----------------------------------------------------------------------
+
+    def read_skill_md(self, name: str) -> Optional[str]:
+        """Return the raw SKILL.md text of the skill named ``name``.
+
+        The file is decoded as UTF-8 explicitly rather than with the
+        platform's default encoding.
+
+        Returns:
+            The file content, or ``None`` when no skill has that name or the
+            file cannot be read.
+        """
+        for path in self._iter_skill_files():
+            sk = self._read_skill(path)
+            if sk and sk.name == name:
+                try:
+                    with open(path, encoding="utf-8") as f:
+                        return f.read()
+                except (OSError, ValueError):
+                    # ValueError covers UnicodeDecodeError.
+                    return None
+        return None
+
+    def read_skill_reference(self, name: str, ref_path: str) -> Optional[str]:
+        """Read a sub-file under the skill's directory (references/, etc).
+        Refuses path traversal.
+
+        Both the skill directory and the requested path are resolved with
+        ``os.path.realpath`` before being compared, so ``..`` segments and
+        symlinks that lead outside the skill directory are rejected.
+
+        Args:
+            name: Skill slug.
+            ref_path: Path relative to the skill's directory.
+
+        Returns:
+            The file content decoded as UTF-8, or ``None`` when the skill is
+            unknown, the path is invalid or escapes the skill directory, the
+            target is not a regular file, or it cannot be read.
+        """
+        for path in self._iter_skill_files():
+            sk = self._read_skill(path)
+            if not sk or sk.name != name:
+                continue
+            base = os.path.realpath(os.path.dirname(path))
+            try:
+                target = os.path.realpath(os.path.join(base, ref_path))
+                inside = os.path.commonpath([base, target]) == base
+            except (TypeError, ValueError):
+                # TypeError: ref_path is not a string. ValueError: commonpath
+                # on paths from different drives, or an embedded null byte.
+                return None
+            # Compare against the resolved base, not the unresolved dirname.
+            if not inside or target == base:
+                return None
+            if not os.path.isfile(target):
+                return None
+            try:
+                with open(target, encoding="utf-8") as f:
+                    return f.read()
+            except (OSError, ValueError):
+                # ValueError covers UnicodeDecodeError (non-text file).
+                return None
+        return None
+
+    # ----------------------------------------------------------------------
+    # Index — the lightweight summary injected into the system prompt
+    # ----------------------------------------------------------------------
+
+    def index_for(
+        self,
+        owner: Optional[str] = None,
+        *,
+        active_toolsets: Optional[List[str]] = None,
+        platform: Optional[str] = None,
+    ) -> List[Dict]:
+        """Build the `[{name, description, category, status}]` summary list
+        that goes into the agent's system prompt, sorted by category then
+        name.
+
+        Kept:
+          - Skills whose status is "published", or missing (pre-status
+            legacy entries; reported as "published").
+          - Drafts whose source is "teacher-escalation". The teacher loop
+            exists so the student finds the new procedure on the very next
+            turn; requiring a manual publish click would defeat it.
+
+        Dropped:
+          - Every other status, including user-created drafts — those are
+            work-in-progress and would pollute the prompt with half-finished
+            procedures.
+          - Skills that list `platforms` not containing `platform` (only
+            when `platform` is given).
+          - Skills with a `requires_toolsets` entry that is not active.
+          - Skills with a `fallback_for_toolsets` entry that is active.
+        """
+        enabled = active_toolsets or []
+        visible = []
+        for skill in self.load(owner=owner):
+            status = skill.get("status")
+            if status not in ("published", None):
+                teacher_draft = status == "draft" and skill.get("source") == "teacher-escalation"
+                if not teacher_draft:
+                    continue
+            # Platform gating
+            if platform and skill.get("platforms") and platform not in skill["platforms"]:
+                continue
+            # requires_toolsets: every required toolset must be active
+            required = skill.get("requires_toolsets") or []
+            if any(toolset not in enabled for toolset in required):
+                continue
+            # fallback_for_toolsets: hidden as soon as one of them is active
+            fallbacks = skill.get("fallback_for_toolsets") or []
+            if any(toolset in enabled for toolset in fallbacks):
+                continue
+            visible.append({
+                "name": skill["name"],
+                "description": skill.get("description") or skill.get("title", ""),
+                "category": skill.get("category", "general"),
+                "status": status or "published",
+            })
+        return sorted(visible, key=lambda row: (row["category"], row["name"]))
+
+    # ----------------------------------------------------------------------
+    # Relevance search (kept for the existing /api/skills/search endpoint
+    # and the `manage_skills` action="search"). Now operates on the new
+    # field set.
+    # ----------------------------------------------------------------------
+
+    def get_relevant_skills(
+        self,
+        query: str,
+        skills: Optional[List[Dict]] = None,
+        threshold: float = 0.3,
+        max_items: int = 5,
+        min_confidence: float = 0.0,
+    ) -> List[Dict]:
+        if skills is None:
+            skills = self.load_all()
+        if not skills or not query.strip():
+            return []
+        # Consider published AND draft skills for relevance retrieval.
+        # The teacher-escalation loop writes new skills as drafts; the
+        # whole point is for the student to find them on the next try
+        # without a manual publish click. The UI flags teacher-written
+        # entries with a 🎓 badge so users can demote / delete bad
+        # ones when they spot them.
+        skills = [s for s in skills if s.get("status") in ("published", "draft")]
+        # Confidence gate (used by prompt-injection, NOT by search): a DRAFT
+        # skill must clear the bar to be injected. Published skills are already
+        # vetted, so they always qualify. Missing confidence = treat as 1.0
+        # (legacy skills shouldn't silently vanish). 0 disables the gate.
+        if min_confidence > 0:
+            def _passes(s):
+                if s.get("status") == "published":
+                    return True
+                c = s.get("confidence")
+                if c is None:
+                    return True  # unset → don't filter (legacy)
+                return _to_float(c, 1.0) >= min_confidence  # unparseable → pass
+            skills = [s for s in skills if _passes(s)]
+        if not skills:
+            return []
+
+        query_tokens = _tokenize(query)
+        scored = []
+        for sk in skills:
+            text = " ".join([
+                sk.get("name", ""),
+                sk.get("description", ""),
+                sk.get("when_to_use", ""),
+                " ".join(sk.get("tags", []) or []),
+                " ".join(sk.get("procedure", []) or []),
+            ])
+            score = _jaccard(query_tokens, _tokenize(text))
+            for tag in sk.get("tags", []) or []:
+                if tag and tag in query.lower():
+                    score = max(score, 0.3) * 1.3
+            if query.lower() in (sk.get("description") or "").lower():
+                score = max(score, 0.6)
+            score *= 1.0 + _to_float(sk.get("confidence"), 0.5) * 0.1
+            if sk.get("uses", 0) > 0:
+                score *= 1.05
+            if score >= threshold:
+                scored.append((score, sk))
+        scored.sort(key=lambda x: x[0], reverse=True)
+        return [sk for _, sk in scored[:max_items]]