Odysseus v1.0
This commit is contained in:
610
services/memory/skills.py
Normal file
610
services/memory/skills.py
Normal file
@@ -0,0 +1,610 @@
|
||||
# services/memory/skills.py
|
||||
"""Skills storage layer.
|
||||
|
||||
Skills live on disk as `data/skills/<category>/<name>/SKILL.md` files with
|
||||
YAML frontmatter and a structured markdown body (When to Use / Procedure /
|
||||
Pitfalls / Verification). See `skill_format.py` for the format.
|
||||
|
||||
Usage counters (`uses`, `last_used`) live in a sidecar
|
||||
`data/skills/_usage.json` keyed by skill name so the SKILL.md content
|
||||
doesn't churn on every retrieval.
|
||||
|
||||
Ownership: skills declare `owner: <username>` in frontmatter. Single-user
|
||||
deployments can leave that blank.
|
||||
|
||||
This module also retains a JSON fallback for any legacy `data/skills.json`
|
||||
entries — they're surfaced as read-only `Skill` objects so old data still
|
||||
loads while a user migrates them to disk.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Dict, Iterable, List, Optional
|
||||
|
||||
from .skill_format import Skill, slugify
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Token / similarity helpers (kept for the relevance fallback)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _tokenize(text: str) -> set:
|
||||
return {w.strip('.,!?";:()[]') for w in (text or "").lower().split() if len(w) > 1}
|
||||
|
||||
|
||||
def _jaccard(a: set, b: set) -> float:
|
||||
if not a or not b:
|
||||
return 0.0
|
||||
return len(a & b) / len(a | b)
|
||||
|
||||
|
||||
def _to_float(x, default: float = 0.0) -> float:
|
||||
"""Coerce a possibly hand-edited frontmatter value to float without
|
||||
raising — a blank or non-numeric `confidence:` in a SKILL.md must not
|
||||
blow up retrieval or eviction."""
|
||||
try:
|
||||
return float(x)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SkillsManager
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SkillsManager:
|
||||
"""Read/write SKILL.md files under <data_dir>/skills/."""
|
||||
|
||||
def __init__(self, data_dir: str):
|
||||
self.data_dir = data_dir
|
||||
self.skills_root = os.path.join(data_dir, "skills")
|
||||
self.usage_file = os.path.join(self.skills_root, "_usage.json")
|
||||
self.legacy_file = os.path.join(data_dir, "skills.json") # back-compat
|
||||
os.makedirs(self.skills_root, exist_ok=True)
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Path helpers
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def _skill_dir(self, category: str, name: str) -> str:
|
||||
cat = slugify(category or "general", fallback="general")
|
||||
nm = slugify(name, fallback="skill")
|
||||
return os.path.join(self.skills_root, cat, nm)
|
||||
|
||||
def _skill_file(self, category: str, name: str) -> str:
|
||||
return os.path.join(self._skill_dir(category, name), "SKILL.md")
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Usage sidecar
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def _load_usage(self) -> Dict[str, Dict]:
|
||||
if not os.path.exists(self.usage_file):
|
||||
return {}
|
||||
try:
|
||||
with open(self.usage_file) as f:
|
||||
d = json.load(f)
|
||||
return d if isinstance(d, dict) else {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
def _save_usage(self, usage: Dict[str, Dict]) -> None:
|
||||
try:
|
||||
from core.atomic_io import atomic_write_json
|
||||
atomic_write_json(self.usage_file, usage, indent=2)
|
||||
except Exception:
|
||||
tmp = self.usage_file + ".tmp"
|
||||
with open(tmp, "w") as f:
|
||||
json.dump(usage, f, indent=2)
|
||||
os.replace(tmp, self.usage_file)
|
||||
|
||||
def set_audit(self, name: str, verdict: str, by_teacher: bool = False,
|
||||
worker_model: str = "", teacher_model: str = "") -> None:
|
||||
"""Record the last test/audit result for a skill in the usage sidecar
|
||||
(so it surfaces in load() without touching SKILL.md). Drives the
|
||||
'verified' check + teacher mark on the card."""
|
||||
import time as _t
|
||||
usage = self._load_usage()
|
||||
e = usage.setdefault(name, {"uses": 0, "last_used": None})
|
||||
e["audit_verdict"] = verdict
|
||||
e["audit_by_teacher"] = bool(by_teacher)
|
||||
if worker_model:
|
||||
e["audit_worker_model"] = worker_model
|
||||
if teacher_model:
|
||||
e["audit_teacher_model"] = teacher_model
|
||||
e["audited_at"] = _t.time()
|
||||
self._save_usage(usage)
|
||||
|
||||
def set_necessity(self, name: str, necessary: bool,
|
||||
redundant_with=None, reason: str = "") -> None:
|
||||
"""Record the advisory 'is this skill necessary?' judgment in the usage
|
||||
sidecar. Surfaced on the card as a flag; never acts on the skill."""
|
||||
usage = self._load_usage()
|
||||
e = usage.setdefault(name, {"uses": 0, "last_used": None})
|
||||
e["necessity"] = {
|
||||
"necessary": bool(necessary),
|
||||
"redundant_with": list(redundant_with or []),
|
||||
"reason": str(reason or ""),
|
||||
}
|
||||
self._save_usage(usage)
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Disk scan
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def _iter_skill_files(self) -> Iterable[str]:
|
||||
if not os.path.isdir(self.skills_root):
|
||||
return
|
||||
for root, _dirs, files in os.walk(self.skills_root, followlinks=False):
|
||||
if "SKILL.md" in files:
|
||||
yield os.path.join(root, "SKILL.md")
|
||||
|
||||
def _read_skill(self, path: str) -> Optional[Skill]:
|
||||
try:
|
||||
with open(path) as f:
|
||||
text = f.read()
|
||||
return Skill.from_markdown(text, path=path)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to parse {path}: {e}")
|
||||
return None
|
||||
|
||||
def _write_skill(self, sk: Skill) -> str:
|
||||
path = self._skill_file(sk.category or "general", sk.name)
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
from core.atomic_io import atomic_write_text
|
||||
atomic_write_text(path, sk.to_markdown())
|
||||
sk.path = path
|
||||
return path
|
||||
|
||||
def backfill_owner(self, primary_owner: str, valid_owners: Optional[set[str]] = None) -> int:
|
||||
"""Assign legacy/unclaimed skill files to the primary owner.
|
||||
|
||||
Skills are disk-backed, so the DB legacy-owner migration cannot fix
|
||||
them. If strict owner filtering is enabled and SKILL.md files have no
|
||||
owner or an owner from a deleted/test account, the UI appears empty even
|
||||
though files still exist. This mirrors the DB legacy-owner sweep.
|
||||
"""
|
||||
primary_owner = (primary_owner or "").strip()
|
||||
if not primary_owner:
|
||||
return 0
|
||||
valid_owners = set(valid_owners or [])
|
||||
changed = 0
|
||||
for path in self._iter_skill_files():
|
||||
sk = self._read_skill(path)
|
||||
if not sk:
|
||||
continue
|
||||
owner = (sk.owner or "").strip()
|
||||
if owner == primary_owner:
|
||||
continue
|
||||
if owner and owner in valid_owners:
|
||||
continue
|
||||
sk.owner = primary_owner
|
||||
try:
|
||||
self._write_skill(sk)
|
||||
changed += 1
|
||||
except Exception as e:
|
||||
logger.warning("Failed to backfill owner for skill %s: %s", sk.name, e)
|
||||
return changed
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Public API — keeps the old method names so callers don't break
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def load_all(self) -> List[Dict]:
|
||||
"""Return every skill as a plain dict, plus any legacy JSON entries."""
|
||||
usage = self._load_usage()
|
||||
out: List[Dict] = []
|
||||
seen_names: set[str] = set()
|
||||
for path in self._iter_skill_files():
|
||||
sk = self._read_skill(path)
|
||||
if not sk:
|
||||
continue
|
||||
d = sk.to_dict()
|
||||
u = usage.get(sk.name) or {}
|
||||
d["uses"] = int(u.get("uses", 0))
|
||||
d["last_used"] = u.get("last_used")
|
||||
d["audit_verdict"] = u.get("audit_verdict")
|
||||
d["audit_by_teacher"] = bool(u.get("audit_by_teacher"))
|
||||
d["audit_worker_model"] = u.get("audit_worker_model")
|
||||
d["audit_teacher_model"] = u.get("audit_teacher_model")
|
||||
d["audited_at"] = u.get("audited_at")
|
||||
d["necessity"] = u.get("necessity")
|
||||
out.append(d)
|
||||
seen_names.add(sk.name)
|
||||
# Legacy JSON entries — surfaced as draft, not editable from new flow
|
||||
if os.path.exists(self.legacy_file):
|
||||
try:
|
||||
with open(self.legacy_file) as f:
|
||||
legacy = json.load(f)
|
||||
if isinstance(legacy, list):
|
||||
for row in legacy:
|
||||
if not isinstance(row, dict):
|
||||
continue
|
||||
name = slugify(row.get("title") or row.get("id") or "skill")
|
||||
if name in seen_names:
|
||||
continue
|
||||
out.append({
|
||||
"id": row.get("id") or name,
|
||||
"name": name,
|
||||
"description": row.get("title", ""),
|
||||
"version": "0.0.1",
|
||||
"category": "legacy",
|
||||
"tags": row.get("tags") or [],
|
||||
"status": row.get("status") or "draft",
|
||||
"confidence": row.get("confidence", 0.5),
|
||||
"source": row.get("source", "imported"),
|
||||
"owner": row.get("owner"),
|
||||
"when_to_use": row.get("problem", ""),
|
||||
"procedure": row.get("steps") or [],
|
||||
"pitfalls": [],
|
||||
"verification": [],
|
||||
"body_extra": row.get("solution", ""),
|
||||
"title": row.get("title", ""),
|
||||
"problem": row.get("problem", ""),
|
||||
"solution": row.get("solution", ""),
|
||||
"steps": row.get("steps") or [],
|
||||
"uses": row.get("uses", 0),
|
||||
"last_used": row.get("last_used"),
|
||||
"_legacy": True,
|
||||
})
|
||||
except Exception:
|
||||
pass
|
||||
return out
|
||||
|
||||
def load(self, owner: Optional[str] = None) -> List[Dict]:
|
||||
entries = self.load_all()
|
||||
if owner is None:
|
||||
return entries
|
||||
# SECURITY: strict ownership filter. The previous predicate also
|
||||
# included skills with NO owner field (`not s.get("owner")`), which
|
||||
# leaked legacy / un-stamped skills to every authenticated user.
|
||||
# Hide them now; the owner needs to be backfilled on disk if those
|
||||
# skills should be visible to a specific user.
|
||||
return [s for s in entries if s.get("owner") == owner]
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# CRUD — disk-backed
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def add_skill(
|
||||
self,
|
||||
title: str = "",
|
||||
problem: str = "",
|
||||
solution: str = "",
|
||||
steps: Optional[List[str]] = None,
|
||||
tags: Optional[List[str]] = None,
|
||||
source: str = "learned",
|
||||
teacher_model: Optional[str] = None,
|
||||
confidence: float = 0.8,
|
||||
session_id: Optional[str] = None,
|
||||
owner: Optional[str] = None,
|
||||
# New-schema fields (optional; fall back to old shape if absent)
|
||||
name: Optional[str] = None,
|
||||
description: Optional[str] = None,
|
||||
category: str = "general",
|
||||
when_to_use: Optional[str] = None,
|
||||
procedure: Optional[List[str]] = None,
|
||||
pitfalls: Optional[List[str]] = None,
|
||||
verification: Optional[List[str]] = None,
|
||||
platforms: Optional[List[str]] = None,
|
||||
requires_toolsets: Optional[List[str]] = None,
|
||||
fallback_for_toolsets: Optional[List[str]] = None,
|
||||
status: str = "draft",
|
||||
version: str = "1.0.0",
|
||||
) -> Dict:
|
||||
# Normalize name
|
||||
nm = slugify(name or title or description or "skill")
|
||||
|
||||
# Free dedup-at-creation (always, no API): for LLM-authored skills,
|
||||
# skip if a near-identical skill already exists (Jaccard over
|
||||
# name+description+when_to_use+procedure). User-authored skills are
|
||||
# never auto-skipped — a human asked for it. The every-X AI audit
|
||||
# handles the fuzzier near-duplicates this cheap check won't catch.
|
||||
_all = self.load_all()
|
||||
if source != "user":
|
||||
cand = _tokenize(" ".join([
|
||||
nm, (description or title or ""),
|
||||
(when_to_use if when_to_use is not None else (problem or "")),
|
||||
" ".join(procedure if procedure is not None else (steps or [])),
|
||||
]))
|
||||
if cand:
|
||||
for s in _all:
|
||||
ex = _tokenize(" ".join([
|
||||
s.get("name", ""), s.get("description", ""),
|
||||
s.get("when_to_use", ""),
|
||||
" ".join(s.get("procedure", []) or []),
|
||||
]))
|
||||
if _jaccard(cand, ex) >= 0.82:
|
||||
# Near-identical — don't grow the library; bump the
|
||||
# existing skill's usage and return it so the caller
|
||||
# knows it already exists.
|
||||
try:
|
||||
self.record_use(s["name"])
|
||||
except Exception:
|
||||
pass
|
||||
return {**s, "_deduped": True, "_duplicate_of": s.get("name")}
|
||||
|
||||
# Avoid clobbering an existing skill with the same name
|
||||
existing = {s["name"] for s in _all}
|
||||
base = nm
|
||||
i = 2
|
||||
while nm in existing:
|
||||
nm = f"{base}-{i}"
|
||||
i += 1
|
||||
|
||||
sk = Skill(
|
||||
name=nm,
|
||||
description=(description or title or "").strip(),
|
||||
version=version,
|
||||
category=category or "general",
|
||||
tags=list(tags or []),
|
||||
platforms=list(platforms or []),
|
||||
requires_toolsets=list(requires_toolsets or []),
|
||||
fallback_for_toolsets=list(fallback_for_toolsets or []),
|
||||
status=status or "draft",
|
||||
confidence=float(confidence),
|
||||
source=source,
|
||||
teacher_model=teacher_model,
|
||||
owner=owner,
|
||||
when_to_use=(when_to_use if when_to_use is not None else (problem or "")),
|
||||
procedure=list(procedure if procedure is not None else (steps or [])),
|
||||
pitfalls=list(pitfalls or []),
|
||||
verification=list(verification or []),
|
||||
body_extra=(solution if solution and not procedure else ""),
|
||||
)
|
||||
self._write_skill(sk)
|
||||
|
||||
return sk.to_dict()
|
||||
|
||||
def update_skill(self, skill_id: str, updates: Dict) -> bool:
|
||||
"""`skill_id` is the slug name. Allows updating any field plus
|
||||
renames if `name` changes (file is moved on disk)."""
|
||||
for path in self._iter_skill_files():
|
||||
sk = self._read_skill(path)
|
||||
if not sk or sk.name != skill_id:
|
||||
continue
|
||||
old_dir = os.path.dirname(path)
|
||||
|
||||
# Apply updates in a Skill-shape friendly way
|
||||
scalar_keys = (
|
||||
"description", "version", "category", "status", "confidence",
|
||||
"source", "teacher_model", "owner", "when_to_use",
|
||||
"body_extra",
|
||||
)
|
||||
for k in scalar_keys:
|
||||
if k in updates:
|
||||
setattr(sk, k, updates[k])
|
||||
list_keys = ("tags", "procedure", "pitfalls", "verification",
|
||||
"platforms", "requires_toolsets", "fallback_for_toolsets")
|
||||
for k in list_keys:
|
||||
if k in updates:
|
||||
setattr(sk, k, list(updates[k] or []))
|
||||
|
||||
# Old-schema field aliases
|
||||
if "title" in updates and "description" not in updates:
|
||||
sk.description = updates["title"]
|
||||
if "problem" in updates and "when_to_use" not in updates:
|
||||
sk.when_to_use = updates["problem"]
|
||||
if "solution" in updates and "body_extra" not in updates and not sk.procedure:
|
||||
sk.body_extra = updates["solution"]
|
||||
if "steps" in updates and "procedure" not in updates:
|
||||
sk.procedure = list(updates["steps"] or [])
|
||||
|
||||
# Rename
|
||||
new_name = slugify(updates.get("name") or sk.name)
|
||||
if new_name != sk.name:
|
||||
sk.name = new_name
|
||||
|
||||
# Write to potentially new path
|
||||
new_path = self._skill_file(sk.category, sk.name)
|
||||
if new_path != path:
|
||||
# Move the whole skill directory if rename or recategorize
|
||||
new_dir = os.path.dirname(new_path)
|
||||
if os.path.isdir(new_dir):
|
||||
logger.warning(f"Skill rename target exists: {new_dir}")
|
||||
return False
|
||||
os.makedirs(os.path.dirname(new_dir), exist_ok=True)
|
||||
os.rename(old_dir, new_dir)
|
||||
# Also rename usage key
|
||||
usage = self._load_usage()
|
||||
if skill_id in usage:
|
||||
usage[sk.name] = usage.pop(skill_id)
|
||||
self._save_usage(usage)
|
||||
self._write_skill(sk)
|
||||
return True
|
||||
return False
|
||||
|
||||
def delete_skill(self, skill_id: str) -> bool:
|
||||
for path in self._iter_skill_files():
|
||||
sk = self._read_skill(path)
|
||||
if not sk or sk.name != skill_id:
|
||||
continue
|
||||
skill_dir = os.path.dirname(path)
|
||||
try:
|
||||
# Remove the whole skill dir
|
||||
for root, dirs, files in os.walk(skill_dir, topdown=False):
|
||||
for f in files:
|
||||
os.remove(os.path.join(root, f))
|
||||
for d in dirs:
|
||||
os.rmdir(os.path.join(root, d))
|
||||
os.rmdir(skill_dir)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to remove skill dir {skill_dir}: {e}")
|
||||
return False
|
||||
usage = self._load_usage()
|
||||
if skill_id in usage:
|
||||
del usage[skill_id]
|
||||
self._save_usage(usage)
|
||||
return True
|
||||
return False
|
||||
|
||||
def record_use(self, skill_id: str) -> None:
|
||||
usage = self._load_usage()
|
||||
entry = usage.setdefault(skill_id, {"uses": 0, "last_used": None})
|
||||
entry["uses"] = int(entry.get("uses", 0)) + 1
|
||||
entry["last_used"] = int(time.time())
|
||||
self._save_usage(usage)
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Reading a single skill (used by the skill_view tool)
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def read_skill_md(self, name: str) -> Optional[str]:
|
||||
for path in self._iter_skill_files():
|
||||
sk = self._read_skill(path)
|
||||
if sk and sk.name == name:
|
||||
try:
|
||||
with open(path) as f:
|
||||
return f.read()
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
def read_skill_reference(self, name: str, ref_path: str) -> Optional[str]:
|
||||
"""Read a sub-file under the skill's directory (references/, etc).
|
||||
Refuses path traversal."""
|
||||
for path in self._iter_skill_files():
|
||||
sk = self._read_skill(path)
|
||||
if not sk or sk.name != name:
|
||||
continue
|
||||
base = os.path.realpath(os.path.dirname(path))
|
||||
target = os.path.realpath(os.path.join(base, ref_path))
|
||||
if os.path.commonpath([base, target]) != base or target == os.path.dirname(path):
|
||||
return None
|
||||
if not os.path.isfile(target):
|
||||
return None
|
||||
try:
|
||||
with open(target) as f:
|
||||
return f.read()
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Index — the lightweight summary injected into the system prompt
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def index_for(
|
||||
self,
|
||||
owner: Optional[str] = None,
|
||||
*,
|
||||
active_toolsets: Optional[List[str]] = None,
|
||||
platform: Optional[str] = None,
|
||||
) -> List[Dict]:
|
||||
"""Return the `[{name, description, category, status}]` list the
|
||||
agent sees in its system prompt.
|
||||
|
||||
Includes:
|
||||
- All published skills.
|
||||
- Drafts written by the teacher-escalation loop
|
||||
(`source == "teacher-escalation"`). The whole point of
|
||||
the teacher loop is for the student to find the new
|
||||
procedure on the very next turn — waiting for a manual
|
||||
publish click defeats the loop.
|
||||
|
||||
Excludes user-created drafts (status=draft, source != teacher-
|
||||
escalation) — those are work-in-progress and pollute the
|
||||
prompt with half-finished procedures.
|
||||
"""
|
||||
active_toolsets = active_toolsets or []
|
||||
out = []
|
||||
for s in self.load(owner=owner):
|
||||
status = s.get("status")
|
||||
# Published + None (pre-status legacy) always included.
|
||||
# Drafts only if the teacher wrote them.
|
||||
if status not in ("published", None):
|
||||
if status == "draft" and s.get("source") == "teacher-escalation":
|
||||
pass # let it through
|
||||
else:
|
||||
continue
|
||||
# Platform gating
|
||||
if platform and s.get("platforms") and platform not in s["platforms"]:
|
||||
continue
|
||||
# requires_toolsets: hide unless every required toolset is active
|
||||
req = s.get("requires_toolsets") or []
|
||||
if req and not all(t in active_toolsets for t in req):
|
||||
continue
|
||||
# fallback_for_toolsets: hide when any of those toolsets is active
|
||||
fb = s.get("fallback_for_toolsets") or []
|
||||
if fb and any(t in active_toolsets for t in fb):
|
||||
continue
|
||||
out.append({
|
||||
"name": s["name"],
|
||||
"description": s.get("description") or s.get("title", ""),
|
||||
"category": s.get("category", "general"),
|
||||
"status": status or "published",
|
||||
})
|
||||
out.sort(key=lambda x: (x["category"], x["name"]))
|
||||
return out
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Relevance search (kept for the existing /api/skills/search endpoint
|
||||
# and the `manage_skills` action="search"). Now operates on the new
|
||||
# field set.
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
def get_relevant_skills(
|
||||
self,
|
||||
query: str,
|
||||
skills: Optional[List[Dict]] = None,
|
||||
threshold: float = 0.3,
|
||||
max_items: int = 5,
|
||||
min_confidence: float = 0.0,
|
||||
) -> List[Dict]:
|
||||
if skills is None:
|
||||
skills = self.load_all()
|
||||
if not skills or not query.strip():
|
||||
return []
|
||||
# Consider published AND draft skills for relevance retrieval.
|
||||
# The teacher-escalation loop writes new skills as drafts; the
|
||||
# whole point is for the student to find them on the next try
|
||||
# without a manual publish click. The UI flags teacher-written
|
||||
# entries with a 🎓 badge so users can demote / delete bad
|
||||
# ones when they spot them.
|
||||
skills = [s for s in skills if s.get("status") in ("published", "draft")]
|
||||
# Confidence gate (used by prompt-injection, NOT by search): a DRAFT
|
||||
# skill must clear the bar to be injected. Published skills are already
|
||||
# vetted, so they always qualify. Missing confidence = treat as 1.0
|
||||
# (legacy skills shouldn't silently vanish). 0 disables the gate.
|
||||
if min_confidence > 0:
|
||||
def _passes(s):
|
||||
if s.get("status") == "published":
|
||||
return True
|
||||
c = s.get("confidence")
|
||||
if c is None:
|
||||
return True # unset → don't filter (legacy)
|
||||
return _to_float(c, 1.0) >= min_confidence # unparseable → pass
|
||||
skills = [s for s in skills if _passes(s)]
|
||||
if not skills:
|
||||
return []
|
||||
|
||||
query_tokens = _tokenize(query)
|
||||
scored = []
|
||||
for sk in skills:
|
||||
text = " ".join([
|
||||
sk.get("name", ""),
|
||||
sk.get("description", ""),
|
||||
sk.get("when_to_use", ""),
|
||||
" ".join(sk.get("tags", []) or []),
|
||||
" ".join(sk.get("procedure", []) or []),
|
||||
])
|
||||
score = _jaccard(query_tokens, _tokenize(text))
|
||||
for tag in sk.get("tags", []) or []:
|
||||
if tag and tag in query.lower():
|
||||
score = max(score, 0.3) * 1.3
|
||||
if query.lower() in (sk.get("description") or "").lower():
|
||||
score = max(score, 0.6)
|
||||
score *= 1.0 + _to_float(sk.get("confidence"), 0.5) * 0.1
|
||||
if sk.get("uses", 0) > 0:
|
||||
score *= 1.05
|
||||
if score >= threshold:
|
||||
scored.append((score, sk))
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
return [sk for _, sk in scored[:max_items]]
|
||||
Reference in New Issue
Block a user