From 5ee30cc144ea1583f4a31e073537d07d2d228feb Mon Sep 17 00:00:00 2001 From: Vykos Date: Tue, 2 Jun 2026 19:27:43 +0200 Subject: [PATCH] Scope skills usage by owner (#1312) --- routes/skills_routes.py | 35 ++++--- services/memory/skills.py | 51 ++++++--- src/agent_loop.py | 2 +- src/builtin_actions.py | 4 +- tests/test_skills_manager_owner_isolation.py | 104 +++++++++++++------ 5 files changed, 132 insertions(+), 64 deletions(-) diff --git a/routes/skills_routes.py b/routes/skills_routes.py index d25bb76..488267b 100644 --- a/routes/skills_routes.py +++ b/routes/skills_routes.py @@ -463,7 +463,7 @@ async def _run_skill_test_job(key, name, md, task, url, model, headers, owner, s if skills_manager is not None: v = (job["verdict"] or {}).get("verdict") or "unknown" try: - skills_manager.set_audit(name, v, by_teacher=False, worker_model=model) + skills_manager.set_audit(name, v, by_teacher=False, worker_model=model, owner=owner) except Exception: pass conf = {"pass": 0.95, "needs_work": 0.6, "fail": 0.4}.get(v) @@ -563,6 +563,7 @@ def _skill_duplicate_blocker(skills_manager, name: str, owner) -> Optional[str]: False, [keeper_name], f"Lower-priority duplicate of {keeper_name}", + owner=owner, ) except Exception: pass @@ -629,7 +630,7 @@ def _audit_finalize_status(skills_manager, name: str, owner, verdict: str, if generic_reason: necessary = False try: - skills_manager.set_necessity(name, False, [], generic_reason) + skills_manager.set_necessity(name, False, [], generic_reason, owner=owner) except Exception: pass duplicate_of = _skill_duplicate_blocker(skills_manager, name, owner) if verdict == "pass" else None @@ -788,7 +789,8 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers, nec = await _eval_skill_necessity(md, others, url, model, headers) if nec is not None: skills_manager.set_necessity(name, nec.get("necessary", True), - nec.get("redundant_with"), nec.get("reason")) + nec.get("redundant_with"), nec.get("reason"), + owner=owner) if not nec.get("necessary", True): log(f"{name}: possibly unnecessary — {nec.get('reason', '')[:80]}") except Exception as e: @@ -800,11 +802,11 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers, reason = generic_reason or (f"Lower-priority duplicate of {duplicate_of}" if duplicate_of else str((nec or {}).get("reason") or "Unnecessary skill")) try: skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35}, owner=owner) - skills_manager.set_audit(name, "skipped", by_teacher=False, worker_model=model) + skills_manager.set_audit(name, "skipped", by_teacher=False, worker_model=model, owner=owner) if duplicate_of: - skills_manager.set_necessity(name, False, [duplicate_of], reason) + skills_manager.set_necessity(name, False, [duplicate_of], reason, owner=owner) else: - skills_manager.set_necessity(name, False, [], reason) + skills_manager.set_necessity(name, False, [], reason, owner=owner) except Exception: pass log(f"{name}: draft — skipped functional test ({reason[:100]})") @@ -848,13 +850,13 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers, if fixed and fixed.strip() != md.strip(): _apply_skill_md(skills_manager, name, fixed, owner) _set_conf(0.95) - skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model) + skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model, owner=owner) refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None) status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.95, (refreshed or {}).get("necessity"), verdict) log(f"{name}: {status} — confidence 95%") return {"skill": name, "result": "pass", "verdict": verdict, "confidence": 0.95, "status": status} if v in ("unknown", "inconclusive"): - skills_manager.set_audit(name, "inconclusive", by_teacher=False, worker_model=model) + skills_manager.set_audit(name, "inconclusive", by_teacher=False, worker_model=model, owner=owner) status = _audit_finalize_status(skills_manager, name, owner, "inconclusive", skill.get("confidence") or 0.0, skill.get("necessity")) log(f"{name}: {status} — inconclusive") return {"skill": name, "result": "inconclusive", "verdict": verdict, "status": status} @@ -869,7 +871,7 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers, log(f"{name}: retry (self) = {v}") if v == "pass": _set_conf(0.85) - skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model) + skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model, owner=owner) refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None) status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.85, (refreshed or {}).get("necessity"), verdict) log(f"{name}: {status} — confidence 85% after self-edit") @@ -893,7 +895,9 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers, log(f"{name}: retry on student after teacher rewrite = {v}") if v == "pass": _set_conf(0.8) - skills_manager.set_audit(name, "pass", by_teacher=True, worker_model=model, teacher_model=t_model) + skills_manager.set_audit( + name, "pass", by_teacher=True, worker_model=model, teacher_model=t_model, owner=owner + ) refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None) status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.8, (refreshed or {}).get("necessity"), verdict) log(f"{name}: {status} — confidence 80% after teacher rewrite") @@ -908,6 +912,7 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers, name, v or "fail", by_teacher=teacher_ran, worker_model=model, teacher_model=(teacher[1] if teacher_ran and teacher else ""), + owner=owner, ) log(f"{name}: flagged — confidence lowered, kept as draft for manual review") return {"skill": name, "result": "flagged", "verdict": verdict, "confidence": 0.35} @@ -976,7 +981,7 @@ async def _run_audit_all_job(key, skills_manager, names, url, model, headers, te job.pop("task", None) -def _resolve_audit_models(): +def _resolve_audit_models(owner=None): """Resolve (url, model, headers, teacher) for an audit run from Settings. Worker = Utility model (falling back to Default, normalized to a served @@ -985,7 +990,7 @@ def _resolve_audit_models(): ValueError if no worker model. """ from src.endpoint_resolver import resolve_endpoint - url, model, headers = resolve_endpoint("utility") + url, model, headers = resolve_endpoint("utility", owner=owner) if not url or not model: raise ValueError("No model configured — set a Default or Utility model in Settings.") try: @@ -1029,7 +1034,7 @@ async def run_scheduled_skill_audit(skills_manager: SkillsManager, return {"status": "running", "skipped": True} try: - url, model, headers, teacher = _resolve_audit_models() + url, model, headers, teacher = _resolve_audit_models(owner=owner) except ValueError as e: logger.info(f"Scheduled skill audit skipped — {e}") return {"status": "skipped", "reason": str(e)} @@ -1280,7 +1285,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter: # Prefer the configured DEFAULT (→ Utility) model — not the current chat # session's model. Fall back to the caller's session model only if unset. - url, model, headers = resolve_endpoint("default") + url, model, headers = resolve_endpoint("default", owner=user) if not url or not model: url = url or ((body.get("endpoint_url") or "").strip() or None) model = model or ((body.get("model") or "").strip() or None) @@ -1360,7 +1365,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter: # Worker model (Default, normalized) + optional teacher — shared resolver. try: - url, model, headers, teacher = _resolve_audit_models() + url, model, headers, teacher = _resolve_audit_models(owner=user) except ValueError as e: raise HTTPException(400, str(e)) diff --git a/services/memory/skills.py b/services/memory/skills.py index 24e2dfc..ee0dd26 100644 --- a/services/memory/skills.py +++ b/services/memory/skills.py @@ -6,8 +6,8 @@ YAML frontmatter and a structured markdown body (When to Use / Procedure / Pitfalls / Verification). See `skill_format.py` for the format. Usage counters (`uses`, `last_used`) live in a sidecar -`data/skills/_usage.json` keyed by skill name so the SKILL.md content -doesn't churn on every retrieval. +`data/skills/_usage.json` keyed by owner plus skill name so the SKILL.md +content doesn't churn on every retrieval. Ownership: skills declare `owner: ` in frontmatter. Single-user deployments can leave that blank. @@ -105,14 +105,29 @@ class SkillsManager: json.dump(usage, f, indent=2) os.replace(tmp, self.usage_file) + @staticmethod + def _usage_key(name: str, owner: Optional[str] = None) -> str: + # Skill names are not globally unique once multiple owners are present. + # Keep the usage sidecar keyed the same way the skill file is scoped. + return f"{owner}::{name}" if owner else name + + def _usage_entry(self, usage: Dict[str, Dict], name: str, owner: Optional[str] = None) -> Dict: + key = self._usage_key(name, owner) + entry = usage.get(key) + if isinstance(entry, dict): + return entry + return {} + def set_audit(self, name: str, verdict: str, by_teacher: bool = False, - worker_model: str = "", teacher_model: str = "") -> None: + worker_model: str = "", teacher_model: str = "", + owner: Optional[str] = None) -> None: """Record the last test/audit result for a skill in the usage sidecar (so it surfaces in load() without touching SKILL.md). Drives the 'verified' check + teacher mark on the card.""" import time as _t usage = self._load_usage() - e = usage.setdefault(name, {"uses": 0, "last_used": None}) + key = self._usage_key(name, owner) + e = usage.setdefault(key, {"uses": 0, "last_used": None}) e["audit_verdict"] = verdict e["audit_by_teacher"] = bool(by_teacher) if worker_model: @@ -123,11 +138,13 @@ class SkillsManager: self._save_usage(usage) def set_necessity(self, name: str, necessary: bool, - redundant_with=None, reason: str = "") -> None: + redundant_with=None, reason: str = "", + owner: Optional[str] = None) -> None: """Record the advisory 'is this skill necessary?' judgment in the usage sidecar. Surfaced on the card as a flag; never acts on the skill.""" usage = self._load_usage() - e = usage.setdefault(name, {"uses": 0, "last_used": None}) + key = self._usage_key(name, owner) + e = usage.setdefault(key, {"uses": 0, "last_used": None}) e["necessity"] = { "necessary": bool(necessary), "redundant_with": list(redundant_with or []), @@ -207,7 +224,7 @@ class SkillsManager: if not sk: continue d = sk.to_dict() - u = usage.get(sk.name) or {} + u = self._usage_entry(usage, sk.name, sk.owner) d["uses"] = int(u.get("uses", 0)) d["last_used"] = u.get("last_used") d["audit_verdict"] = u.get("audit_verdict") @@ -308,6 +325,7 @@ class SkillsManager: # never auto-skipped — a human asked for it. The every-X AI audit # handles the fuzzier near-duplicates this cheap check won't catch. _all = self.load_all() + _dedup_pool = _all if owner is None else [s for s in _all if s.get("owner") == owner] if source != "user": cand = _tokenize(" ".join([ nm, (description or title or ""), @@ -315,7 +333,7 @@ class SkillsManager: " ".join(procedure if procedure is not None else (steps or [])), ])) if cand: - for s in _all: + for s in _dedup_pool: ex = _tokenize(" ".join([ s.get("name", ""), s.get("description", ""), s.get("when_to_use", ""), @@ -326,7 +344,7 @@ class SkillsManager: # existing skill's usage and return it so the caller # knows it already exists. try: - self.record_use(s["name"]) + self.record_use(s["name"], owner=s.get("owner")) except Exception: pass return {**s, "_deduped": True, "_duplicate_of": s.get("name")} @@ -428,8 +446,9 @@ class SkillsManager: os.rename(old_dir, new_dir) # Also rename usage key usage = self._load_usage() - if skill_id in usage: - usage[sk.name] = usage.pop(skill_id) + old_usage_key = self._usage_key(skill_id, sk.owner) + if old_usage_key in usage: + usage[self._usage_key(sk.name, sk.owner)] = usage.pop(old_usage_key) self._save_usage(usage) self._write_skill(sk) return True @@ -455,15 +474,17 @@ class SkillsManager: logger.warning(f"Failed to remove skill dir {skill_dir}: {e}") return False usage = self._load_usage() - if skill_id in usage: - del usage[skill_id] + usage_key = self._usage_key(skill_id, sk.owner) + if usage_key in usage: + del usage[usage_key] self._save_usage(usage) return True return False - def record_use(self, skill_id: str) -> None: + def record_use(self, skill_id: str, owner: Optional[str] = None) -> None: usage = self._load_usage() - entry = usage.setdefault(skill_id, {"uses": 0, "last_used": None}) + key = self._usage_key(skill_id, owner) + entry = usage.setdefault(key, {"uses": 0, "last_used": None}) entry["uses"] = int(entry.get("uses", 0)) + 1 entry["last_used"] = int(time.time()) self._save_usage(usage) diff --git a/src/agent_loop.py b/src/agent_loop.py index 3fdce70..6c039b8 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -862,7 +862,7 @@ def _build_system_prompt( # matter how often it's been matched and applied. for _sk in relevant_skills: try: - sm.record_use(_sk.get('name', '')) + sm.record_use(_sk.get('name', ''), owner=owner) except Exception: pass lines.append("## Relevant skills for this request") diff --git a/src/builtin_actions.py b/src/builtin_actions.py index b8eed3f..3d72ad9 100644 --- a/src/builtin_actions.py +++ b/src/builtin_actions.py @@ -1313,7 +1313,7 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]: if not names: raise TaskNoop("no skills to test") - url, model, headers = resolve_endpoint("default") + url, model, headers = resolve_endpoint("default", owner=owner) if not url or not model: return "No Default/Utility model configured — set one in Settings.", False @@ -1374,7 +1374,7 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]: # user-set value (e.g. 1.0 → 0.95) is destructive. if v in ("pass", "needs_work", "fail", "inconclusive"): try: - sm.set_audit(name, v, by_teacher=False, worker_model=model) + sm.set_audit(name, v, by_teacher=False, worker_model=model, owner=owner) except Exception as _e: logger.warning(f"test_skills set_audit({name}) failed: {_e}") if v == "unknown": diff --git a/tests/test_skills_manager_owner_isolation.py b/tests/test_skills_manager_owner_isolation.py index f1fa127..8d93d9a 100644 --- a/tests/test_skills_manager_owner_isolation.py +++ b/tests/test_skills_manager_owner_isolation.py @@ -24,7 +24,6 @@ silently mutates a file owned by a different user AND overwrites the import os import sys import textwrap -import types from pathlib import Path from unittest.mock import MagicMock @@ -33,27 +32,12 @@ import pytest # ── module-load stubbing (matches other tests in this repo) ────────── # Stub heavy deps so importing the skills manager doesn't pull DB / FastAPI. -for _mod in [ - "sqlalchemy", "sqlalchemy.orm", "sqlalchemy.ext", - "sqlalchemy.ext.declarative", "src.database", - "core.atomic_io", # we'll patch atomic_write_text below -]: +for _mod in ("sqlalchemy", "sqlalchemy.orm", "sqlalchemy.ext", "sqlalchemy.ext.declarative"): if _mod not in sys.modules: - sys.modules[_mod] = MagicMock() - - -# Provide a no-op atomic_write_text for SkillsManager._write_skill. -def _fake_atomic_write_text(path, content, **kw): - Path(path).parent.mkdir(parents=True, exist_ok=True) - Path(path).write_text(content, encoding="utf-8") - -_fake_core = types.ModuleType("core.atomic_io") -_fake_core.atomic_write_text = _fake_atomic_write_text -_fake_core.atomic_write_json = lambda p, d, **kw: Path(p).write_text( - "{}", encoding="utf-8" -) -sys.modules["core.atomic_io"] = _fake_core - + try: + __import__(_mod) + except ImportError: + sys.modules[_mod] = MagicMock() from services.memory.skills import SkillsManager # noqa: E402 from services.memory.skill_format import Skill, slugify # noqa: E402 @@ -195,13 +179,12 @@ def test_update_skill_scalar_keys_exclude_owner(): ) -def test_read_skill_md_scopes_to_owner(tmp_path): +def test_read_skill_md_and_references_are_owner_scoped(tmp_path): """Two users own distinct skills with the same slug. read_skill_md() called with owner='alice' must return Alice's content, not Bob's. Called without an owner it must match only ownerless skills.""" skills_root = tmp_path / "skills" skills_root.mkdir(parents=True, exist_ok=True) - alice_path = _write_skill_md( skills_root, category="alice-cat", name="login-flow", owner="alice", description="alice secret", @@ -210,27 +193,28 @@ def test_read_skill_md_scopes_to_owner(tmp_path): skills_root, category="bob-cat", name="login-flow", owner="bob", description="bob secret", ) + refs = bob_path.parent / "references" + refs.mkdir() + (refs / "notes.txt").write_text("bob private notes", encoding="utf-8") sm = SkillsManager(str(tmp_path)) alice_md = sm.read_skill_md("login-flow", owner="alice") assert alice_md is not None, "read_skill_md returned None for alice's skill" - assert "alice secret" in alice_md, ( - f"read_skill_md(owner='alice') returned the wrong file: {alice_md[:200]}" - ) + assert "alice secret" in alice_md bob_md = sm.read_skill_md("login-flow", owner="bob") assert bob_md is not None, "read_skill_md returned None for bob's skill" - assert "bob secret" in bob_md, ( - f"read_skill_md(owner='bob') returned the wrong file: {bob_md[:200]}" - ) + assert "bob secret" in bob_md no_owner_md = sm.read_skill_md("login-flow") assert no_owner_md is None, ( "read_skill_md without owner matched an owned skill — " - "default should only match ownerless skills. Got: " - f"{no_owner_md[:200] if no_owner_md else 'None'}" + "default should only match ownerless skills." ) + assert sm.read_skill_md("login-flow", owner="charlie") is None + assert sm.read_skill_reference("login-flow", "references/notes.txt", owner="bob") == "bob private notes" + assert sm.read_skill_reference("login-flow", "references/notes.txt", owner="alice") is None def test_update_skill_positive_scoping(tmp_path): @@ -262,3 +246,61 @@ def test_update_skill_positive_scoping(tmp_path): assert "bob original" in after_bob and "alice updated" not in after_bob, ( "Bob's file was mutated by Alice's update_skill call — cross-tenant leak." ) + + +def test_add_skill_dedup_does_not_cross_owners(tmp_path): + sm = SkillsManager(str(tmp_path)) + first = sm.add_skill( + name="shared-flow", + description="same description", + category="general", + when_to_use="same trigger", + procedure=["same procedure"], + owner="alice", + source="learned", + ) + second = sm.add_skill( + name="shared-flow", + description="same description", + category="general", + when_to_use="same trigger", + procedure=["same procedure"], + owner="bob", + source="learned", + ) + + assert not first.get("_deduped") + assert not second.get("_deduped") + assert second.get("owner") == "bob" + + +def test_usage_sidecar_is_owner_scoped(tmp_path): + skills_root = tmp_path / "skills" + skills_root.mkdir(parents=True, exist_ok=True) + _write_skill_md( + skills_root, category="alice-cat", name="shared-flow", + owner="alice", description="alice secret", + ) + _write_skill_md( + skills_root, category="bob-cat", name="shared-flow", + owner="bob", description="bob secret", + ) + + sm = SkillsManager(str(tmp_path)) + sm.record_use("shared-flow", owner="alice") + sm.set_audit("shared-flow", "pass", by_teacher=False, owner="bob") + sm.set_necessity("shared-flow", False, ["other-flow"], "redundant", owner="bob") + + alice = sm.load(owner="alice")[0] + bob = sm.load(owner="bob")[0] + + assert alice["uses"] == 1 + assert alice["audit_verdict"] is None + assert alice["necessity"] is None + assert bob["uses"] == 0 + assert bob["audit_verdict"] == "pass" + assert bob["necessity"] == { + "necessary": False, + "redundant_with": ["other-flow"], + "reason": "redundant", + }