Scope skills usage by owner (#1312)

This commit is contained in:
Vykos
2026-06-02 19:27:43 +02:00
committed by GitHub
parent 1adf21a7e5
commit 5ee30cc144
5 changed files with 132 additions and 64 deletions

View File

@@ -463,7 +463,7 @@ async def _run_skill_test_job(key, name, md, task, url, model, headers, owner, s
if skills_manager is not None:
v = (job["verdict"] or {}).get("verdict") or "unknown"
try:
skills_manager.set_audit(name, v, by_teacher=False, worker_model=model)
skills_manager.set_audit(name, v, by_teacher=False, worker_model=model, owner=owner)
except Exception:
pass
conf = {"pass": 0.95, "needs_work": 0.6, "fail": 0.4}.get(v)
@@ -563,6 +563,7 @@ def _skill_duplicate_blocker(skills_manager, name: str, owner) -> Optional[str]:
False,
[keeper_name],
f"Lower-priority duplicate of {keeper_name}",
owner=owner,
)
except Exception:
pass
@@ -629,7 +630,7 @@ def _audit_finalize_status(skills_manager, name: str, owner, verdict: str,
if generic_reason:
necessary = False
try:
skills_manager.set_necessity(name, False, [], generic_reason)
skills_manager.set_necessity(name, False, [], generic_reason, owner=owner)
except Exception:
pass
duplicate_of = _skill_duplicate_blocker(skills_manager, name, owner) if verdict == "pass" else None
@@ -788,7 +789,8 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
nec = await _eval_skill_necessity(md, others, url, model, headers)
if nec is not None:
skills_manager.set_necessity(name, nec.get("necessary", True),
nec.get("redundant_with"), nec.get("reason"))
nec.get("redundant_with"), nec.get("reason"),
owner=owner)
if not nec.get("necessary", True):
log(f"{name}: possibly unnecessary — {nec.get('reason', '')[:80]}")
except Exception as e:
@@ -800,11 +802,11 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
reason = generic_reason or (f"Lower-priority duplicate of {duplicate_of}" if duplicate_of else str((nec or {}).get("reason") or "Unnecessary skill"))
try:
skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35}, owner=owner)
skills_manager.set_audit(name, "skipped", by_teacher=False, worker_model=model)
skills_manager.set_audit(name, "skipped", by_teacher=False, worker_model=model, owner=owner)
if duplicate_of:
skills_manager.set_necessity(name, False, [duplicate_of], reason)
skills_manager.set_necessity(name, False, [duplicate_of], reason, owner=owner)
else:
skills_manager.set_necessity(name, False, [], reason)
skills_manager.set_necessity(name, False, [], reason, owner=owner)
except Exception:
pass
log(f"{name}: draft — skipped functional test ({reason[:100]})")
@@ -848,13 +850,13 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
if fixed and fixed.strip() != md.strip():
_apply_skill_md(skills_manager, name, fixed, owner)
_set_conf(0.95)
skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model)
skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model, owner=owner)
refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None)
status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.95, (refreshed or {}).get("necessity"), verdict)
log(f"{name}: {status} — confidence 95%")
return {"skill": name, "result": "pass", "verdict": verdict, "confidence": 0.95, "status": status}
if v in ("unknown", "inconclusive"):
skills_manager.set_audit(name, "inconclusive", by_teacher=False, worker_model=model)
skills_manager.set_audit(name, "inconclusive", by_teacher=False, worker_model=model, owner=owner)
status = _audit_finalize_status(skills_manager, name, owner, "inconclusive", skill.get("confidence") or 0.0, skill.get("necessity"))
log(f"{name}: {status} — inconclusive")
return {"skill": name, "result": "inconclusive", "verdict": verdict, "status": status}
@@ -869,7 +871,7 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
log(f"{name}: retry (self) = {v}")
if v == "pass":
_set_conf(0.85)
skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model)
skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model, owner=owner)
refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None)
status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.85, (refreshed or {}).get("necessity"), verdict)
log(f"{name}: {status} — confidence 85% after self-edit")
@@ -893,7 +895,9 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
log(f"{name}: retry on student after teacher rewrite = {v}")
if v == "pass":
_set_conf(0.8)
skills_manager.set_audit(name, "pass", by_teacher=True, worker_model=model, teacher_model=t_model)
skills_manager.set_audit(
name, "pass", by_teacher=True, worker_model=model, teacher_model=t_model, owner=owner
)
refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None)
status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.8, (refreshed or {}).get("necessity"), verdict)
log(f"{name}: {status} — confidence 80% after teacher rewrite")
@@ -908,6 +912,7 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
name, v or "fail", by_teacher=teacher_ran,
worker_model=model,
teacher_model=(teacher[1] if teacher_ran and teacher else ""),
owner=owner,
)
log(f"{name}: flagged — confidence lowered, kept as draft for manual review")
return {"skill": name, "result": "flagged", "verdict": verdict, "confidence": 0.35}
@@ -976,7 +981,7 @@ async def _run_audit_all_job(key, skills_manager, names, url, model, headers, te
job.pop("task", None)
def _resolve_audit_models():
def _resolve_audit_models(owner=None):
"""Resolve (url, model, headers, teacher) for an audit run from Settings.
Worker = Utility model (falling back to Default, normalized to a served
@@ -985,7 +990,7 @@ def _resolve_audit_models():
ValueError if no worker model.
"""
from src.endpoint_resolver import resolve_endpoint
url, model, headers = resolve_endpoint("utility")
url, model, headers = resolve_endpoint("utility", owner=owner)
if not url or not model:
raise ValueError("No model configured — set a Default or Utility model in Settings.")
try:
@@ -1029,7 +1034,7 @@ async def run_scheduled_skill_audit(skills_manager: SkillsManager,
return {"status": "running", "skipped": True}
try:
url, model, headers, teacher = _resolve_audit_models()
url, model, headers, teacher = _resolve_audit_models(owner=owner)
except ValueError as e:
logger.info(f"Scheduled skill audit skipped — {e}")
return {"status": "skipped", "reason": str(e)}
@@ -1280,7 +1285,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
# Prefer the configured DEFAULT (→ Utility) model — not the current chat
# session's model. Fall back to the caller's session model only if unset.
url, model, headers = resolve_endpoint("default")
url, model, headers = resolve_endpoint("default", owner=user)
if not url or not model:
url = url or ((body.get("endpoint_url") or "").strip() or None)
model = model or ((body.get("model") or "").strip() or None)
@@ -1360,7 +1365,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
# Worker model (Default, normalized) + optional teacher — shared resolver.
try:
url, model, headers, teacher = _resolve_audit_models()
url, model, headers, teacher = _resolve_audit_models(owner=user)
except ValueError as e:
raise HTTPException(400, str(e))

View File

@@ -6,8 +6,8 @@ YAML frontmatter and a structured markdown body (When to Use / Procedure /
Pitfalls / Verification). See `skill_format.py` for the format.
Usage counters (`uses`, `last_used`) live in a sidecar
`data/skills/_usage.json` keyed by skill name so the SKILL.md content
doesn't churn on every retrieval.
`data/skills/_usage.json` keyed by owner plus skill name so the SKILL.md
content doesn't churn on every retrieval.
Ownership: skills declare `owner: <username>` in frontmatter. Single-user
deployments can leave that blank.
@@ -105,14 +105,29 @@ class SkillsManager:
json.dump(usage, f, indent=2)
os.replace(tmp, self.usage_file)
@staticmethod
def _usage_key(name: str, owner: Optional[str] = None) -> str:
# Skill names are not globally unique once multiple owners are present.
# Keep the usage sidecar keyed the same way the skill file is scoped.
return f"{owner}::{name}" if owner else name
def _usage_entry(self, usage: Dict[str, Dict], name: str, owner: Optional[str] = None) -> Dict:
key = self._usage_key(name, owner)
entry = usage.get(key)
if isinstance(entry, dict):
return entry
return {}
def set_audit(self, name: str, verdict: str, by_teacher: bool = False,
worker_model: str = "", teacher_model: str = "") -> None:
worker_model: str = "", teacher_model: str = "",
owner: Optional[str] = None) -> None:
"""Record the last test/audit result for a skill in the usage sidecar
(so it surfaces in load() without touching SKILL.md). Drives the
'verified' check + teacher mark on the card."""
import time as _t
usage = self._load_usage()
e = usage.setdefault(name, {"uses": 0, "last_used": None})
key = self._usage_key(name, owner)
e = usage.setdefault(key, {"uses": 0, "last_used": None})
e["audit_verdict"] = verdict
e["audit_by_teacher"] = bool(by_teacher)
if worker_model:
@@ -123,11 +138,13 @@ class SkillsManager:
self._save_usage(usage)
def set_necessity(self, name: str, necessary: bool,
redundant_with=None, reason: str = "") -> None:
redundant_with=None, reason: str = "",
owner: Optional[str] = None) -> None:
"""Record the advisory 'is this skill necessary?' judgment in the usage
sidecar. Surfaced on the card as a flag; never acts on the skill."""
usage = self._load_usage()
e = usage.setdefault(name, {"uses": 0, "last_used": None})
key = self._usage_key(name, owner)
e = usage.setdefault(key, {"uses": 0, "last_used": None})
e["necessity"] = {
"necessary": bool(necessary),
"redundant_with": list(redundant_with or []),
@@ -207,7 +224,7 @@ class SkillsManager:
if not sk:
continue
d = sk.to_dict()
u = usage.get(sk.name) or {}
u = self._usage_entry(usage, sk.name, sk.owner)
d["uses"] = int(u.get("uses", 0))
d["last_used"] = u.get("last_used")
d["audit_verdict"] = u.get("audit_verdict")
@@ -308,6 +325,7 @@ class SkillsManager:
# never auto-skipped — a human asked for it. The every-X AI audit
# handles the fuzzier near-duplicates this cheap check won't catch.
_all = self.load_all()
_dedup_pool = _all if owner is None else [s for s in _all if s.get("owner") == owner]
if source != "user":
cand = _tokenize(" ".join([
nm, (description or title or ""),
@@ -315,7 +333,7 @@ class SkillsManager:
" ".join(procedure if procedure is not None else (steps or [])),
]))
if cand:
for s in _all:
for s in _dedup_pool:
ex = _tokenize(" ".join([
s.get("name", ""), s.get("description", ""),
s.get("when_to_use", ""),
@@ -326,7 +344,7 @@ class SkillsManager:
# existing skill's usage and return it so the caller
# knows it already exists.
try:
self.record_use(s["name"])
self.record_use(s["name"], owner=s.get("owner"))
except Exception:
pass
return {**s, "_deduped": True, "_duplicate_of": s.get("name")}
@@ -428,8 +446,9 @@ class SkillsManager:
os.rename(old_dir, new_dir)
# Also rename usage key
usage = self._load_usage()
if skill_id in usage:
usage[sk.name] = usage.pop(skill_id)
old_usage_key = self._usage_key(skill_id, sk.owner)
if old_usage_key in usage:
usage[self._usage_key(sk.name, sk.owner)] = usage.pop(old_usage_key)
self._save_usage(usage)
self._write_skill(sk)
return True
@@ -455,15 +474,17 @@ class SkillsManager:
logger.warning(f"Failed to remove skill dir {skill_dir}: {e}")
return False
usage = self._load_usage()
if skill_id in usage:
del usage[skill_id]
usage_key = self._usage_key(skill_id, sk.owner)
if usage_key in usage:
del usage[usage_key]
self._save_usage(usage)
return True
return False
def record_use(self, skill_id: str) -> None:
def record_use(self, skill_id: str, owner: Optional[str] = None) -> None:
usage = self._load_usage()
entry = usage.setdefault(skill_id, {"uses": 0, "last_used": None})
key = self._usage_key(skill_id, owner)
entry = usage.setdefault(key, {"uses": 0, "last_used": None})
entry["uses"] = int(entry.get("uses", 0)) + 1
entry["last_used"] = int(time.time())
self._save_usage(usage)

View File

@@ -862,7 +862,7 @@ def _build_system_prompt(
# matter how often it's been matched and applied.
for _sk in relevant_skills:
try:
sm.record_use(_sk.get('name', ''))
sm.record_use(_sk.get('name', ''), owner=owner)
except Exception:
pass
lines.append("## Relevant skills for this request")

View File

@@ -1313,7 +1313,7 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
if not names:
raise TaskNoop("no skills to test")
url, model, headers = resolve_endpoint("default")
url, model, headers = resolve_endpoint("default", owner=owner)
if not url or not model:
return "No Default/Utility model configured — set one in Settings.", False
@@ -1374,7 +1374,7 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
# user-set value (e.g. 1.0 → 0.95) is destructive.
if v in ("pass", "needs_work", "fail", "inconclusive"):
try:
sm.set_audit(name, v, by_teacher=False, worker_model=model)
sm.set_audit(name, v, by_teacher=False, worker_model=model, owner=owner)
except Exception as _e:
logger.warning(f"test_skills set_audit({name}) failed: {_e}")
if v == "unknown":

View File

@@ -24,7 +24,6 @@ silently mutates a file owned by a different user AND overwrites the
import os
import sys
import textwrap
import types
from pathlib import Path
from unittest.mock import MagicMock
@@ -33,28 +32,13 @@ import pytest
# ── module-load stubbing (matches other tests in this repo) ──────────
# Stub heavy deps so importing the skills manager doesn't pull DB / FastAPI.
for _mod in [
"sqlalchemy", "sqlalchemy.orm", "sqlalchemy.ext",
"sqlalchemy.ext.declarative", "src.database",
"core.atomic_io", # we'll patch atomic_write_text below
]:
for _mod in ("sqlalchemy", "sqlalchemy.orm", "sqlalchemy.ext", "sqlalchemy.ext.declarative"):
if _mod not in sys.modules:
try:
__import__(_mod)
except ImportError:
sys.modules[_mod] = MagicMock()
# Provide a no-op atomic_write_text for SkillsManager._write_skill.
def _fake_atomic_write_text(path, content, **kw):
Path(path).parent.mkdir(parents=True, exist_ok=True)
Path(path).write_text(content, encoding="utf-8")
_fake_core = types.ModuleType("core.atomic_io")
_fake_core.atomic_write_text = _fake_atomic_write_text
_fake_core.atomic_write_json = lambda p, d, **kw: Path(p).write_text(
"{}", encoding="utf-8"
)
sys.modules["core.atomic_io"] = _fake_core
from services.memory.skills import SkillsManager # noqa: E402
from services.memory.skill_format import Skill, slugify # noqa: E402
@@ -195,13 +179,12 @@ def test_update_skill_scalar_keys_exclude_owner():
)
def test_read_skill_md_scopes_to_owner(tmp_path):
def test_read_skill_md_and_references_are_owner_scoped(tmp_path):
"""Two users own distinct skills with the same slug. read_skill_md()
called with owner='alice' must return Alice's content, not Bob's.
Called without an owner it must match only ownerless skills."""
skills_root = tmp_path / "skills"
skills_root.mkdir(parents=True, exist_ok=True)
alice_path = _write_skill_md(
skills_root, category="alice-cat", name="login-flow",
owner="alice", description="alice secret",
@@ -210,27 +193,28 @@ def test_read_skill_md_scopes_to_owner(tmp_path):
skills_root, category="bob-cat", name="login-flow",
owner="bob", description="bob secret",
)
refs = bob_path.parent / "references"
refs.mkdir()
(refs / "notes.txt").write_text("bob private notes", encoding="utf-8")
sm = SkillsManager(str(tmp_path))
alice_md = sm.read_skill_md("login-flow", owner="alice")
assert alice_md is not None, "read_skill_md returned None for alice's skill"
assert "alice secret" in alice_md, (
f"read_skill_md(owner='alice') returned the wrong file: {alice_md[:200]}"
)
assert "alice secret" in alice_md
bob_md = sm.read_skill_md("login-flow", owner="bob")
assert bob_md is not None, "read_skill_md returned None for bob's skill"
assert "bob secret" in bob_md, (
f"read_skill_md(owner='bob') returned the wrong file: {bob_md[:200]}"
)
assert "bob secret" in bob_md
no_owner_md = sm.read_skill_md("login-flow")
assert no_owner_md is None, (
"read_skill_md without owner matched an owned skill — "
"default should only match ownerless skills. Got: "
f"{no_owner_md[:200] if no_owner_md else 'None'}"
"default should only match ownerless skills."
)
assert sm.read_skill_md("login-flow", owner="charlie") is None
assert sm.read_skill_reference("login-flow", "references/notes.txt", owner="bob") == "bob private notes"
assert sm.read_skill_reference("login-flow", "references/notes.txt", owner="alice") is None
def test_update_skill_positive_scoping(tmp_path):
@@ -262,3 +246,61 @@ def test_update_skill_positive_scoping(tmp_path):
assert "bob original" in after_bob and "alice updated" not in after_bob, (
"Bob's file was mutated by Alice's update_skill call — cross-tenant leak."
)
def test_add_skill_dedup_does_not_cross_owners(tmp_path):
sm = SkillsManager(str(tmp_path))
first = sm.add_skill(
name="shared-flow",
description="same description",
category="general",
when_to_use="same trigger",
procedure=["same procedure"],
owner="alice",
source="learned",
)
second = sm.add_skill(
name="shared-flow",
description="same description",
category="general",
when_to_use="same trigger",
procedure=["same procedure"],
owner="bob",
source="learned",
)
assert not first.get("_deduped")
assert not second.get("_deduped")
assert second.get("owner") == "bob"
def test_usage_sidecar_is_owner_scoped(tmp_path):
skills_root = tmp_path / "skills"
skills_root.mkdir(parents=True, exist_ok=True)
_write_skill_md(
skills_root, category="alice-cat", name="shared-flow",
owner="alice", description="alice secret",
)
_write_skill_md(
skills_root, category="bob-cat", name="shared-flow",
owner="bob", description="bob secret",
)
sm = SkillsManager(str(tmp_path))
sm.record_use("shared-flow", owner="alice")
sm.set_audit("shared-flow", "pass", by_teacher=False, owner="bob")
sm.set_necessity("shared-flow", False, ["other-flow"], "redundant", owner="bob")
alice = sm.load(owner="alice")[0]
bob = sm.load(owner="bob")[0]
assert alice["uses"] == 1
assert alice["audit_verdict"] is None
assert alice["necessity"] is None
assert bob["uses"] == 0
assert bob["audit_verdict"] == "pass"
assert bob["necessity"] == {
"necessary": False,
"redundant_with": ["other-flow"],
"reason": "redundant",
}