Odysseus v1.0

2026-05-31 23:58:26 +09:00
commit e5c99a5eee
421 changed files with 271349 additions and 0 deletions
--- a/services/memory/skill_format.py
+++ b/services/memory/skill_format.py
@@ -0,0 +1,444 @@
+"""SKILL.md parser & writer.
+
+Reads/writes a single skill from a `SKILL.md` file with YAML frontmatter
+and a structured markdown body. Inspired by Hermes' skills format
+(https://hermes-agent.nousresearch.com/docs/user-guide/features/skills).
+
+Frontmatter shape (YAML):
+
+    ---
+    name: open-pr-from-branch
+    description: One-line summary surfaced in the skills index.
+    version: 1.0.0
+    category: dev
+    tags: [git, github]
+    platforms: [linux, macos]            # optional
+    requires_toolsets: []                # optional
+    fallback_for_toolsets: []            # optional
+    status: published                    # draft | published
+    confidence: 0.8                      # 0..1
+    source: learned                      # learned | taught | imported
+    teacher_model: claude-opus-4-7       # optional
+    created: 2026-05-09T21:43:00Z
+    ---
+
+Body sections (any subset; rendered as headings):
+
+    ## When to Use
+    Trigger conditions in plain English.
+
+    ## Procedure
+    1. First step
+    2. Second step
+
+    ## Pitfalls
+    - Common failure mode + how to recover
+
+    ## Verification
+    - How to confirm success
+
+    Anything else (text before the first heading or under an unrecognized one)
+    is kept in `body_extra` and written back after the known sections on save.
+
+Usage counters (`uses`, `last_used`) live in a sidecar `_usage.json` keyed
+by skill name, so the SKILL.md file doesn't churn on every retrieval.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+
+# ---------------------------------------------------------------------------
+# Slugify
+# ---------------------------------------------------------------------------
+
+_SLUG_RE = re.compile(r"[^a-z0-9]+")  # one or more chars outside a-z / 0-9
+
+
+def slugify(text: str, fallback: str = "skill") -> str:
+    """Return a kebab-case slug of `text` (60 chars max) fit for a directory
+    name; `fallback` is used when `text` has no ASCII letters or digits.
+    Dashes are trimmed again after the 60-char cut, so slugging a slug is a
+    no-op -- `Skill.from_markdown` re-slugs the stored `name` on every load."""
+    s = _SLUG_RE.sub("-", str(text or "").strip().lower()).strip("-")
+    # The cut can land right behind a dash ("...-b"[:n] -> "...-"); trim it.
+    return (s or fallback)[:60].rstrip("-")
+
+
+# ---------------------------------------------------------------------------
+# Frontmatter (minimal YAML — we don't pull in PyYAML for one feature)
+# ---------------------------------------------------------------------------
+
+# We accept a tiny subset of YAML: scalar `key: value`, inline lists `[a, b]`,
+# and block lists with `-`. That covers everything in our schema and avoids
+# a new dependency.
+
+_FM_KEY_RE = re.compile(r"^([a-z_][a-z0-9_]*):\s*(.*)$", re.IGNORECASE)  # unindented `key: value`; groups: key, raw value
+_FM_BLOCK_LIST_RE = re.compile(r"^\s*-\s*(.*)$")  # `- item` line at any indent; group 1: item text
+
+
+def _parse_scalar(raw: str) -> Any:
+    """Convert one frontmatter value to a list (inline `[a, b]`), bool, None,
+    unquoted string, int, float, or -- failing all of those -- the stripped
+    text. Double-quoted values are decoded with `json.loads`, the inverse of
+    the `json.dumps` quoting in `_emit_scalar`, so backslash escapes no longer
+    pile up on each load/save cycle; if decoding fails only the quotes go.
+    """
+    raw = raw.strip()
+    low = raw.lower()
+    if raw.startswith("[") and raw.endswith("]"):
+        inner = raw[1:-1].strip()
+        return [_parse_scalar(p) for p in _split_top_level(inner, ",")] if inner else []
+    if low in ("true", "yes", "false", "no"):
+        return low in ("true", "yes")
+    if low in ("null", "none", "~"):
+        return None
+    if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ("'", '"'):
+        try:  # single-quoted text has no escapes to decode
+            return json.loads(raw) if raw[0] == '"' else raw[1:-1]
+        except ValueError:
+            return raw[1:-1]
+    try:
+        return float(raw) if "." in raw else int(raw)
+    except ValueError:
+        return raw
+
+
+def _split_top_level(s: str, sep: str) -> List[str]:
+    """Cut `s` at every `sep` found outside quotes and outside `[...]`; pieces
+    are whitespace-stripped, and nothing is added if no chars follow the last cut."""
+    pieces: List[str] = []
+    current: List[str] = []
+    nesting, open_quote = 0, None
+    for char in s:
+        if open_quote is not None:
+            if char == open_quote:
+                open_quote = None
+        elif char == "'" or char == '"':
+            open_quote = char
+        else:
+            if char == "[":
+                nesting += 1
+            elif char == "]":
+                nesting = max(0, nesting - 1)
+            if char == sep and nesting == 0:
+                pieces.append("".join(current).strip())
+                current = []
+                continue
+        current.append(char)
+    if current:
+        pieces.append("".join(current).strip())
+    return pieces
+
+
+# A frontmatter value minus its trailing ` # comment`: quoted strings are taken
+# whole; otherwise a `#` at the start or after whitespace ends the value.
+_FM_VALUE_RE = re.compile(r"""(?:"(?:\\.|[^"\\])*"|'[^']*'|(?!(?:^|\s+)#).)*""")
+
+
+def parse_frontmatter(text: str) -> tuple[Dict[str, Any], str]:
+    """Split a SKILL.md into `(frontmatter dict, body text)`.
+
+    `text` must start with `---`, closed by a later line starting with `---`;
+    otherwise the result is `({}, text)`. Each `key: value` line is stored via
+    `_parse_scalar`; a key without a value starts a list that the `- item`
+    lines after it fill. Blank lines, `#` comment lines and -- as in YAML --
+    trailing ` # ...` comments (see `_FM_VALUE_RE`) are ignored.
+    """
+    end = text.find("\n---", 3) if text.startswith("---") else -1
+    if end < 0:
+        return {}, text
+    fm: Dict[str, Any] = {}
+    pending_key: Optional[str] = None  # key whose block list is being filled
+    for line in text[3:end].splitlines():
+        if not line.strip() or line.lstrip().startswith("#"):
+            continue
+        m = _FM_KEY_RE.match(line)
+        item = _FM_BLOCK_LIST_RE.match(line)
+        if m:
+            key, val = m.group(1), _FM_VALUE_RE.match(m.group(2)).group(0)
+            pending_key = None if val.strip() else key
+            fm[key] = _parse_scalar(val) if val.strip() else []
+        elif item and pending_key:
+            fm[pending_key].append(_parse_scalar(_FM_VALUE_RE.match(item.group(1)).group(0)))
+    return fm, text[end + 4:].lstrip("\n")
+
+
+def _emit_scalar(v: Any) -> str:
+    """Render `v` as a one-line frontmatter value. Text is JSON-quoted if it
+    holds YAML punctuation or would not read back as the same string (e.g.
+    "yes", "42", " padded"); non-ASCII stays literal instead of being escaped."""
+    if v is None or isinstance(v, bool):
+        return "null" if v is None else ("true" if v else "false")
+    if isinstance(v, (int, float)):
+        return str(v)
+    if isinstance(v, list):
+        return "[" + ", ".join(_emit_scalar(x) for x in v) + "]"
+    s = str(v)
+    quote = any(c in s for c in ':#\n[]{},&*!|>\'"%@') or _parse_scalar(s) != s
+    return json.dumps(s, ensure_ascii=False) if quote else s
+
+
+def _as_list(v: Any) -> List[str]:
+    """Coerce a frontmatter value to a list of strings: None -> [], a list ->
+    its items stringified minus None/"" entries, anything else -> [str(v)]."""
+    if isinstance(v, list):
+        return [str(item) for item in v if item not in (None, "")]
+    return [] if v is None else [str(v)]
+
+
+def _as_float(v: Any, default: float = 0.8) -> float:
+    try:  # best-effort coercion: whatever float() rejects yields `default`
+        return float(v)
+    except (TypeError, ValueError, OverflowError):  # OverflowError: huge ints
+        return default
+
+
+def emit_frontmatter(fm: Dict[str, Any]) -> str:
+    """Render `fm` as `key: value` lines, skipping None, [] and "" values."""
+    return "\n".join(
+        f"{key}: {_emit_scalar(value)}"
+        for key, value in fm.items()
+        if not (value is None or value == [] or value == "")
+    )
+
+
+# ---------------------------------------------------------------------------
+# Skill body sections
+# ---------------------------------------------------------------------------
+
+_KNOWN_SECTIONS = ("when_to_use", "procedure", "pitfalls", "verification")  # section keys parse_body always returns
+_HEADING_TO_KEY = {  # lower-cased `## ` heading text -> section key
+    "when to use": "when_to_use",
+    "procedure": "procedure",
+    "steps": "procedure",  # alias: read as the procedure section
+    "pitfalls": "pitfalls",
+    "verification": "verification",
+}
+_KEY_TO_HEADING = {  # section key -> heading text written by emit_body
+    "when_to_use": "When to Use",
+    "procedure": "Procedure",
+    "pitfalls": "Pitfalls",
+    "verification": "Verification",
+}
+
+
+def parse_body(body: str) -> Dict[str, Any]:
+    """Split a SKILL.md body into known sections.
+
+    Returns:
+        {
+            "when_to_use": str,
+            "procedure":   list[str],   # numbered/bulleted lines
+            "pitfalls":    list[str],
+            "verification": list[str],
+            "body_extra":  str,         # anything not under a known heading
+        }
+
+    A `## ` heading that is not a known section is kept, heading line included,
+    in `body_extra`: without its heading that text would be re-emitted bare
+    and absorbed by the preceding known section on the next parse.
+    """
+    out = {k: ([] if k != "when_to_use" else "") for k in _KNOWN_SECTIONS}
+    out["body_extra"] = ""
+    if not body or not body.strip():
+        return out
+    sections: List[tuple[Optional[str], List[str]]] = [(None, [])]
+    for line in body.splitlines():
+        m = re.match(r"^##\s+(.*?)\s*$", line)
+        if m:
+            key = _HEADING_TO_KEY.get(m.group(1).strip().lower())
+            # Known headings are regenerated by emit_body; unknown ones are not.
+            sections.append((key, [] if key else [line]))
+            continue
+        sections[-1][1].append(line)
+    for key, lines in sections:
+        text = "\n".join(lines).strip("\n")
+        if key is None:
+            if text.strip():
+                out["body_extra"] = (out["body_extra"] + "\n\n" + text.strip()).strip()
+        elif key == "when_to_use":
+            out["when_to_use"] = text.strip()
+        else:
+            out[key] = _parse_list_lines(text)
+    return out
+
+
+def _parse_list_lines(text: str) -> List[str]:
+    """Collect the entries of a list-style section.
+
+    A line starting with `-`, `*`, `N.` or `N)` opens a new entry (marker
+    removed). Any other non-blank line is appended, space-separated, to the
+    entry before it -- or becomes an entry itself when there is none yet.
+    """
+    entries: List[str] = []
+    for stripped in filter(None, map(str.strip, (text or "").splitlines())):
+        bullet = re.match(r"^(?:[-*]|\d+[.)])\s+(.*)$", stripped)
+        if bullet is not None:
+            entries.append(bullet.group(1).strip())
+        elif not entries:
+            entries.append(stripped)
+        else:
+            entries[-1] = f"{entries[-1]} {stripped}"
+    return entries
+
+
+def emit_body(sections: Dict[str, Any]) -> str:
+    """Render section data as markdown: the `when_to_use` text, `procedure` as
+    a numbered list, `pitfalls` and `verification` as bullets (each under its
+    `## ` heading), then the stripped `body_extra`. Empty parts are left out,
+    blocks are blank-line separated, and non-empty output ends with a newline."""
+    blocks: List[str] = []
+    trigger = (sections.get("when_to_use") or "").strip()
+    if trigger:
+        blocks.append(f"## {_KEY_TO_HEADING['when_to_use']}\n\n{trigger}")
+    for key in ("procedure", "pitfalls", "verification"):
+        entries = sections.get(key) or []
+        if not entries:
+            continue
+        rows = [f"{n}. {e}" if key == "procedure" else f"- {e}" for n, e in enumerate(entries, 1)]
+        blocks.append(f"## {_KEY_TO_HEADING[key]}\n\n" + "\n".join(rows))
+    trailer = (sections.get("body_extra") or "").strip()
+    if trailer:
+        blocks.append(trailer)
+    return "\n\n".join(blocks) + "\n" if blocks else ""
+
+
+# ---------------------------------------------------------------------------
+# Skill record
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class Skill:
+    """A single skill: SKILL.md frontmatter, parsed body sections, sidecar
+    usage counters and file location, convertible to and from markdown."""
+
+    # Frontmatter fields
+    name: str  # slug; also the directory name
+    description: str = ""
+    version: str = "1.0.0"
+    category: str = "general"
+    tags: List[str] = field(default_factory=list)
+    platforms: List[str] = field(default_factory=list)
+    requires_toolsets: List[str] = field(default_factory=list)
+    fallback_for_toolsets: List[str] = field(default_factory=list)
+    status: str = "draft"  # "draft" or "published"
+    confidence: float = 0.8
+    source: str = "learned"
+    teacher_model: Optional[str] = None
+    owner: Optional[str] = None
+    created: str = ""  # ISO 8601 timestamp
+    # Body sections
+    when_to_use: str = ""
+    procedure: List[str] = field(default_factory=list)
+    pitfalls: List[str] = field(default_factory=list)
+    verification: List[str] = field(default_factory=list)
+    body_extra: str = ""
+    # Sidecar counters; to_frontmatter() leaves them out of SKILL.md
+    uses: int = 0
+    last_used: Optional[int] = None
+    # On-disk path of the file (set when read)
+    path: Optional[str] = None
+
+    def to_frontmatter(self) -> Dict[str, Any]:
+        """Build the frontmatter mapping in the order it is written to disk.
+
+        Empty list fields and unset `teacher_model` / `owner` are left out; a
+        blank `created` is filled in with the value of `_now_iso()`.
+        """
+        fm: Dict[str, Any] = {
+            "name": self.name,
+            "description": self.description,
+            "version": self.version,
+            "category": self.category,
+        }
+        for attr in ("tags", "platforms", "requires_toolsets", "fallback_for_toolsets"):
+            if getattr(self, attr):
+                fm[attr] = list(getattr(self, attr))
+        fm["status"] = self.status
+        fm["confidence"] = round(float(self.confidence), 3)
+        fm["source"] = self.source
+        for attr in ("teacher_model", "owner"):
+            if getattr(self, attr):
+                fm[attr] = getattr(self, attr)
+        fm["created"] = self.created or _now_iso()
+        return fm
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return every field as a plain dict (lists copied), plus `id` and the
+        legacy `title` / `problem` / `solution` / `steps` keys of the old API/UI."""
+        d: Dict[str, Any] = {"id": self.name}  # slug doubles as id
+        for attr in ("name", "description", "version", "category"):
+            d[attr] = getattr(self, attr)
+        for attr in ("tags", "platforms", "requires_toolsets", "fallback_for_toolsets"):
+            d[attr] = list(getattr(self, attr))
+        d["status"] = self.status
+        d["confidence"] = round(float(self.confidence), 3)
+        for attr in ("source", "teacher_model", "owner", "created", "when_to_use"):
+            d[attr] = getattr(self, attr)
+        for attr in ("procedure", "pitfalls", "verification"):
+            d[attr] = list(getattr(self, attr))
+        d["body_extra"] = self.body_extra
+        d["uses"] = int(self.uses or 0)
+        d["last_used"] = self.last_used
+        d["path"] = self.path
+        # Back-compat aliases: `solution` prefers body_extra, then the first step.
+        d["title"] = self.description or self.name.replace("-", " ").title()
+        d["problem"] = self.when_to_use
+        d["solution"] = self.body_extra or (self.procedure[0] if self.procedure else "")
+        d["steps"] = list(self.procedure)
+        return d
+
+    @classmethod
+    def from_markdown(cls, text: str, *, path: Optional[str] = None) -> "Skill":
+        """Parse SKILL.md `text` into a Skill; `path` is stored unchanged.
+
+        Missing or empty frontmatter values fall back to the field defaults.
+        `name` is slugified (from `description` when `name` is absent, else
+        "skill"); a missing `created` becomes the current timestamp.
+        """
+        fm, body = parse_frontmatter(text)
+        sections = parse_body(body)
+
+        def text_of(key: str, default: str = "") -> str:
+            return str(fm.get(key, default) or default)
+
+        raw_name = fm.get("name")
+        if raw_name in (None, ""):
+            raw_name = fm.get("description", "")
+        return cls(
+            name=slugify(raw_name, fallback="skill"),
+            description=text_of("description"),
+            version=text_of("version", "1.0.0"),
+            category=text_of("category", "general"),
+            status=text_of("status", "draft"),
+            confidence=_as_float(fm.get("confidence", 0.8), 0.8),
+            source=text_of("source", "learned"),
+            teacher_model=str(fm["teacher_model"]) if fm.get("teacher_model") else None,
+            owner=str(fm["owner"]) if fm.get("owner") else None,
+            created=str(fm.get("created") or _now_iso()),
+            when_to_use=sections["when_to_use"],
+            body_extra=sections["body_extra"],
+            path=path,
+            **{k: _as_list(fm.get(k)) for k in ("tags", "platforms", "requires_toolsets", "fallback_for_toolsets")},
+            **{key: list(sections[key]) for key in ("procedure", "pitfalls", "verification")},
+        )
+
+    def to_markdown(self) -> str:
+        """Serialize to SKILL.md text: fenced frontmatter, blank line, body."""
+        section_keys = ("when_to_use", "procedure", "pitfalls", "verification", "body_extra")
+        header = emit_frontmatter(self.to_frontmatter())
+        body = emit_body({key: getattr(self, key) for key in section_keys})
+        return f"---\n{header}\n---\n\n{body}"
+
+
+def _now_iso() -> str:  # current UTC time, second precision, literal "Z" suffix
+    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")