#!/usr/bin/env python3
"""odysseus-research — shell wrapper for deep-research sessions.

Each research run is a JSON blob in `data/deep_research/<id>.json`
holding the query, findings, sources, and final report. This CLI
enumerates and inspects them — running new research requires the
streaming endpoint, so it's not exposed here.

    odysseus-research list [--limit N] [--status complete|running|cancelled|error]
    odysseus-research show RP_ID            # full record (large)
    odysseus-research report RP_ID --raw    # just the markdown report
    odysseus-research search "query text" [--limit N]
    odysseus-research delete RP_ID
"""

from __future__ import annotations
import sys
import os, sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "_lib"))
from cli import quiet_logs, emit, fail, common_parser, run, REPO_ROOT as _REPO_ROOT
quiet_logs()

import argparse, json, logging, os, sys
from pathlib import Path

# Directory holding the research sessions, one `<id>.json` file per run.
# Every command in this file resolves session ids relative to it.
_DATA_DIR = _REPO_ROOT / "data" / "deep_research"

# The CLI's --status takes the user-facing label "complete", but the writer
# in services/research/research_handler.py stores `status="done"` when a run
# finishes (and the legacy src/research_handler.py does the same). Without
# this alias, --status complete filters every finished record out and the
# user sees an empty list. Map at filter time so the on-disk corpus is the
# source of truth and the CLI surface stays the friendlier word. The other
# choices ("running", "cancelled", "error") are stored verbatim, so they
# fall through unchanged.
_STATUS_CLI_TO_STORED = {"complete": "done"}


def _status_matches(stored, requested: str) -> bool:
    """Return True when a record's stored status satisfies a ``--status`` filter.

    Args:
        stored: The raw ``status`` value read from a session record. Anything
            that is not a string (including ``None``) is compared as ``""``.
        requested: The label given on the command line. It is translated
            through ``_STATUS_CLI_TO_STORED`` first; labels without an alias
            are compared verbatim.
    """
    # Translate the CLI label into the spelling used on disk.
    wanted = _STATUS_CLI_TO_STORED.get(requested, requested)
    # Missing, empty, and non-string values all collapse to the empty string.
    actual = stored if isinstance(stored, str) else ""
    return actual == wanted


def _load_path(path: Path) -> dict | None:
    """Read one session file and return its top-level JSON object.

    The file is read as bytes and handed to ``json.loads``, which detects
    UTF-8/UTF-16/UTF-32 itself, so decoding no longer depends on the
    process locale.

    Args:
        path: Location of the ``.json`` file to read.

    Returns:
        The decoded object when the file holds a JSON object (``dict``).
        ``None`` when the file cannot be read, is not valid text in a
        supported encoding, is not valid JSON, or decodes to anything other
        than an object.
    """
    try:
        data = json.loads(path.read_bytes())
    except (json.JSONDecodeError, UnicodeDecodeError, OSError):
        # UnicodeDecodeError is a ValueError but *not* a JSONDecodeError, so
        # it has to be listed explicitly; otherwise a single file with stray
        # bytes would abort the caller instead of being skipped.
        return None
    return data if isinstance(data, dict) else None


def _load(rp_id: str) -> dict | None:
    """Look up a session by id under ``_DATA_DIR``.

    Args:
        rp_id: Session id; the record is expected at ``<rp_id>.json``.

    Returns:
        Whatever ``_load_path`` returns for that file, or ``None`` when no
        such path exists.
    """
    candidate = _DATA_DIR / f"{rp_id}.json"
    return _load_path(candidate) if candidate.exists() else None


def _preview_text(value, limit: int = 200) -> str:
    """Return at most ``limit`` leading characters of ``value``.

    Stored records can carry a non-string ``query`` (legacy or corrupt
    JSON); slicing such a value directly could raise ``TypeError``, so
    anything that is not a ``str`` is previewed as the empty string.

    Args:
        value: The field to preview; any type is accepted.
        limit: Maximum number of characters to keep.
    """
    if not isinstance(value, str):
        value = ""
    return value[:limit]


def _summarize(rp_id: str, data: dict) -> dict:
    """Condense a full session record into the compact row used in listings.

    Args:
        rp_id: Session id, reported back under ``"id"``.
        data: The decoded session record.

    Returns:
        A dict with the keys ``id``, ``query`` (truncated preview),
        ``category``, ``status``, ``started_at``, ``completed_at``,
        ``sources`` (a count, not the list) and ``stats``. Missing or falsy
        fields fall back to ``""`` (``{}`` for ``stats``, ``0`` sources).
    """
    summary = {"id": rp_id, "query": _preview_text(data.get("query"))}
    # These fields are copied through as-is, defaulting to an empty string.
    for key in ("category", "status", "started_at", "completed_at"):
        summary[key] = data.get(key) or ""
    source_list = data.get("sources") or []
    summary["sources"] = len(source_list)
    summary["stats"] = data.get("stats") or {}
    return summary


def cmd_list(args):
    """Handle ``list``: emit summaries of stored sessions, newest first.

    Reads ``args.status`` (optional filter label) and ``args.limit``.
    Files that ``_load_path`` cannot turn into a record are skipped. When
    the data directory does not exist an empty list is emitted.
    """
    if not _DATA_DIR.is_dir():
        emit([], args)
        return

    def started_at(row):
        return row.get("started_at") or ""

    # Lazily pair each session id with its decoded record, in path order.
    loaded = (
        (path.stem, _load_path(path))
        for path in sorted(_DATA_DIR.glob("*.json"))
    )
    rows = [
        _summarize(rp_id, data)
        for rp_id, data in loaded
        if data is not None
        and (not args.status or _status_matches(data.get("status"), args.status))
    ]
    # Stable sort: rows with equal start times keep their path order.
    newest_first = sorted(rows, key=started_at, reverse=True)
    emit(newest_first[: args.limit], args)


def cmd_show(args):
    """Handle ``show``: emit the complete stored record for ``args.id``.

    An id that ``_load`` cannot resolve is reported through ``fail``.
    NOTE(review): the code after the check relies on ``fail`` not
    returning — confirm against the ``cli`` helper module.
    """
    record = _load(args.id)
    if record is None:
        fail(f"no research session {args.id!r}")
    emit(record, args)


def cmd_report(args):
    """Handle ``report``: output the final report of session ``args.id``.

    The report is taken from the record's ``result`` field, falling back to
    ``raw_report``.

    With ``args.raw`` the report text is written straight to stdout,
    newline-terminated. Only string values can be written that way, so a
    non-string field (legacy/corrupt JSON) is passed over in favour of the
    next candidate, and an empty line is written when neither field holds
    text. Previously such a record crashed with ``TypeError``.

    Without ``args.raw`` a dict with ``id``, ``query``, ``report`` and
    ``sources`` is handed to ``emit``; stored values are passed through
    unchanged there.

    An id that ``_load`` cannot resolve is reported through ``fail``.
    NOTE(review): the code after the check relies on ``fail`` not
    returning — confirm against the ``cli`` helper module.
    """
    data = _load(args.id)
    if data is None:
        fail(f"no research session {args.id!r}")
    report = data.get("result") or data.get("raw_report") or ""
    if args.raw:
        # For well-formed records this selects exactly the same value as
        # `report`; it differs only when a field is truthy but not text.
        candidates = (data.get("result"), data.get("raw_report"))
        text = next((c for c in candidates if c and isinstance(c, str)), "")
        sys.stdout.write(text)
        if not text.endswith("\n"):
            sys.stdout.write("\n")
        return
    emit({
        "id": args.id,
        "query": data.get("query") or "",
        "report": report,
        "sources": data.get("sources") or [],
    }, args)


def cmd_search(args):
    """Handle ``search``: emit summaries of sessions matching ``args.query``.

    The match is a case-insensitive substring test against the record's
    ``query``, ``result`` and ``category`` fields joined by single spaces.
    Results are ordered newest first and truncated to ``args.limit``.

    Fields that are not strings (legacy/corrupt JSON) are searched as the
    empty string instead of raising ``AttributeError`` on ``.lower()``.
    Files are visited in sorted path order, as in ``cmd_list``, so records
    with equal start times come out in a deterministic order.

    Files that ``_load_path`` cannot turn into a record are skipped. When
    the data directory does not exist an empty list is emitted.
    """
    if not _DATA_DIR.is_dir():
        emit([], args)
        return

    def lowered(record, key):
        # Non-string values contribute nothing to the searchable text.
        value = record.get(key)
        return value.lower() if isinstance(value, str) else ""

    q = args.query.lower()
    out = []
    for path in sorted(_DATA_DIR.glob("*.json")):
        data = _load_path(path)
        if data is None:
            continue
        haystack = " ".join(
            lowered(data, key) for key in ("query", "result", "category")
        )
        if q in haystack:
            out.append(_summarize(path.stem, data))
    # Stable sort: ties on started_at keep the sorted path order from above.
    out.sort(key=lambda r: r.get("started_at") or "", reverse=True)
    emit(out[: args.limit], args)


def cmd_delete(args):
    """Handle ``delete``: remove the stored file for session ``args.id``.

    A summary of the record is captured before the file is unlinked and is
    emitted under ``"deleted"`` alongside ``"ok": True``. If the file exists
    but cannot be decoded, the summary is built from an empty record.

    A missing file is reported through ``fail``.
    NOTE(review): the code after the check relies on ``fail`` not
    returning — confirm against the ``cli`` helper module.
    """
    target = _DATA_DIR / f"{args.id}.json"
    if not target.exists():
        fail(f"no research session {args.id!r}")
    # Snapshot first: once the file is gone there is nothing left to describe.
    record = _load(args.id) or {}
    deleted = _summarize(args.id, record)
    target.unlink()
    emit({"ok": True, "deleted": deleted}, args)


def _build_parser():
    """Construct the ``odysseus-research`` argument parser.

    A shared parent parser contributes ``--pretty`` to the root parser and
    to every subcommand. The subcommands ``list``, ``show``, ``report``,
    ``search`` and ``delete`` each store their handler under ``func``; the
    chosen subcommand name is stored under ``cmd`` and is required.

    Returns:
        The configured root ``argparse.ArgumentParser``.
    """
    shared = argparse.ArgumentParser(add_help=False)
    shared.add_argument("--pretty", action="store_true")

    root = argparse.ArgumentParser(prog="odysseus-research", parents=[shared])
    commands = root.add_subparsers(dest="cmd", required=True)

    def new_command(name):
        # Every subcommand inherits the shared flags.
        return commands.add_parser(name, parents=[shared])

    list_cmd = new_command("list")
    list_cmd.add_argument(
        "--status",
        choices=["complete", "running", "cancelled", "error"],
    )
    list_cmd.add_argument("--limit", type=int, default=50)
    list_cmd.set_defaults(func=cmd_list)

    show_cmd = new_command("show")
    show_cmd.add_argument("id")
    show_cmd.set_defaults(func=cmd_show)

    report_cmd = new_command("report")
    report_cmd.add_argument("id")
    report_cmd.add_argument(
        "--raw",
        action="store_true",
        help="write raw markdown to stdout",
    )
    report_cmd.set_defaults(func=cmd_report)

    search_cmd = new_command("search")
    search_cmd.add_argument("query")
    search_cmd.add_argument("--limit", type=int, default=50)
    search_cmd.set_defaults(func=cmd_search)

    delete_cmd = new_command("delete")
    delete_cmd.add_argument("id")
    delete_cmd.set_defaults(func=cmd_delete)

    return root


# Script entry point: build the CLI parser, hand it to the shared `run`
# helper imported from `cli`, and use whatever `run` returns as the exit status.
if __name__ == "__main__":
    sys.exit(run(_build_parser()))
