From 17b62a3dba66de9edac6d896960a4637fe923611 Mon Sep 17 00:00:00 2001
From: tanmayraut45 <tanmayraut0@gmail.com>
Date: Fri, 5 Jun 2026 13:20:33 +0530
Subject: [PATCH] Research CLI: alias `--status complete` to the stored `done`
 value (#2515)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`odysseus-research list --status complete` returns an empty result on
any real corpus. The CLI accepts `complete` as a `--status` choice (the
user-facing label), but the writer in
`services/research/research_handler.py` stores `status="done"` when a
run finishes (and the legacy `src/research_handler.py` copy does the
same). The list filter at `scripts/odysseus-research` was a literal
string compare:

    if args.status and (data.get("status") or "") != args.status:
        continue

so `--status complete` filtered every finished record out, and the user
saw nothing — even though `odysseus-research list` (no filter) listed
them fine and `show RP_ID` worked on the same files. The other
documented choices — `running`, `cancelled`, `error` — are stored
verbatim by the writer, so the surface mismatch is just on `complete`.

Add a small `_STATUS_CLI_TO_STORED = {"complete": "done"}` map and run
`data.get("status")` through `_status_matches(...)` before comparing.
The other CLI choices fall through unchanged, so the filter still
matches them verbatim. A `None` or non-string `status` (corrupt JSON)
is coerced to `""` and never matches `complete`, so a half-written
record can't sneak past the filter.

`tests/test_research_cli_status_filter.py` covers all four documented
choices, the non-string / missing status case, and pins that the
verbatim choices are NOT rewritten — a blanket mapping that turned
every CLI choice into a stored variant would just re-introduce the
empty-result bug on the running/cancelled/error paths.

Part of #2122.
---
 scripts/odysseus-research                |  20 ++++-
 tests/test_research_cli_status_filter.py | 106 +++++++++++++++++++++++
 2 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_research_cli_status_filter.py

diff --git a/scripts/odysseus-research b/scripts/odysseus-research
index f483f3c..b0d1f0c 100755
--- a/scripts/odysseus-research
+++ b/scripts/odysseus-research
@@ -25,6 +25,24 @@ from pathlib import Path
 
 _DATA_DIR = _REPO_ROOT / "data" / "deep_research"
 
+# The CLI's --status takes the user-facing label "complete", but the writer
+# in services/research/research_handler.py stores `status="done"` when a run
+# finishes (and the legacy src/research_handler.py does the same). Without
+# this alias, --status complete filters every finished record out and the
+# user sees an empty list. Map at filter time so the on-disk corpus is the
+# source of truth and the CLI surface stays the friendlier word. The other
+# choices ("running", "cancelled", "error") are stored verbatim, so they
+# fall through unchanged.
+_STATUS_CLI_TO_STORED = {"complete": "done"}
+
+
+def _status_matches(stored, requested: str) -> bool:
+    stored = (stored or "")
+    if not isinstance(stored, str):
+        stored = ""
+    target = _STATUS_CLI_TO_STORED.get(requested, requested)
+    return stored == target
+
 
 def _load_path(path: Path) -> dict | None:
     try:
@@ -72,7 +90,7 @@ def cmd_list(args):
         data = _load_path(path)
         if data is None:
             continue
-        if args.status and (data.get("status") or "") != args.status:
+        if args.status and not _status_matches(data.get("status"), args.status):
             continue
         out.append(_summarize(rp_id, data))
     out.sort(key=lambda r: r.get("started_at") or "", reverse=True)
diff --git a/tests/test_research_cli_status_filter.py b/tests/test_research_cli_status_filter.py
new file mode 100644
index 0000000..a406a8b
--- /dev/null
+++ b/tests/test_research_cli_status_filter.py
@@ -0,0 +1,106 @@
+"""`odysseus-research list --status complete` was returning nothing.
+
+The CLI's `--status` argparse choice is "complete" — that is the user-facing
+label — but the writer in `services/research/research_handler.py` stores
+`status="done"` for a finished run (and the older `src/research_handler.py`
+copy does the same). The list filter was a literal string compare, so
+`--status complete` matched zero records on any real on-disk corpus.
+
+These tests pin the alias so the friendlier CLI word keeps matching the
+stored value. The other choices (`running`, `cancelled`, `error`) are
+stored verbatim, so they must NOT be rewritten by the alias map.
+
+Part of #2122 (odysseus-* CLI list/search bugs).
+"""
+
+from __future__ import annotations
+
+import importlib.machinery
+import importlib.util
+import json
+from pathlib import Path
+from types import SimpleNamespace
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+def _load_cli():
+    path = ROOT / "scripts" / "odysseus-research"
+    loader = importlib.machinery.SourceFileLoader("odysseus_research_cli", str(path))
+    spec = importlib.util.spec_from_loader(loader.name, loader)
+    module = importlib.util.module_from_spec(spec)
+    loader.exec_module(module)
+    return module
+
+
+def _run_list(cli, tmp_path, monkeypatch, status, records):
+    cli._DATA_DIR = tmp_path
+    for name, blob in records.items():
+        (tmp_path / f"{name}.json").write_text(json.dumps(blob))
+    emitted = []
+    monkeypatch.setattr(cli, "emit", lambda value, args: emitted.append(value))
+    cli.cmd_list(SimpleNamespace(status=status, limit=50))
+    assert emitted, "cmd_list emitted nothing"
+    return [r["id"] for r in emitted[0]]
+
+
+def test_status_complete_matches_writer_done_records(tmp_path, monkeypatch):
+    """`--status complete` must return the records the writer marked `done`.
+    Without the alias this filter is silently empty on any real corpus."""
+    cli = _load_cli()
+    ids = _run_list(cli, tmp_path, monkeypatch, status="complete", records={
+        "rp-done":      {"query": "finished one", "status": "done",      "started_at": "2026-01-02"},
+        "rp-running":   {"query": "still running", "status": "running",  "started_at": "2026-01-01"},
+        "rp-cancelled": {"query": "user stopped",  "status": "cancelled","started_at": "2025-12-31"},
+    })
+    assert ids == ["rp-done"], (
+        "--status complete should alias to the writer's stored 'done' value; "
+        f"got {ids}. The alias map in `_STATUS_CLI_TO_STORED` was bypassed."
+    )
+
+
+def test_status_running_still_matches_verbatim(tmp_path, monkeypatch):
+    """`running` is stored verbatim, so the alias must NOT rewrite it.
+    A blanket map that turned every CLI choice into a stored variant would
+    re-introduce the empty-result bug on the running/cancelled/error paths."""
+    cli = _load_cli()
+    ids = _run_list(cli, tmp_path, monkeypatch, status="running", records={
+        "rp-done":    {"query": "finished",     "status": "done"},
+        "rp-running": {"query": "still running", "status": "running"},
+    })
+    assert ids == ["rp-running"], f"--status running must match verbatim; got {ids}"
+
+
+def test_status_cancelled_still_matches_verbatim(tmp_path, monkeypatch):
+    cli = _load_cli()
+    ids = _run_list(cli, tmp_path, monkeypatch, status="cancelled", records={
+        "rp-done":      {"query": "finished",  "status": "done"},
+        "rp-cancelled": {"query": "user stop", "status": "cancelled"},
+    })
+    assert ids == ["rp-cancelled"]
+
+
+def test_status_error_still_matches_verbatim(tmp_path, monkeypatch):
+    cli = _load_cli()
+    ids = _run_list(cli, tmp_path, monkeypatch, status="error", records={
+        "rp-done":  {"query": "finished", "status": "done"},
+        "rp-error": {"query": "crashed",  "status": "error"},
+    })
+    assert ids == ["rp-error"]
+
+
+def test_status_filter_tolerates_missing_or_non_string_status(tmp_path, monkeypatch):
+    """A corrupt record with no `status` (or a non-string status) must not
+    crash the filter and must not falsely match `--status complete`. The
+    existing `_load_path` already drops non-dict blobs; this guards the
+    next layer."""
+    cli = _load_cli()
+    ids = _run_list(cli, tmp_path, monkeypatch, status="complete", records={
+        "rp-good":  {"query": "ok",  "status": "done"},
+        "rp-blank": {"query": "no status field"},
+        "rp-typed": {"query": "non-string", "status": 42},
+    })
+    assert ids == ["rp-good"], (
+        "--status complete should only match the writer's 'done' string; "
+        f"got {ids}."
+    )