From 42ef4b6502d300b41d8f24d09ec756fb8bd7105f Mon Sep 17 00:00:00 2001
From: red person
Date: Wed, 3 Jun 2026 08:12:38 +0300
Subject: [PATCH] Skip invalid research CLI records (#1394)

Route every record read in scripts/odysseus-research through a shared
_load_path() helper. It returns None for files that cannot be read,
cannot be decoded or parsed as JSON, or whose top-level JSON value is
not an object, so _load(), cmd_list() and cmd_search() skip such
records instead of failing on them.

_load_path() catches ValueError rather than only json.JSONDecodeError:
Path.read_text() raises UnicodeDecodeError (also a ValueError) for
files that are not valid text, and the `except Exception` handlers it
replaces in cmd_list() and cmd_search() used to tolerate those files.
---
 scripts/odysseus-research        | 23 ++++++++++--------
 tests/test_research_cli_store.py | 42 ++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 10 deletions(-)
 create mode 100644 tests/test_research_cli_store.py

diff --git a/scripts/odysseus-research b/scripts/odysseus-research
index c640984..f483f3c 100755
--- a/scripts/odysseus-research
+++ b/scripts/odysseus-research
@@ -26,14 +26,19 @@ from pathlib import Path
 _DATA_DIR = _REPO_ROOT / "data" / "deep_research"
 
 
+def _load_path(path: Path) -> dict | None:
+    try:
+        data = json.loads(path.read_text())
+    except (OSError, ValueError):
+        return None
+    return data if isinstance(data, dict) else None
+
+
 def _load(rp_id: str) -> dict | None:
     path = _DATA_DIR / f"{rp_id}.json"
     if not path.exists():
         return None
-    try:
-        return json.loads(path.read_text())
-    except json.JSONDecodeError:
-        return None
+    return _load_path(path)
 
 
 def _preview_text(value, limit: int = 200) -> str:
@@ -64,9 +69,8 @@ def cmd_list(args):
     out = []
     for path in sorted(_DATA_DIR.glob("*.json")):
         rp_id = path.stem
-        try:
-            data = json.loads(path.read_text())
-        except Exception:
+        data = _load_path(path)
+        if data is None:
             continue
         if args.status and (data.get("status") or "") != args.status:
             continue
@@ -108,9 +112,8 @@ def cmd_search(args):
     out = []
     for path in _DATA_DIR.glob("*.json"):
         rp_id = path.stem
-        try:
-            data = json.loads(path.read_text())
-        except Exception:
+        data = _load_path(path)
+        if data is None:
             continue
         haystack = " ".join([
             (data.get("query") or "").lower(),
diff --git a/tests/test_research_cli_store.py b/tests/test_research_cli_store.py
new file mode 100644
index 0000000..cffadf2
--- /dev/null
+++ b/tests/test_research_cli_store.py
@@ -0,0 +1,42 @@
+import importlib.machinery
+import importlib.util
+import json
+from pathlib import Path
+from types import SimpleNamespace
+
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+def _load_cli():
+    path = ROOT / "scripts" / "odysseus-research"
+    loader = importlib.machinery.SourceFileLoader("odysseus_research_cli", str(path))
+    spec = importlib.util.spec_from_loader(loader.name, loader)
+    module = importlib.util.module_from_spec(spec)
+    loader.exec_module(module)
+    return module
+
+
+def test_list_skips_non_object_research_records(tmp_path, monkeypatch):
+    cli = _load_cli()
+    cli._DATA_DIR = tmp_path
+    (tmp_path / "good.json").write_text(json.dumps({"query": "hello", "status": "complete"}))
+    (tmp_path / "list.json").write_text("[]")
+    (tmp_path / "broken.json").write_text("{")
+    (tmp_path / "binary.json").write_bytes(b"\xff\xfe")
+
+    emitted = []
+    monkeypatch.setattr(cli, "emit", lambda value, args: emitted.append(value))
+
+    cli.cmd_list(SimpleNamespace(status=None, limit=50))
+
+    assert emitted == [[{
+        "id": "good",
+        "query": "hello",
+        "category": "",
+        "status": "complete",
+        "started_at": "",
+        "completed_at": "",
+        "sources": 0,
+        "stats": {},
+    }]]