feat: add /api/ready readiness probe (DB, data dir, local-first) (#1200)
/api/health is a liveness ping. This adds /api/ready as a readiness /
integrity self-check that returns 503 unless every critical subsystem is
whole, so an orchestrator (Docker/Compose/k8s) can gate traffic on real
readiness rather than mere process liveness:
- database: opens a connection and runs SELECT 1
- data_dir: ensures the data directory exists (creating it if missing) and is writable
- local_first: reports whether storage stays on the host (informational;
a remote database is a valid deployment, so it never fails readiness)
The check logic lives in src/readiness.py so it is unit-testable in
isolation; the route is a thin wrapper. Covered by tests/test_readiness.py.
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
11
app.py
11
app.py
@@ -772,6 +772,17 @@ async def get_version():
|
||||
async def health_check() -> Dict[str, str]:
|
||||
return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
|
||||
|
||||
@app.get("/api/ready")
async def readiness_check() -> JSONResponse:
    """Readiness / integrity self-check — DB, data dir, local-first storage.

    Unlike /api/health (liveness), this returns 503 unless every critical
    subsystem is whole, so an orchestrator can gate traffic on real readiness.

    Returns:
        A JSONResponse whose body is the report produced by
        ``check_readiness()``; the status code is 200 when the report's
        ``ready`` field is truthy and 503 otherwise.
    """
    import asyncio

    from src.readiness import check_readiness

    # check_readiness() is synchronous and performs blocking I/O (a database
    # round-trip and a file write). Calling it directly inside this coroutine
    # would stall the event loop — and every other in-flight request — for as
    # long as the database takes to answer or time out, so run it on the
    # default executor's worker thread instead.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(None, check_readiness)
    return JSONResponse(status_code=200 if result.get("ready") else 503, content=result)
|
||||
|
||||
@app.get("/api/runtime")
|
||||
async def runtime_info() -> Dict[str, object]:
|
||||
in_docker = os.path.exists("/.dockerenv")
|
||||
|
||||
61
src/readiness.py
Normal file
61
src/readiness.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""Ithaca anchor — local-instance readiness / integrity self-check.
|
||||
|
||||
Beyond ``/api/health``'s liveness ping, this confirms the self-hosted instance is
|
||||
whole and at home: the database is reachable, the data directory is present and
|
||||
writable, and storage is local-first. Served by ``GET /api/ready`` and suitable
|
||||
for an orchestrator readiness probe (200 only when every critical check passes).
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Dict
|
||||
|
||||
|
||||
def check_readiness() -> Dict[str, object]:
    """Run the readiness checks and return a JSON-serialisable report.

    ``ready`` is True only when every critical check (database, data_dir) passes.
    ``local_first`` is informational — a remote database is a valid deployment, so
    it never fails readiness, it only reports whether storage stays on this host.

    Returns:
        A dict with the keys ``ready`` (bool), ``version``, ``checks`` (one
        entry per check, each carrying an ``ok`` flag plus details) and
        ``timestamp`` (naive UTC time in ISO-8601 form).
    """
    from core.constants import APP_VERSION, DATA_DIR
    from core.database import DATABASE_URL, engine
    from sqlalchemy import text as sql_text

    checks: Dict[str, Dict[str, object]] = {}

    # Database reachable — the simplest honest probe that the engine is live.
    try:
        with engine.connect() as conn:
            conn.execute(sql_text("SELECT 1"))
        checks["database"] = {"ok": True}
    except Exception as e:
        checks["database"] = {"ok": False, "error": str(e)}

    # Data directory present and writable — home must be able to hold its own data.
    try:
        _probe_data_dir(DATA_DIR)
        checks["data_dir"] = {"ok": True, "path": DATA_DIR}
    except Exception as e:
        checks["data_dir"] = {"ok": False, "error": str(e)}

    # Local-first: storage stays on the home machine (informational, never fatal).
    # "ok" is always True here, so this entry can never pull ``ready`` down.
    checks["local_first"] = {
        "ok": True,
        "local": _is_local_database_url(DATABASE_URL),
    }

    ready = all(bool(c.get("ok")) for c in checks.values())
    return {
        "ready": ready,
        "version": APP_VERSION,
        "checks": checks,
        "timestamp": datetime.utcnow().isoformat(),
    }


def _probe_data_dir(data_dir) -> None:
    """Create *data_dir* if needed and prove it is writable via a throwaway file.

    Raises whatever ``os.makedirs``, ``open``, the write or ``os.remove``
    raises; the caller turns that into a failed check.
    """
    os.makedirs(data_dir, exist_ok=True)
    probe = os.path.join(data_dir, f".ready_probe_{uuid.uuid4().hex}")
    try:
        with open(probe, "w", encoding="utf-8") as fh:
            fh.write("ok")
    except Exception:
        # The probe runs repeatedly; without this cleanup a failing write
        # (e.g. a full disk) would leave a new uniquely-named file behind on
        # every call. Removal is best-effort — the original error is what
        # matters and is re-raised below.
        try:
            os.remove(probe)
        except OSError:
            pass
        raise
    # On the success path a failed removal is itself a data-dir problem, so
    # let it propagate.
    os.remove(probe)


def _is_local_database_url(database_url: str) -> bool:
    """Return True when *database_url* points at storage on this host.

    SQLite URLs are always local. For other URLs the host component is parsed
    out and compared against ``localhost`` / loopback addresses, rather than
    searching the whole URL for a substring (which would also match a
    password, database name or a host such as ``db.localhost.example.com``).
    """
    from ipaddress import ip_address
    from urllib.parse import urlsplit

    if database_url.startswith("sqlite"):
        return True

    try:
        host = urlsplit(database_url).hostname
    except ValueError:
        # Unparseable URL: fall back to the plain substring heuristic rather
        # than failing an informational check.
        return "localhost" in database_url or "127.0.0.1" in database_url

    if not host:
        return False
    if host == "localhost":
        return True
    try:
        # Covers 127.0.0.0/8 and the IPv6 loopback ::1.
        return ip_address(host).is_loopback
    except ValueError:
        # A regular DNS name that is not "localhost".
        return False
|
||||
27
tests/test_readiness.py
Normal file
27
tests/test_readiness.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""Tests for the readiness / integrity self-check (src/readiness.py)."""
|
||||
|
||||
from src.readiness import check_readiness
|
||||
|
||||
|
||||
def test_readiness_reports_core_subsystems():
    """The report has its top-level keys, all three checks, and is ready."""
    report = check_readiness()

    required_keys = {"ready", "version", "checks", "timestamp"}
    assert required_keys.issubset(report.keys())

    subsystems = report["checks"]
    for expected in ("database", "data_dir", "local_first"):
        assert expected in subsystems, f"missing check: {expected}"

    # The dev/test environment ships a local SQLite DB and a data dir, so
    # both critical checks — and therefore overall readiness — must pass.
    database, data_dir = subsystems["database"], subsystems["data_dir"]
    assert database["ok"] is True, database
    assert data_dir["ok"] is True, data_dir
    assert report["ready"] is True, report
|
||||
|
||||
|
||||
def test_local_first_check_is_informational_never_fatal():
    """local_first always reports ok=True and exposes a ``local`` field."""
    local_first = check_readiness()["checks"]["local_first"]

    # A remote database is a valid deployment, so this check only reports
    # whether storage stays on-host; it must never gate readiness.
    assert local_first["ok"] is True
    assert "local" in local_first
|
||||
Reference in New Issue
Block a user