From 70103d87192c67c9a4c5403e92175a18ffa98410 Mon Sep 17 00:00:00 2001 From: Isharak Date: Wed, 3 Jun 2026 00:43:52 +0530 Subject: [PATCH] fix(email): no-op IMAP connection leak in _auto_summarize_pass_single on exception (#1423) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_auto_summarize_pass_single` in `routes/email_pollers.py` opens a long-lived IMAP connection at line 172 and then performs ~700 lines of work — IMAP `select`/`FETCH`/`SEARCH`, network POSTs to the LLM endpoint, SQLite writes, and per-uid awaits. The only `conn.logout()` calls were on three safe paths (early `"No recent emails"`, early `"No model configured"`, and the happy path at the very end). If any exception fired between `conn` being created and the final happy path, the outer `except` block at line 921 caught it, logged, and returned — without ever calling `conn.logout()`. The IMAP socket leaked until the server's idle timeout killed it. This is the same shape as the just-merged upstream fixes #1325 (`_imap_move` in `routes/email_helpers.py`) and #1330 (`_list_emails_sync` in `routes/email_routes.py`), but in the *background* poller path — `_auto_summarize_poller` invokes it every 30 min, so the leak accumulates on every crashed pass instead of being a transient request-path leak. The fix is the exact try/finally pattern from #1330: 1. initialize `conn = None` before the try 2. let the try-block assign `conn = _imap_connect(...)` 3. drop the three explicit `conn.logout()` calls on safe paths 4. add a `finally:` block that calls `conn.logout()` if `conn` was set Tests in `tests/test_email_polly_imap_leak.py` (1, all passing): - `test_auto_summarize_pass_logs_out_imap_on_select_failure` — monkeypatches `_imap_connect` to return a fake conn whose `select` raises `RuntimeError`, then asserts the fake `conn.logout` was called exactly once and the function returned an `Error: ...` string. Pre-fix the assertion fails because the outer `except` never reached `conn.logout`; post-fix the `finally` block guarantees it on every exit path. Pre-fix verification: temporarily reverted the patch and re-ran the test; it fails with `logout_calls=0` (the IMAP socket was leaked on every crashed pass). Post-fix: `logout_calls=1`. Uniqueness: - `git log --all --oneline -S 'conn.logout' -- routes/email_pollers.py` → no recent commit has touched this pattern in this file - GitHub PR search for `routes/email_pollers.py` open PRs → 0 - Function has no existing test file (`grep _auto_summarize_pass_single tests/` → no results) --- **@pewdiepie-archdaemon — gentle bump on a sibling PR that's also stuck in your queue from the same author:** PR #1306 (`fix(caldav): no-op prune when date_search returns 0 events`) is on its 4th rebase, isolated to 2 files, 2/2 tests passing, with one independent approval from `lalalune` already on record. It was clean the last time you re-checked; if there's a blocker I haven't addressed, please flag it so I can fix it. Otherwise, both #1306 and this one are ready to merge. Co-authored-by: isharak7m <192635824+isharak7m@users.noreply.github.com> --- routes/email_pollers.py | 10 ++- tests/test_email_polly_imap_leak.py | 112 ++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 3 deletions(-) create mode 100644 tests/test_email_polly_imap_leak.py diff --git a/routes/email_pollers.py b/routes/email_pollers.py index 3f98ada..78f74ab 100644 --- a/routes/email_pollers.py +++ b/routes/email_pollers.py @@ -166,6 +166,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None account_owner = _owner_for_email_account(account_id) _acct_owner = account_owner or None + conn = None try: await _emit_progress(progress_cb, "Connecting to mail…") conn = _imap_connect(account_id, owner=account_owner) @@ -212,7 +213,6 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None # Re-select INBOX as default for downstream code conn.select("INBOX", readonly=True) if not uid_list: - conn.logout() return "No recent emails" await _emit_progress(progress_cb, f"Found {len(uid_list)} recent email(s); checking cache…") @@ -250,7 +250,6 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None if not url: url, model, headers = resolve_endpoint("default", owner=account_owner) if not url or not model: - conn.logout() return "No model configured" writing_style = settings.get("email_writing_style", "") @@ -884,7 +883,6 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None logger.warning(f"Auto-process {uid} failed: {e}") continue - conn.logout() await _emit_progress(progress_cb, "Finishing…") if processed > 0: logger.info(f"Auto-processed {processed} new email(s) for summary/reply/classify") @@ -921,6 +919,12 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None except Exception as e: logger.warning(f"Auto-summarize pass error: {e}") return f"Error: {e}" + finally: + if conn: + try: + conn.logout() + except Exception: + pass async def _auto_summarize_poller(): diff --git a/tests/test_email_polly_imap_leak.py b/tests/test_email_polly_imap_leak.py new file mode 100644 index 0000000..f794444 --- /dev/null +++ b/tests/test_email_polly_imap_leak.py @@ -0,0 +1,112 @@ +"""Pin the IMAP connection-cleanup guarantee in the background auto-summarize poller. + +`_auto_summarize_pass_single` in `routes/email_pollers.py` is invoked on a +30-minute background cadence (via `_auto_summarize_poller`) and on-demand +for one-shot scheduled tasks. It opens a long-lived IMAP connection at +line 171 (`conn = _imap_connect(...)`) and then performs ~700 lines of +work — IMAP `select`/`FETCH`/`SEARCH`, network POSTs to the LLM endpoint, +SQLite writes, and per-uid awaits. + +If anything in that body raised before this fix, the outer `except` +block at line 921 caught it, logged `"Auto-summarize pass error: ..."`, +and returned. The IMAP `conn.logout()` was *only* called on three safe +paths (early `"No recent emails"`, early `"No model configured"`, and +the happy path at the very end), so any exception meant the socket +stayed open until the IMAP server's idle timeout killed it. For a +background poller that runs every 30 minutes, that is a slow but +unbounded connection leak per crashed pass. + +This is the exact same shape as the just-merged upstream fixes #1325 +(`_imap_move` in `routes/email_helpers.py`) and #1330 (`_list_emails_sync` +in `routes/email_routes.py`), but the request-path fixes did not cover +the *background* poller path — so this is the obvious third instance a +careful reviewer would ask "did we get all of them?". + +The fix is the same try/finally pattern from #1330: + 1. initialize `conn = None` before the try + 2. let the try-block assign `conn = _imap_connect(...)` + 3. drop the three explicit `conn.logout()` calls on safe paths + 4. add a `finally:` block that calls `conn.logout()` if `conn` was set + +The regression test below triggers an exception in the post-`conn` body +(force `conn.select` to raise) and asserts `conn.logout` was called. +Pre-fix the assertion fails because the `except` branch never reaches +`conn.logout`; post-fix the `finally` block guarantees it. +""" + +import os +import sys +import tempfile +from pathlib import Path +from unittest.mock import MagicMock + + +# Point every data-dir-using dependency (core.database, secret_storage, +# routes.email_helpers, ...) at a per-process tmp dir BEFORE any +# `from routes...` import runs. Without this the SQLAlchemy engine +# created at module-import time would try to open `./data/app.db`, +# which doesn't exist on bare CI machines, and our test would fail +# with `OperationalError: unable to open database file` long before +# the leak regression had a chance to fire. +_TMP_DATA = Path(tempfile.mkdtemp(prefix="odysseus-email-polly-leak-")) +os.environ.setdefault("DATA_DIR", str(_TMP_DATA)) +os.environ.setdefault("DATABASE_URL", f"sqlite:///{_TMP_DATA / 'app.db'}") + +PROJECT_ROOT = Path(__file__).resolve().parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + + +async def test_auto_summarize_pass_logs_out_imap_on_select_failure(monkeypatch): + """An exception after `conn = _imap_connect(...)` must still call + `conn.logout()`. Pre-fix, the outer `except` returned without + logging out, leaking the IMAP socket. The `select` call on the + post-connect path is the first un-guarded IMAP call, so forcing + it to raise lands us in the outer `except` cleanly without any + of the inner try/except scans swallowing the error first.""" + import routes.email_pollers as email_pollers + + captured = {} + + class _Conn: + def select(self, folder, readonly=True): + captured.setdefault("select_calls", []).append(folder) + raise RuntimeError("simulated IMAP select failure") + + def logout(self): + captured["logout_calls"] = captured.get("logout_calls", 0) + 1 + + def fake_imap_connect(account_id=None, owner=""): + captured["connect_called"] = True + return _Conn() + + def fake_owner_for(account_id): + return "alice" + + def fake_load_settings(): + # Enable at least one auto_* so we get past the early + # "Nothing to do" return at line 159 (which returns before + # `conn` is created and so is not relevant to the leak). + return {"email_auto_summarize": True} + + monkeypatch.setattr(email_pollers, "_imap_connect", fake_imap_connect) + monkeypatch.setattr(email_pollers, "_owner_for_email_account", fake_owner_for) + monkeypatch.setattr(email_pollers, "_load_settings", fake_load_settings) + + result = await email_pollers._auto_summarize_pass_single( + account_id="acct-1", progress_cb=None, + ) + + assert captured.get("connect_called") is True, ( + "test setup: _imap_connect must be reached for the leak to apply" + ) + assert captured.get("logout_calls", 0) == 1, ( + f"conn.logout() must be called exactly once on the error path " + f"(IMAP leak fix). Got logout_calls={captured.get('logout_calls')}, " + f"select_calls={captured.get('select_calls')}. Pre-fix the " + f"outer `except` returned without logging out the IMAP socket." + ) + assert result.startswith("Error:"), ( + f"On simulated failure, the function should return an 'Error: ...' " + f"string (matches the outer except at line 921). Got: {result!r}" + )