From 55fa223e4d1cc0ed08bc25c3da10eba8efe06d33 Mon Sep 17 00:00:00 2001 From: tanmayraut45 Date: Tue, 2 Jun 2026 07:53:40 +0530 Subject: [PATCH] Exempt task webhook trigger from session auth (#784) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit POSTing to the per-task webhook URL shown in the Tasks UI returned 401 Unauthorized even though the URL is labelled "no auth needed". The trigger handler at routes/task_routes.py:873 (`POST /api/tasks/{task_id}/webhook/{token}`) was written as an unauthenticated endpoint — the 32-byte path-embedded `webhook_token` generated by `secrets.token_urlsafe(32)` is the credential, and the handler validates it against the row before doing anything. But AuthMiddleware in app.py runs first and only knows about AUTH_EXEMPT_EXACT (static path set) and AUTH_EXEMPT_PREFIXES (only `/static`), so every external POST (curl, Zapier, n8n, Make, Activepieces) got rejected before the route ever saw the request. External callers can't supply a session cookie, which is precisely why the per-task token exists. Fix: add an AUTH_EXEMPT_PATTERNS list of compiled regexes for dynamic public paths and route `^/api/tasks/[^/]+/webhook/[^/]+/?$` through it. The route handler still enforces `ScheduledTask.webhook_token == token` and 404s on mismatch, so an attacker without the token gets a 404 (indistinguishable from a non-existent task), and a holder of the token gets the documented "POST and a task fires" behaviour. The sibling endpoint `/{task_id}/webhook-regenerate` is admin-gated and deliberately does NOT match the pattern — it requires `_owner(request)` and a session. Tests: tests/test_webhook_trigger_auth_exempt.py extracts the regex list out of app.py, applies it to a representative trigger path (positive) and the four neighbouring task paths that must stay authenticated (negative — `/api/tasks`, `/api/tasks/{id}`, `/api/tasks/{id}/webhook-regenerate`, `/api/tasks/{id}/run`), and pins the handler-side token check so a refactor of the route doesn't quietly turn the endpoint into a truly anonymous one. Closes #621. --- app.py | 18 ++++- tests/test_webhook_trigger_auth_exempt.py | 95 +++++++++++++++++++++++ 2 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 tests/test_webhook_trigger_auth_exempt.py diff --git a/app.py b/app.py index b31f0d8..ca00df1 100644 --- a/app.py +++ b/app.py @@ -169,9 +169,25 @@ if AUTH_ENABLED: "/login", } AUTH_EXEMPT_PREFIXES = ["/static"] + # Dynamic paths whose own handler proves identity via a path-embedded + # secret instead of the session/bearer auth. The route handler at + # routes/task_routes.py validates the per-task `webhook_token` itself + # and returns 404 on mismatch, so the path is the credential — the + # UI labels these URLs "no auth needed" precisely because external + # callers (Zapier, n8n, curl) can't supply a session cookie. Without + # this exemption AuthMiddleware rejects every POST with 401 before + # the token is ever checked. + import re as _re + AUTH_EXEMPT_PATTERNS = [ + _re.compile(r"^/api/tasks/[^/]+/webhook/[^/]+/?$"), + ] def _is_auth_exempt(path: str) -> bool: - return path in AUTH_EXEMPT_EXACT or any(path.startswith(p) for p in AUTH_EXEMPT_PREFIXES) + if path in AUTH_EXEMPT_EXACT: + return True + if any(path.startswith(p) for p in AUTH_EXEMPT_PREFIXES): + return True + return any(p.match(path) for p in AUTH_EXEMPT_PATTERNS) # In-memory token cache: prefix → list[(token_id, token_hash, owner, scopes)]. The DB # query was running on every API-bearer request and scanning bcrypt diff --git a/tests/test_webhook_trigger_auth_exempt.py b/tests/test_webhook_trigger_auth_exempt.py new file mode 100644 index 0000000..a419c49 --- /dev/null +++ b/tests/test_webhook_trigger_auth_exempt.py @@ -0,0 +1,95 @@ +"""Pin the auth exemption for task webhook-trigger URLs. + +The task router exposes ``POST /api/tasks/{task_id}/webhook/{token}`` as a +public webhook entrypoint — the path-embedded ``webhook_token`` is the +credential, and the route handler in ``routes/task_routes.py`` validates +it against the row and returns 404 on mismatch. The UI advertises the +URL as "no auth needed" because external callers (Zapier, n8n, curl) +can't supply a session cookie. + +Without an entry in ``AUTH_EXEMPT_PATTERNS`` ``AuthMiddleware`` rejected +every POST with 401 before the token was ever checked (issue #621). +This test re-reads the exemption logic out of ``app.py`` and confirms a +representative webhook path is treated as exempt, while neighbouring +non-public task paths are NOT. +""" + +import os +import re + + +def _read_app_source() -> str: + app_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "app.py", + ) + with open(app_path, encoding="utf-8") as fh: + return fh.read() + + +def test_webhook_trigger_path_is_in_exempt_patterns(): + """The dynamic webhook trigger path must match an AUTH_EXEMPT_PATTERNS + entry. Pull every regex literal compiled inside the block out of the + source and apply it directly — extraction has to tolerate nested + brackets inside each character class (e.g. ``[^/]+``).""" + src = _read_app_source() + # Find the start of the list, then walk character-by-character to the + # matching closing bracket. A regex would have to count brackets, + # which is more painful than just doing the count by hand. + start = src.find("AUTH_EXEMPT_PATTERNS") + assert start != -1, "AUTH_EXEMPT_PATTERNS not declared in app.py" + lb = src.find("[", start) + assert lb != -1 + depth = 0 + end = -1 + for i in range(lb, len(src)): + ch = src[i] + if ch == "[": + depth += 1 + elif ch == "]": + depth -= 1 + if depth == 0: + end = i + break + assert end != -1, "could not find closing bracket for AUTH_EXEMPT_PATTERNS" + body = src[lb + 1 : end] + # Pull each compiled regex literal: _re.compile(r"..."). + patterns = re.findall(r'_re\.compile\(\s*r"([^"]+)"\s*\)', body) + assert patterns, ( + "expected at least one compiled regex in AUTH_EXEMPT_PATTERNS" + ) + compiled = [re.compile(p) for p in patterns] + + sample = "/api/tasks/abc123/webhook/" + "x" * 43 + assert any(c.match(sample) for c in compiled), ( + f"webhook trigger path {sample!r} must be auth-exempt - issue #621" + ) + + # Negative: routes that are NOT meant to be public must not match. + for not_public in ( + "/api/tasks", + "/api/tasks/abc123", + "/api/tasks/abc123/webhook-regenerate", + "/api/tasks/abc123/run", + ): + assert not any(c.match(not_public) for c in compiled), ( + f"{not_public!r} must NOT be auth-exempt" + ) + + +def test_webhook_trigger_handler_still_validates_token(): + """The exemption is only safe because the route handler in + routes/task_routes.py still checks the token against the row and + returns 404 on mismatch. Pin that behaviour so a refactor of the + handler doesn't quietly make the endpoint truly anonymous. Read the + source directly — importing task_routes pulls in SQLAlchemy and + fails under the conftest stubs.""" + routes_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.abspath(__file__))), + "routes", + "task_routes.py", + ) + with open(routes_path, encoding="utf-8") as fh: + src = fh.read() + assert "ScheduledTask.webhook_token == token" in src + assert '@router.post("/{task_id}/webhook/{token}")' in src