Harden PDF document markers against cross-owner upload access (#445)

Route PDF lookups through UploadHandler.resolve_upload, reject poisoned pdf_source markers on document create/update, and add regression tests.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Duarte Antunes
2026-06-01 14:38:14 +01:00
committed by GitHub
parent b2e8d692a4
commit 448401a0fc
5 changed files with 183 additions and 106 deletions

View File

@@ -167,9 +167,18 @@ def find_source_upload_id(content: str) -> Optional[str]:
Matches both the form-source marker (`pdf_form_source`) used for fillable
PDFs and the plain marker (`pdf_source`) used for any imported PDF.
Rejects malformed ids (path traversal, wrong shape) before any lookup.
"""
from src.upload_handler import is_valid_upload_id
m = _FRONT_MATTER_RE.search(content or "") or _PLAIN_FRONT_MATTER_RE.search(content or "")
return m.group("upload_id") if m else None
if not m:
return None
upload_id = m.group("upload_id")
if not is_valid_upload_id(upload_id):
logger.warning("Ignoring invalid pdf_source upload_id in document content: %r", upload_id)
return None
return upload_id
def render_plain_pdf_markdown(upload_id: str, title: str, body_text: Optional[str] = None) -> str: