Harden PDF document markers against cross-owner upload access (#445)

Route PDF lookups through UploadHandler.resolve_upload, reject poisoned pdf_source markers on document create/update, and add regression tests.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Duarte Antunes
2026-06-01 14:38:14 +01:00
committed by GitHub
parent b2e8d692a4
commit 448401a0fc
5 changed files with 183 additions and 106 deletions

View File

@@ -167,9 +167,18 @@ def find_source_upload_id(content: str) -> Optional[str]:
Matches both the form-source marker (`pdf_form_source`) used for fillable
PDFs and the plain marker (`pdf_source`) used for any imported PDF.
Rejects malformed ids (path traversal, wrong shape) before any lookup.
"""
from src.upload_handler import is_valid_upload_id
m = _FRONT_MATTER_RE.search(content or "") or _PLAIN_FRONT_MATTER_RE.search(content or "")
return m.group("upload_id") if m else None
if not m:
return None
upload_id = m.group("upload_id")
if not is_valid_upload_id(upload_id):
logger.warning("Ignoring invalid pdf_source upload_id in document content: %r", upload_id)
return None
return upload_id
def render_plain_pdf_markdown(upload_id: str, title: str, body_text: Optional[str] = None) -> str:

View File

@@ -29,6 +29,14 @@ import logging
logger = logging.getLogger(__name__)
UPLOAD_ID_RE = re.compile(r"^[0-9a-fA-F]{32}\.[A-Za-z0-9]+$")


def is_valid_upload_id(upload_id: str) -> bool:
    """Return True when *upload_id* matches the canonical uploads.json id format.

    The accepted shape is exactly 32 hexadecimal characters, a dot, and a
    non-empty alphanumeric extension (see ``UPLOAD_ID_RE``). Everything else
    is rejected without raising: ``None``, the empty string, ids containing
    path separators or trailing newlines, and non-string values.
    """
    # ``upload_id or ""`` only covered falsy inputs; a truthy non-string
    # (int, bytes, list from decoded JSON) would make ``fullmatch`` raise
    # TypeError. A validator for untrusted input should answer False instead.
    if not isinstance(upload_id, str):
        return False
    return UPLOAD_ID_RE.fullmatch(upload_id) is not None
class UploadHandler:
def __init__(self, base_dir: str, upload_dir: str):
self.base_dir = base_dir
@@ -223,8 +231,7 @@ class UploadHandler:
def validate_upload_id(self, upload_id: str) -> bool:
    """Validate that the upload ID matches the expected pattern.

    Delegates to the module-level ``is_valid_upload_id`` so the handler and
    every other caller share a single definition of a well-formed id,
    instead of this method carrying its own copy of the regex.

    Returns:
        True when *upload_id* is well-formed, False otherwise.
    """
    return is_valid_upload_id(upload_id)
def _inside_upload_dir(self, path: str) -> bool:
"""Check if path is inside the upload directory."""