From 49885ff9e7b80eab2127295fbcd038843ca3698d Mon Sep 17 00:00:00 2001 From: Wes Huber Date: Tue, 2 Jun 2026 21:30:04 -0700 Subject: [PATCH] fix(documents): use strip_pdf_content_marker instead of lstrip for PDF auto-open (#1727) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lstrip("\n[PDF content]:") treats the argument as a character set, not a prefix, so it chews into the following [Page N text]: marker — e.g. turning [Page 1 text]: into "age 1 text]:". The correct helper strip_pdf_content_marker (which uses removeprefix) already exists in the same file and is used by other call sites. Fixes #1663 Co-authored-by: Claude Opus 4.6 (1M context) --- src/document_processor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/document_processor.py b/src/document_processor.py index 486aaba..3616cf8 100644 --- a/src/document_processor.py +++ b/src/document_processor.py @@ -394,9 +394,7 @@ def build_user_content( # Pull the PDF prose once — used as either intro_text # (form path) or the doc body (plain path). try: - pdf_body_text = _process_pdf(path).lstrip( - "\n[PDF content]:" - ).strip() + pdf_body_text = strip_pdf_content_marker(_process_pdf(path)) except Exception: pdf_body_text = None