From 49885ff9e7b80eab2127295fbcd038843ca3698d Mon Sep 17 00:00:00 2001
From: Wes Huber <wesleybaxterhuber@gmail.com>
Date: Tue, 2 Jun 2026 21:30:04 -0700
Subject: [PATCH] fix(documents): use strip_pdf_content_marker instead of
 lstrip for PDF auto-open (#1727)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

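str.lstrip("\n[PDF content]:") treats its argument as a set of characters
to strip, not as a literal prefix, so it keeps consuming characters past
the marker and into the following [Page N text]: marker. Both "[" and "P"
are in that set, so [Page 1 text]: becomes "age 1 text]:". The correct
helper strip_pdf_content_marker (which uses removeprefix) already exists in
the same file and is used by other call sites.

Note that the explicit trailing .strip() at this call site is removed as
well, so any whitespace trimming now depends on strip_pdf_content_marker.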

Fixes #1663

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/document_processor.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/document_processor.py b/src/document_processor.py
index 486aaba..3616cf8 100644
--- a/src/document_processor.py
+++ b/src/document_processor.py
@@ -394,9 +394,7 @@ def build_user_content(
                         # Pull the PDF prose once — used as either intro_text
                         # (form path) or the doc body (plain path).
                         try:
-                            pdf_body_text = _process_pdf(path).lstrip(
-                                "\n[PDF content]:"
-                            ).strip()
+                            pdf_body_text = strip_pdf_content_marker(_process_pdf(path))
                         except Exception:
                             pdf_body_text = None