From fa1fe7f866a73fc8d31b78e67faa7cd76dfbeb63 Mon Sep 17 00:00:00 2001 From: Joeseph Grey <212606152+StressTestor@users.noreply.github.com> Date: Thu, 4 Jun 2026 06:42:49 -0600 Subject: [PATCH] security: sanitize rendered research-report HTML (#364) The visual research report is assembled from LLM output over crawled web pages (untrusted content) and served under a relaxed `script-src 'unsafe-inline'` CSP. Two values reached that HTML without sanitization: - `_md_to_html` rendered the report markdown via python-markdown, which passes raw HTML through verbatim, so `", + '', + "", + 'x', +]) +def test_md_to_html_strips_active_content(payload): + from src.visual_report import _md_to_html + + out = _md_to_html(f"Report body.\n\n{payload}").lower() + + assert "\nRaw findings\n\ncontent\n\n" + ) + out = _md_to_html(md) + + assert "

on a + # report page served under `script-src 'unsafe-inline'`, so it must be escaped + # or it's an attribute-injection XSS independent of the markdown body. + from src.visual_report import generate_visual_report + + html = generate_visual_report( + question="q", + report_markdown="## H\n\nbody", + category='">', + ) + + assert "