diff --git a/src/visual_report.py b/src/visual_report.py index 007d0dd..70af4b2 100644 --- a/src/visual_report.py +++ b/src/visual_report.py @@ -1663,6 +1663,20 @@ def _extract_report_title(markdown_text: str, fallback: str): return fallback, markdown_text +_ICON_LOGO_RE = re.compile(r'/(icon|logo|favicon)([._/-]|$)', re.IGNORECASE) + + +def _is_icon_or_logo_url(url: str) -> bool: + """True if a URL path points at an icon/logo/favicon asset. + + Matches the icon/logo/favicon token only at a path-segment or basename + boundary, so a real photo whose slug merely CONTAINS the word (e.g. + /iconic-moment.jpg, /logos-history.png) is no longer dropped, while + /icon.png, /logo.svg and /favicon.ico still are. + """ + return bool(_ICON_LOGO_RE.search(url or "")) + + def generate_visual_report( question: str, report_markdown: str, @@ -1711,9 +1725,7 @@ def generate_visual_report( and img not in hidden_images_set and not img.endswith((".svg", ".ico", ".gif")) and not any(b in img for b in _IMAGE_BLOCKLIST) - and "/icon" not in img.lower() - and "/logo" not in img.lower() - and "/favicon" not in img.lower()): + and not _is_icon_or_logo_url(img)): _seen_images.add(img) all_images.append(img) diff --git a/tests/test_visual_report_icon_url.py b/tests/test_visual_report_icon_url.py new file mode 100644 index 0000000..1ba394b --- /dev/null +++ b/tests/test_visual_report_icon_url.py @@ -0,0 +1,29 @@ +"""Hero/section image selection must not drop photos whose slug contains +'icon' or 'logo' as a substring. + +generate_visual_report filtered images with `"/icon" not in url` etc., a +plain substring test that wrongly dropped legitimate photos like +/iconic-moment-2026.jpg and /logos-history-explained.png while intending +to drop only icon/logo/favicon ASSETS. The boundary-aware +_is_icon_or_logo_url helper fixes that. 
# Regression tests for _is_icon_or_logo_url.
#
# Image selection must not drop photos whose slug merely contains "icon" or
# "logo" as a substring, while genuine icon/logo/favicon assets must still
# be flagged.
from src.visual_report import _is_icon_or_logo_url


def test_real_photos_with_icon_or_logo_in_slug_are_kept():
    """Slugs that only contain the token as a substring are not flagged."""
    photo_urls = (
        "https://news.com/iconic-moment-2026.jpg",
        "https://news.com/logos-history-explained.png",
        "https://x.com/the-iconography-of-art.jpg",
    )
    for photo_url in photo_urls:
        assert _is_icon_or_logo_url(photo_url) is False


def test_actual_icon_and_logo_assets_are_still_flagged():
    """Icon/logo/favicon basenames and path segments are flagged."""
    asset_urls = (
        "https://x.com/icon.png",
        "https://x.com/logo.svg",
        "https://x.com/favicon.ico",
        "https://x.com/assets/icon/main.png",
        "https://x.com/logo-dark.png",
    )
    for asset_url in asset_urls:
        assert _is_icon_or_logo_url(asset_url) is True


def test_empty_and_none_are_not_flagged():
    """Missing or empty URLs are treated as not-an-icon."""
    for missing in ("", None):
        assert _is_icon_or_logo_url(missing) is False