fix: visual report drops photos whose URL slug contains icon or logo (#1685)

This commit is contained in:
Afonso Coutinho
2026-06-03 06:22:45 +01:00
committed by GitHub
parent 3d00c85636
commit 1161040efe
2 changed files with 44 additions and 3 deletions

View File

@@ -1663,6 +1663,20 @@ def _extract_report_title(markdown_text: str, fallback: str):
return fallback, markdown_text
_ICON_LOGO_RE = re.compile(r'/(icon|logo|favicon)([._/-]|$)', re.IGNORECASE)
def _is_icon_or_logo_url(url: str) -> bool:
"""True if a URL path points at an icon/logo/favicon asset.
Matches the icon/logo/favicon token only at a path-segment or basename
boundary, so a real photo whose slug merely CONTAINS the word (e.g.
/iconic-moment.jpg, /logos-history.png) is no longer dropped, while
/icon.png, /logo.svg and /favicon.ico still are.
"""
return bool(_ICON_LOGO_RE.search(url or ""))
def generate_visual_report(
question: str,
report_markdown: str,
@@ -1711,9 +1725,7 @@ def generate_visual_report(
and img not in hidden_images_set
and not img.endswith((".svg", ".ico", ".gif"))
and not any(b in img for b in _IMAGE_BLOCKLIST)
and "/icon" not in img.lower()
and "/logo" not in img.lower()
and "/favicon" not in img.lower()):
and not _is_icon_or_logo_url(img)):
_seen_images.add(img)
all_images.append(img)

View File

@@ -0,0 +1,29 @@
"""Hero/section image selection must not drop photos whose slug contains
'icon' or 'logo' as a substring.
generate_visual_report filtered images with `"/icon" not in url` etc., a
plain substring test that wrongly dropped legitimate photos like
/iconic-moment-2026.jpg and /logos-history-explained.png while intending
to drop only icon/logo/favicon ASSETS. The boundary-aware
_is_icon_or_logo_url helper fixes that.
"""
from src.visual_report import _is_icon_or_logo_url
def test_real_photos_with_icon_or_logo_in_slug_are_kept():
    """Photos whose slug only contains 'icon'/'logo' as a substring are kept."""
    photo_urls = (
        "https://news.com/iconic-moment-2026.jpg",
        "https://news.com/logos-history-explained.png",
        "https://x.com/the-iconography-of-art.jpg",
    )
    for photo_url in photo_urls:
        assert _is_icon_or_logo_url(photo_url) is False
def test_actual_icon_and_logo_assets_are_still_flagged():
    """Genuine icon/logo/favicon assets are still reported as such."""
    asset_urls = (
        "https://x.com/icon.png",
        "https://x.com/logo.svg",
        "https://x.com/favicon.ico",
        "https://x.com/assets/icon/main.png",
        "https://x.com/logo-dark.png",
    )
    for asset_url in asset_urls:
        assert _is_icon_or_logo_url(asset_url) is True
def test_empty_and_none_are_not_flagged():
    """Missing input (empty string or None) is never treated as an icon/logo."""
    for missing in ("", None):
        assert _is_icon_or_logo_url(missing) is False