From 9d8eebfa6397f0838937e1d6bdedcd2e484332fe Mon Sep 17 00:00:00 2001
From: Afonso Coutinho <116525378+afonsopc@users.noreply.github.com>
Date: Tue, 2 Jun 2026 03:41:33 +0100
Subject: [PATCH] fix: source thumbnails dropped for http-only og:image URLs
(#667)
* fix: accept http (not just https) og:image URLs for source thumbnails
* test: og:image extraction accepts http and skips relative/svg
---
src/search/content.py | 4 ++--
tests/test_og_image_extraction.py | 32 +++++++++++++++++++++++++++++++
2 files changed, 34 insertions(+), 2 deletions(-)
create mode 100644 tests/test_og_image_extraction.py
diff --git a/src/search/content.py b/src/search/content.py
index 1c469e8..9711a03 100644
--- a/src/search/content.py
+++ b/src/search/content.py
@@ -130,9 +130,9 @@ def _extract_og_image(soup: BeautifulSoup) -> str:
tag = soup.find("meta", attrs={"name": "thumbnail"})
if tag and tag.get("content", "").strip():
candidates.append(tag["content"].strip())
- # Return first absolute https URL
+ # Return first absolute http(s) URL
for url in candidates:
- if url.startswith("https://") and not url.endswith((".svg", ".ico")):
+ if url.startswith(("https://", "http://")) and not url.endswith((".svg", ".ico")):
return url
return ""
diff --git a/tests/test_og_image_extraction.py b/tests/test_og_image_extraction.py
new file mode 100644
index 0000000..164d51a
--- /dev/null
+++ b/tests/test_og_image_extraction.py
@@ -0,0 +1,32 @@
+"""Tests for og:image extraction (src/search/content.py)."""
+import pytest
+
+pytest.importorskip("bs4")
+from bs4 import BeautifulSoup
+
+from src.search.content import _extract_og_image
+
+
+def _soup(html: str) -> BeautifulSoup:  # parse a snippet with the stdlib-backed parser
+    return BeautifulSoup(markup=html, features="html.parser")
+
+
+def test_accepts_http_og_image():  # NOTE(review): <meta> fixture reconstructed - confirm
+    # Regression: only https URLs were returned, so plain-http og:image
+    # (still common) yielded no thumbnail despite the docstring promising
+    # "http(s)".
+    html = '<meta property="og:image" content="http://example.com/cover.jpg">'
+    assert _extract_og_image(_soup(html)) == "http://example.com/cover.jpg"
+
+
+def test_still_accepts_https_og_image():  # https must keep working; NOTE(review): fixture reconstructed
+    html = '<meta property="og:image" content="https://example.com/cover.png">'
+    assert _extract_og_image(_soup(html)) == "https://example.com/cover.png"
+
+
+def test_skips_relative_and_svg():  # NOTE(review): <meta> fixtures reconstructed - confirm
+    html = (
+        '<meta property="og:image" content="/static/cover.jpg">'  # relative URL: rejected
+        '<meta name="thumbnail" content="https://example.com/logo.svg">'  # absolute but .svg: rejected
+    )
+    assert _extract_og_image(_soup(html)) == ""