From 9d8eebfa6397f0838937e1d6bdedcd2e484332fe Mon Sep 17 00:00:00 2001
From: Afonso Coutinho <116525378+afonsopc@users.noreply.github.com>
Date: Tue, 2 Jun 2026 03:41:33 +0100
Subject: [PATCH] fix: source thumbnails dropped for http-only og:image URLs
 (#667)

* fix: accept http (not just https) og:image URLs for source thumbnails

* test: og:image extraction accepts http and skips relative/svg
---
 src/search/content.py             |  4 ++--
 tests/test_og_image_extraction.py | 32 +++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_og_image_extraction.py
diff --git a/src/search/content.py b/src/search/content.py
index 1c469e8..9711a03 100644
--- a/src/search/content.py
+++ b/src/search/content.py
@@ -130,9 +130,9 @@ def _extract_og_image(soup: BeautifulSoup) -> str:
     tag = soup.find("meta", attrs={"name": "thumbnail"})
     if tag and tag.get("content", "").strip():
         candidates.append(tag["content"].strip())
-    # Return first absolute https URL
+    # Return the first absolute http(s) URL that does not end in .svg/.ico
     for url in candidates:
-        if url.startswith("https://") and not url.endswith((".svg", ".ico")):
+        if url.startswith(("https://", "http://")) and not url.endswith((".svg", ".ico")):
             return url
     return ""
 
diff --git a/tests/test_og_image_extraction.py b/tests/test_og_image_extraction.py
new file mode 100644
index 0000000..164d51a
--- /dev/null
+++ b/tests/test_og_image_extraction.py
@@ -0,0 +1,32 @@
+"""Tests for og:image extraction (src/search/content.py)."""
+import pytest
+
+pytest.importorskip("bs4")
+from bs4 import BeautifulSoup
+
+from src.search.content import _extract_og_image
+
+
+def _soup(html: str) -> BeautifulSoup:  # parse a markup snippet for the tests
+    return BeautifulSoup(markup=html, features="html.parser")
+
+
+def test_accepts_http_og_image():
+    """Regression: a plain-http og:image URL must be returned, not dropped."""
+    # Only https:// used to pass, although the docstring promises "http(s)".
+    markup = '<meta property="og:image" content="http://example.com/cover.jpg">'
+    thumbnail = _extract_og_image(_soup(markup))
+    assert thumbnail == "http://example.com/cover.jpg"
+
+
+def test_still_accepts_https_og_image():  # guards the pre-existing https path
+    soup = _soup('<meta property="og:image" content="https://example.com/cover.png">')
+    assert _extract_og_image(soup) == "https://example.com/cover.png"
+
+
+def test_skips_relative_and_svg():
+    """Neither a relative og:image nor an .svg twitter:image yields a thumbnail."""
+    relative = '<meta property="og:image" content="/relative/logo.png">'
+    vector = '<meta name="twitter:image" content="https://example.com/icon.svg">'
+    soup = _soup(relative + vector)
+    assert _extract_og_image(soup) == ""