"""Tests for og:image extraction (src/search/content.py).""" import pytest pytest.importorskip("bs4") from bs4 import BeautifulSoup from src.search.content import _extract_og_image def _soup(html: str) -> BeautifulSoup: return BeautifulSoup(html, "html.parser") def test_accepts_http_og_image(): # Regression: only https URLs were returned, so plain-http og:image # (still common) yielded no thumbnail despite the docstring promising # "http(s)". html = '' assert _extract_og_image(_soup(html)) == "http://example.com/cover.jpg" def test_still_accepts_https_og_image(): html = '' assert _extract_og_image(_soup(html)) == "https://example.com/cover.png" def test_skips_relative_and_svg(): html = ( '' '' ) assert _extract_og_image(_soup(html)) == ""