diff --git a/src/research_utils.py b/src/research_utils.py
index ec9cffa..9961848 100644
--- a/src/research_utils.py
+++ b/src/research_utils.py
@@ -39,9 +39,16 @@ LOW_QUALITY_MARKERS = [
     "unable to extract",
     "completely unrelated",
     "boilerplate",
-    "cookie",
     "footer text",
-    "copyright",
+    # Use multi-word phrases rather than bare "cookie"/"copyright": this still
+    # catches boilerplate such as consent banners and footers, but no longer
+    # discards legitimate findings whose subject is cookies or copyright.
+    "cookie consent",
+    "cookie banner",
+    "cookie notice",
+    "copyright notice",
+    "copyright footer",
+    "all rights reserved",
 ]
 
 
diff --git a/tests/test_research_utils.py b/tests/test_research_utils.py
index 12e4df6..52001d0 100644
--- a/tests/test_research_utils.py
+++ b/tests/test_research_utils.py
@@ -79,3 +79,19 @@ class TestIsLowQuality:
 
     def test_copyright_marker(self):
         assert is_low_quality("Just a copyright notice at the bottom.") is True
+
+    # Regression: bare "cookie"/"copyright" used to be substring markers, so
+    # legitimate findings that merely discuss them as their subject were
+    # discarded. They must now be kept.
+    def test_keeps_finding_about_copyright_law(self):  # bare "copyright" as the subject must not trip the filter
+        assert is_low_quality("This article explains the new EU copyright directive reforms.") is False
+
+    def test_keeps_finding_about_cookies(self):  # "cookies" in a technical sense must not trip the filter
+        assert is_low_quality("A technical guide to how tracking cookies and session cookies work.") is False
+
+    def test_keeps_recipe_mentioning_cookies(self):  # "cookies" in the food sense must not trip the filter
+        assert is_low_quality("Recipe: the best chocolate chip cookies you will ever bake.") is False
+
+    # Genuine boilerplate is still caught by the multi-word phrase markers.
+    def test_cookie_consent_banner_still_filtered(self):  # text containing "cookie consent" must still be flagged
+        assert is_low_quality("The page is just a cookie consent banner.") is True