82 lines
2.9 KiB
Python
82 lines
2.9 KiB
Python
"""Tests for research_utils.py — thinking block stripping and quality filtering."""
|
|
|
|
from src.research_utils import strip_thinking, is_low_quality
|
|
|
|
|
|
class TestStripThinking:
    """Exercise ``strip_thinking`` on tagged, malformed, and untagged input."""

    def test_removes_think_tags(self):
        """A leading ``<think>`` block is removed, leaving only the answer."""
        raw = "<think>some internal reasoning</think>Final answer."
        cleaned = strip_thinking(raw)
        assert cleaned == "Final answer."

    def test_removes_thinking_tags(self):
        """The longer ``<thinking>`` spelling is removed the same way."""
        raw = "<thinking>some internal reasoning</thinking>Final answer."
        cleaned = strip_thinking(raw)
        assert cleaned == "Final answer."

    def test_removes_nested_tags(self):
        """Nested blocks leave no opening tag behind and keep the trailing text."""
        raw = "<think>outer <think>inner</think> still outer</think>Result."
        cleaned = strip_thinking(raw)
        assert "<think>" not in cleaned
        assert "Result." in cleaned

    def test_handles_orphaned_opening_tag(self):
        """An opening tag with no matching close does not survive in the output."""
        raw = "<think>unclosed reasoning block\nFinal answer."
        cleaned = strip_thinking(raw)
        assert "<think>" not in cleaned

    def test_handles_orphaned_closing_tag(self):
        """A stray closing tag is dropped while the text before it is kept."""
        raw = "Some text</think> and more."
        cleaned = strip_thinking(raw)
        assert "</think>" not in cleaned
        assert "Some text" in cleaned

    def test_empty_string(self):
        """An empty string comes back as an empty string."""
        cleaned = strip_thinking("")
        assert cleaned == ""

    def test_none_input(self):
        """``None`` is passed through as ``None``."""
        outcome = strip_thinking(None)
        assert outcome is None

    def test_no_thinking_tags(self):
        """Text without any tags is returned equal to the input."""
        raw = "Just a normal response with no tags."
        cleaned = strip_thinking(raw)
        assert cleaned == raw

    def test_preserves_content_after_thinking(self):
        """Markdown content following the block is still present afterwards."""
        raw = "<think>planning step</think>\n\n# Report\n\nHere is the report."
        cleaned = strip_thinking(raw)
        assert "# Report" in cleaned
        assert "Here is the report." in cleaned

    def test_strips_qwen_thinking_process(self):
        """A plain-text ``Thinking Process`` preamble is removed; the answer stays."""
        raw = (
            "Thinking Process: Let me analyze this carefully.\n\n# Answer\n\nThe answer is 42."
        )
        cleaned = strip_thinking(raw)
        assert "Thinking Process" not in cleaned
        assert "The answer is 42." in cleaned
|
|
|
|
|
|
class TestIsLowQuality:
    """Exercise ``is_low_quality`` on empty, ordinary, and marker-bearing text."""

    def test_empty_string(self):
        """An empty string is reported as low quality."""
        verdict = is_low_quality("")
        assert verdict is True

    def test_none_input(self):
        """``None`` is reported as low quality."""
        verdict = is_low_quality(None)
        assert verdict is True

    def test_normal_summary(self):
        """An ordinary factual sentence is not flagged."""
        summary = "Python 3.12 introduces several new features."
        verdict = is_low_quality(summary)
        assert verdict is False

    def test_insufficient_marker(self):
        """Text saying the content is insufficient is flagged."""
        summary = "The content is insufficient to answer."
        verdict = is_low_quality(summary)
        assert verdict is True

    def test_no_relevant_info(self):
        """Text saying no relevant information was found is flagged."""
        summary = "No relevant information found in the source."
        verdict = is_low_quality(summary)
        assert verdict is True

    def test_boilerplate(self):
        """Text describing the page as boilerplate is flagged."""
        summary = "This page contains only boilerplate text."
        verdict = is_low_quality(summary)
        assert verdict is True

    def test_unable_to_extract(self):
        """Text saying extraction was not possible is flagged."""
        summary = "Unable to extract meaningful data."
        verdict = is_low_quality(summary)
        assert verdict is True

    def test_case_insensitive(self):
        """An upper-case variant of a marker phrase is flagged as well."""
        summary = "UNABLE TO EXTRACT any data"
        verdict = is_low_quality(summary)
        assert verdict is True

    def test_copyright_marker(self):
        """Text mentioning only a copyright notice is flagged."""
        summary = "Just a copyright notice at the bottom."
        verdict = is_low_quality(summary)
        assert verdict is True
|