21 lines
740 B
Python
21 lines
740 B
Python
from src.youtube_handler import format_transcript_for_context
|
|
|
|
|
|
def test_format_transcript_skips_non_dict_segments():
|
|
# segments come from the parsed transcript JSON; a malformed entry (None or
|
|
# a bare string) made seg['timestamp'] raise TypeError and lose the whole
|
|
# timestamped transcript.
|
|
data = {
|
|
"success": True, "transcript": "full text", "video_id": "x",
|
|
"segments": [
|
|
{"timestamp": "0:01", "text": "hello"},
|
|
"junk-seg",
|
|
None,
|
|
{"timestamp": "0:05", "text": "world"},
|
|
],
|
|
}
|
|
out = format_transcript_for_context(data, "https://youtu.be/x")
|
|
assert "[0:01] hello" in out
|
|
assert "[0:05] world" in out
|
|
assert "junk-seg" not in out
|