# odysseus/tests/test_youtube_transcript_seg_nondict.py

from src.youtube_handler import format_transcript_for_context


def test_format_transcript_skips_non_dict_segments():
    """Check that non-dict segment entries are ignored by the formatter.

    Segments come from the parsed transcript JSON. A malformed entry (None
    or a bare string) used to make ``seg['timestamp']`` raise TypeError,
    which lost the whole timestamped transcript. The well-formed segments
    around the malformed ones must still be rendered.
    """
    first_valid = {"timestamp": "0:01", "text": "hello"}
    last_valid = {"timestamp": "0:05", "text": "world"}
    transcript_data = {
        "success": True,
        "transcript": "full text",
        "video_id": "x",
        # Two malformed entries sandwiched between two well-formed ones.
        "segments": [first_valid, "junk-seg", None, last_valid],
    }

    rendered = format_transcript_for_context(
        transcript_data, "https://youtu.be/x"
    )

    # Both valid segments survive in "[timestamp] text" form.
    for expected_line in ("[0:01] hello", "[0:05] world"):
        assert expected_line in rendered
    # The bare-string entry must not leak into the output.
    assert "junk-seg" not in rendered