diff --git a/src/teacher_escalation.py b/src/teacher_escalation.py
index 56b8ce6..e830ce1 100644
--- a/src/teacher_escalation.py
+++ b/src/teacher_escalation.py
@@ -112,7 +112,7 @@ def evaluate_turn_regex(
                 return ("failure", f"tool result matched error pattern {pat.pattern!r}: {snippet!r}")
 
     # Agent verbally gave up?
-    if agent_reply:
+    if isinstance(agent_reply, str) and agent_reply:
         for pat in _REPLY_GIVE_UP_PATTERNS:
             m = pat.search(agent_reply)
             if m:
diff --git a/tests/test_teacher_eval_nonstring_reply.py b/tests/test_teacher_eval_nonstring_reply.py
new file mode 100644
index 0000000..73a179a
--- /dev/null
+++ b/tests/test_teacher_eval_nonstring_reply.py
@@ -0,0 +1,14 @@
+from src.teacher_escalation import evaluate_turn_regex
+
+
+def test_evaluate_turn_regex_tolerates_non_string_reply():
+    # agent_reply is typed str but is the raw LLM turn output; a non-string
+    # (dict / number from a malformed turn) made pat.search(agent_reply) raise
+    # TypeError. The tool_results branch already isinstance-guards its rows.
+    assert evaluate_turn_regex([], 123) == ("ok", None)
+    assert evaluate_turn_regex([], {"text": "I cannot do that"}) == ("ok", None)
+
+
+def test_evaluate_turn_regex_still_flags_give_up_string():
+    status, _ = evaluate_turn_regex([], "I don't have a tool to do that")
+    assert status == "failure"