"""Streaming tool-call accumulation tests for the OpenAI-compatible path. Regression for Gemini's OpenAI-compat layer, which (a) attaches an opaque thought_signature in `extra_content` on the function-call delta and (b) omits `index` on PARALLEL tool calls — every parallel delta arrives as index=None. The accumulator must give each parallel call its own slot (otherwise they collide into slot 0, overwriting the first call's name and concatenating — corrupting — its arguments) and must preserve extra_content per call. """ import json import asyncio from src import llm_core class _FakeResp: def __init__(self, lines): self._lines = lines self.status_code = 200 async def aiter_lines(self): for ln in self._lines: yield ln async def aread(self): return b"" class _FakeStreamCtx: def __init__(self, lines): self._lines = lines async def __aenter__(self): return _FakeResp(self._lines) async def __aexit__(self, *a): return False class _FakeClient: def __init__(self, lines): self._lines = lines def stream(self, method, url, **kw): return _FakeStreamCtx(self._lines) def _drive(monkeypatch, lines, model="gemini-3.1-pro-preview-customtools"): """Run stream_llm against a canned SSE line list; return parsed events.""" monkeypatch.setattr(llm_core, "_get_http_client", lambda: _FakeClient(lines)) monkeypatch.setattr(llm_core, "_is_host_dead", lambda u: False) monkeypatch.setattr(llm_core, "note_model_activity", lambda *a, **k: None) monkeypatch.setattr(llm_core, "_clear_host_dead", lambda *a, **k: None) async def run(): events = [] async for chunk in llm_core.stream_llm( "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions", model, [{"role": "user", "content": "hi"}], headers={"Authorization": "Bearer k"}, tools=[{"type": "function", "function": {"name": "x", "parameters": {}}}], ): for ln in chunk.split("\n"): ln = ln.strip() if ln.startswith("data: ") and ln[6:] != "[DONE]": try: events.append(json.loads(ln[6:])) except ValueError: pass return events return asyncio.run(run()) def _sse(delta): return "data: " + json.dumps({"choices": [{"delta": delta}]}) def test_parallel_calls_with_null_index_do_not_collide(monkeypatch): # Two parallel calls, each complete in one delta, both with index=None # (exactly what Gemini's OpenAI-compat layer emits). Only the first carries # a thought_signature. lines = [ _sse({"tool_calls": [{ "index": None, "id": "call_a", "type": "function", "function": {"name": "get_memory", "arguments": "{}"}, "extra_content": {"google": {"thought_signature": "SIG0"}}, }]}), _sse({"tool_calls": [{ "index": None, "id": "call_b", "type": "function", "function": {"name": "bash", "arguments": '{"command":"echo hi"}'}, }]}), "data: [DONE]", ] events = _drive(monkeypatch, lines) calls = next(e["calls"] for e in events if e.get("type") == "tool_calls") assert len(calls) == 2, f"parallel calls collided: {calls}" by_name = {c["name"]: c for c in calls} assert set(by_name) == {"get_memory", "bash"} # arguments are NOT corrupted by concatenation assert by_name["get_memory"]["arguments"] == "{}" assert by_name["bash"]["arguments"] == '{"command":"echo hi"}' # signature preserved on the first call only, exactly as received assert by_name["get_memory"]["extra_content"] == {"google": {"thought_signature": "SIG0"}} assert "extra_content" not in by_name["bash"] def test_single_call_chunked_arguments_still_accumulate(monkeypatch): # Conformant OpenAI style: index present, arguments streamed in pieces. lines = [ _sse({"tool_calls": [{"index": 0, "id": "c", "type": "function", "function": {"name": "search", "arguments": '{"q":"'}}]}), _sse({"tool_calls": [{"index": 0, "function": {"arguments": 'cats"}'}}]}), "data: [DONE]", ] events = _drive(monkeypatch, lines, model="gpt-4o-test") calls = next(e["calls"] for e in events if e.get("type") == "tool_calls") assert len(calls) == 1 assert calls[0]["name"] == "search" assert calls[0]["arguments"] == '{"q":"cats"}' def test_null_index_chunked_arguments_attach_to_last_call(monkeypatch): # index=None where the name arrives first, then an arg-only continuation: # the continuation must attach to the just-started call, not open a new one. lines = [ _sse({"tool_calls": [{"index": None, "id": "c", "type": "function", "function": {"name": "search", "arguments": '{"q":'}}]}), _sse({"tool_calls": [{"index": None, "function": {"arguments": '"dogs"}'}}]}), "data: [DONE]", ] events = _drive(monkeypatch, lines) calls = next(e["calls"] for e in events if e.get("type") == "tool_calls") assert len(calls) == 1, f"continuation opened a spurious call: {calls}" assert calls[0]["arguments"] == '{"q":"dogs"}' def test_sparse_integer_indices_then_null_do_not_collide(monkeypatch): # Hardening: a provider that uses sparse integer indices (0 and 2) and then # a null-index call must allocate ABOVE the max key, not at len()==2 (which # would overwrite slot 2). Three distinct calls must survive. lines = [ _sse({"tool_calls": [{"index": 0, "id": "a", "function": {"name": "f0", "arguments": "{}"}}]}), _sse({"tool_calls": [{"index": 2, "id": "b", "function": {"name": "f2", "arguments": "{}"}}]}), _sse({"tool_calls": [{"index": None, "id": "c", "function": {"name": "fn", "arguments": "{}"}}]}), "data: [DONE]", ] events = _drive(monkeypatch, lines) calls = next(e["calls"] for e in events if e.get("type") == "tool_calls") assert sorted(c["name"] for c in calls) == ["f0", "f2", "fn"], f"collision: {calls}"