Treat Venice as a tool-capable SOTA cloud provider (#1173)

Follow-up to the Venice provider PR. Wire api.venice.ai into the three host allowlists so Venice behaves like the other paid OpenAI-compatible clouds:

- agent_loop: add api.venice.ai to _API_HOSTS so the agent sends native OpenAI tool-call schemas (Venice supports function calling) instead of degrading to fenced-block parsing.
- teacher_escalation: add api.venice.ai to _SOTA_HOSTS so the escalation loop stays OFF for Venice (it's a paid top-tier API; no need to add teacher-model latency).
- webhook_routes: add venice to KNOWN_PROVIDERS so the sync chat webhook can auto-resolve base_url from provider=venice.

Tests: tests/test_venice_hosts.py pins tool-host matching + SOTA classification for Venice; py_compile on touched modules.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-02 11:03:46 -03:00
parent 3799dc102f
commit c0c1ceb36d
4 changed files with 36 additions and 2 deletions
--- a/routes/webhook_routes.py
+++ b/routes/webhook_routes.py
@@ -197,6 +197,7 @@ def setup_webhook_routes(
        "openrouter": "https://openrouter.ai/api/v1",
        "ollama": "https://ollama.com/api",
        "fireworks": "https://api.fireworks.ai/inference/v1",
+        "venice": "https://api.venice.ai/api/v1",
    }

    # Model prefix → provider mapping for auto-detection
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -457,7 +457,7 @@ _API_HOSTS = frozenset([
    "api.deepseek.com", "deepseek.com",
    "api.together.xyz", "api.fireworks.ai",
    "api.perplexity.ai", "api.x.ai",
-    "ollama.com",
+    "ollama.com", "api.venice.ai",
    # Local OpenAI-compatible endpoints (llama.cpp, vLLM, LM Studio, etc.).
    # Without these, `_is_api_model` falls back to keyword sniffing on the
    # model name, so well-behaved local servers don't get native tool
--- a/src/teacher_escalation.py
+++ b/src/teacher_escalation.py
@@ -42,7 +42,7 @@ _SOTA_HOSTS = frozenset({
    "api.together.xyz", "api.fireworks.ai",
    "api.perplexity.ai", "api.x.ai",
    "generativelanguage.googleapis.com", "api.groq.com",
-    "openrouter.ai", "ollama.com",
+    "openrouter.ai", "ollama.com", "api.venice.ai",
 })


--- a/tests/test_venice_hosts.py
+++ b/tests/test_venice_hosts.py
@@ -0,0 +1,33 @@
+"""Venice host-allowlist behavior (follow-up to provider support).
+
+Venice (https://api.venice.ai/api/v1) is a paid, OpenAI-compatible cloud API
+with native tool-calling. These tests pin two of the three host-list integrations:
+  - agent loop sends native tool schemas to Venice (not fenced-block parsing),
+  - teacher escalation treats Venice as SOTA (loop OFF, no added latency).
+"""
+from src import agent_loop, teacher_escalation
+
+
+class TestAgentToolHosts:
+    def test_venice_in_api_hosts(self):
+        assert "api.venice.ai" in agent_loop._API_HOSTS
+
+    def test_venice_url_matches_api_host(self):
+        # Mirrors the runtime check: any(h in endpoint_url for h in _API_HOSTS)
+        url = "https://api.venice.ai/api/v1/chat/completions"
+        assert any(h in url for h in agent_loop._API_HOSTS)
+
+    def test_unknown_host_not_matched(self):
+        url = "https://example.invalid/v1/chat/completions"
+        assert not any(h in url for h in agent_loop._API_HOSTS)
+
+
+class TestTeacherEscalationSota:
+    def test_venice_is_sota_not_self_hosted(self):
+        assert teacher_escalation.is_self_hosted("https://api.venice.ai/api/v1/chat/completions") is False
+
+    def test_known_cloud_still_sota(self):
+        assert teacher_escalation.is_self_hosted("https://api.openai.com/v1") is False
+
+    def test_local_endpoint_still_self_hosted(self):
+        assert teacher_escalation.is_self_hosted("http://localhost:8000/v1") is True