fix(tts): tolerate a malformed tts_speed instead of 500-ing (#1450)

synthesize() and get_stats() parsed the stored tts_speed with a bare
float(settings.get("tts_speed", "1")). The manage_settings agent tool maps
"speech speed"/"voice speed" to tts_speed and, because the setting's default is
a string, writes the value through unvalidated — so an agent (or a hand-edited
settings.json) can store "fast" or "". After that, GET /api/tts/stats and POST
/api/tts/synthesize both 500 with ValueError until the JSON is corrected by hand.

Parse defensively via a _safe_speed() helper (non-numeric/empty/<=0 -> 1.0),
mirroring the settings layer's tolerance of corrupt config.

Adds tests/test_tts_speed_malformed.py (stats + synthesize) — both raise
ValueError before this change and pass after.
This commit is contained in:
Shaw
2026-06-03 01:12:03 -04:00
committed by GitHub
parent 844dbf6a22
commit d38fb4bc46
2 changed files with 45 additions and 2 deletions

View File

@@ -12,6 +12,18 @@ from typing import Optional, Dict, Any
logger = logging.getLogger(__name__)
def _safe_speed(value, default: float = 1.0) -> float:
"""Parse the stored tts_speed defensively. The settings layer tolerates
corrupt/agent-written config, so a non-numeric or empty value (e.g. an agent
setting "speech speed" = "fast", or a hand-edited settings.json) must not
crash synthesis or the stats endpoint with a ValueError."""
try:
speed = float(value)
except (TypeError, ValueError):
return default
return speed if speed > 0 else default
class TTSService:
"""Multi-provider TTS service.
@@ -136,7 +148,7 @@ class TTSService:
provider = settings["tts_provider"]
model = settings["tts_model"]
voice = settings["tts_voice"]
speed = float(settings.get("tts_speed", "1"))
speed = _safe_speed(settings.get("tts_speed", "1"))
if provider in ("disabled", "browser"):
return None
@@ -198,7 +210,7 @@ class TTSService:
"provider": provider,
"model": settings["tts_model"],
"voice": settings["tts_voice"],
"speed": float(settings.get("tts_speed", "1")),
"speed": _safe_speed(settings.get("tts_speed", "1")),
"cache_entries": len(cache_files),
"cache_size_mb": round(cache_size / (1024 * 1024), 2),
}

View File

@@ -0,0 +1,31 @@
"""Regression: a malformed tts_speed must not crash TTS.
services/tts/tts_service.py read `float(settings.get("tts_speed", "1"))` with no
guard in both synthesize() and get_stats(). The manage_settings agent tool maps
"speech speed"/"voice speed" to tts_speed and (because the default is a string)
writes the value through unvalidated, so an agent or a hand-edited settings.json
could store "fast"/"" and then GET /api/tts/stats and POST /api/tts/synthesize
both 500 with ValueError until the JSON is fixed by hand. The settings layer
tolerates corrupt config; this consumer now does too.
"""
from services.tts.tts_service import TTSService
_BAD_SETTINGS = {
"tts_enabled": True, "tts_provider": "browser",
"tts_model": "tts-1", "tts_voice": "alloy", "tts_speed": "fast",
}
def test_get_stats_does_not_crash_on_malformed_speed(monkeypatch, tmp_path):
    """get_stats() must report the 1.0 fallback for a non-numeric tts_speed."""
    tts = TTSService(cache_dir=str(tmp_path))
    # Hand out a fresh copy per call so the shared fixture is never mutated.
    monkeypatch.setattr(tts, "_load_settings", lambda: _BAD_SETTINGS.copy())
    # This call raised ValueError before the fix.
    reported = tts.get_stats()
    assert reported["speed"] == 1.0
def test_synthesize_does_not_crash_on_malformed_speed(monkeypatch, tmp_path):
    """synthesize() must get past the speed parse for a non-numeric tts_speed."""
    tts = TTSService(cache_dir=str(tmp_path))
    monkeypatch.setattr(tts, "_load_settings", lambda: _BAD_SETTINGS.copy())
    # With the 'browser' provider synthesize() returns None right after the
    # (now guarded) speed parse, so a None result shows the malformed speed
    # no longer raises ValueError first.
    audio = tts.synthesize("hello", use_cache=False)
    assert audio is None