fix(stt): make local microphone transcription work without torch (#801)
faster-whisper runs on CTranslate2, not torch, but _get_whisper() imported torch (only to check CUDA availability) inside the same try block as the faster-whisper import. On a torch-less machine, that import raised ImportError and logged the misleading 'faster-whisper not installed' warning even when faster-whisper was installed, so local mic transcription silently failed. Probe torch separately and optionally: present with CUDA available -> cuda, absent or unusable -> cpu. Also declare faster-whisper in requirements-optional.txt (torch stays an optional extra for GPU acceleration).
This commit is contained in:
@@ -4,6 +4,14 @@
|
||||
# Note: chromadb-client + fastembed moved to requirements.txt — RAG, semantic
|
||||
# memory, and tool selection are core paths, so they ship by default now.
|
||||
|
||||
# Local speech-to-text (microphone -> text) via faster-whisper, for the
|
||||
# "local" STT provider. Runs on CPU out of the box (CTranslate2 backend, no
|
||||
# torch needed). Install if you want to dictate/transcribe with the mic
|
||||
# without sending audio to an external endpoint.
|
||||
# Optional extra: install `torch` too if you have a CUDA GPU and want
|
||||
# GPU-accelerated transcription — it's auto-detected, CPU is used otherwise.
|
||||
faster-whisper
|
||||
|
||||
# DuckDuckGo as a search provider option.
|
||||
# Install if you want DDG in the search-provider dropdown.
|
||||
# Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE.
|
||||
|
||||
@@ -59,17 +59,29 @@ class STTService:
|
||||
if self._whisper_model is None:
|
||||
try:
|
||||
from faster_whisper import WhisperModel
|
||||
settings = self._load_settings()
|
||||
model_size = settings.get("stt_model", "base")
|
||||
# Use CPU by default; will use CUDA if available
|
||||
import torch
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
compute_type = "float16" if device == "cuda" else "int8"
|
||||
self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
|
||||
logger.info(f"faster-whisper model '{model_size}' loaded on {device}")
|
||||
except ImportError:
|
||||
logger.warning("faster-whisper not installed. Install with: pip install faster-whisper")
|
||||
return None
|
||||
try:
|
||||
settings = self._load_settings()
|
||||
model_size = settings.get("stt_model", "base")
|
||||
# faster-whisper runs on CTranslate2, not torch. torch is only
|
||||
# used (optionally) to detect a CUDA device for acceleration —
|
||||
# if it's missing or unusable we just run on CPU. Keeping this
|
||||
# probe separate (and tolerant of any failure, e.g. a broken
|
||||
# CUDA/torch install that raises OSError on import) means a
|
||||
# torch-less or torch-broken machine still does CPU
|
||||
# transcription instead of failing with a misleading
|
||||
# "faster-whisper not installed" error.
|
||||
try:
|
||||
import torch
|
||||
use_cuda = torch.cuda.is_available()
|
||||
except Exception:
|
||||
use_cuda = False
|
||||
device = "cuda" if use_cuda else "cpu"
|
||||
compute_type = "float16" if device == "cuda" else "int8"
|
||||
self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
|
||||
logger.info(f"faster-whisper model '{model_size}' loaded on {device}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to load whisper model: {e}")
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user