diff --git a/requirements-optional.txt b/requirements-optional.txt
index 72d9f7e..d4900fe 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -4,6 +4,14 @@
 # Note: chromadb-client + fastembed moved to requirements.txt — RAG, semantic
 # memory, and tool selection are core paths, so they ship by default now.
 
+# Local speech-to-text (microphone -> text) via faster-whisper, for the
+# "local" STT provider. Runs on CPU out of the box (CTranslate2 backend, no
+# torch needed). Install if you want to dictate/transcribe with the mic
+# without sending audio to an external endpoint.
+# Optional extra: also install `torch` if you have a CUDA GPU and want
+# GPU-accelerated transcription; CUDA is auto-detected, with CPU as fallback.
+faster-whisper
+
 # DuckDuckGo as a search provider option.
 # Install if you want DDG in the search-provider dropdown.
 # Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE.
diff --git a/services/stt/stt_service.py b/services/stt/stt_service.py
index 55e57af..0587128 100644
--- a/services/stt/stt_service.py
+++ b/services/stt/stt_service.py
@@ -59,17 +59,29 @@ class STTService:
         if self._whisper_model is None:
             try:
                 from faster_whisper import WhisperModel
-                settings = self._load_settings()
-                model_size = settings.get("stt_model", "base")
-                # Use CPU by default; will use CUDA if available
-                import torch
-                device = "cuda" if torch.cuda.is_available() else "cpu"
-                compute_type = "float16" if device == "cuda" else "int8"
-                self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
-                logger.info(f"faster-whisper model '{model_size}' loaded on {device}")
             except ImportError:
                 logger.warning("faster-whisper not installed. Install with: pip install faster-whisper")
                 return None
+            try:
+                settings = self._load_settings()
+                model_size = settings.get("stt_model", "base")
+                # faster-whisper runs on CTranslate2, not torch. torch is only
+                # used (optionally) to detect a CUDA device for acceleration —
+                # if it's missing or unusable we just run on CPU. Keeping this
+                # probe separate (and tolerant of any failure, e.g. a broken
+                # CUDA/torch install that raises OSError on import) means a
+                # torch-less or torch-broken machine still does CPU
+                # transcription instead of failing with a misleading
+                # "faster-whisper not installed" error.
+                try:
+                    import torch
+                    use_cuda = torch.cuda.is_available()
+                except Exception:
+                    use_cuda = False
+                device = "cuda" if use_cuda else "cpu"
+                compute_type = "float16" if device == "cuda" else "int8"
+                self._whisper_model = WhisperModel(model_size, device=device, compute_type=compute_type)
+                logger.info(f"faster-whisper model '{model_size}' loaded on {device}")
             except Exception as e:
                 logger.error(f"Failed to load whisper model: {e}")
                 return None