feat(workers): whisper loader with CUDA detect + CPU fallback

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
root
2026-06-01 10:06:50 +10:00
parent 2adeae555d
commit e64f1345f6
2 changed files with 57 additions and 0 deletions

View File

@@ -0,0 +1,34 @@
import os
from .log import log
# Module-level cache for the loaded model: None until whisper_model() has
# constructed it, after which the same instance is returned on every call.
_whisper_model = None
def cuda_available():
    """Report whether CTranslate2 can see at least one CUDA device.

    The probe is best-effort: if ``ctranslate2`` cannot be imported or the
    device query raises, the failure is logged under the
    ``ctranslate2_cuda_probe_failed`` event and ``False`` is returned
    instead of propagating the error.

    Returns:
        ``True`` when the reported CUDA device count is greater than zero,
        ``False`` otherwise (including when the probe itself fails).
    """
    try:
        # Imported here so a missing/broken ctranslate2 install is treated
        # the same way as "no GPU" rather than breaking module import.
        import ctranslate2

        has_device = ctranslate2.get_cuda_device_count() > 0
    except Exception as exc:
        log.info("ctranslate2_cuda_probe_failed", err=str(exc))
        return False
    else:
        return has_device
def whisper_model():
    """Return the shared faster-whisper model, loading it on first use.

    Configuration is read from the environment at load time:

    * ``WHISPER_MODEL``  - model name (default ``"small.en"``).
    * ``WHISPER_CACHE``  - download/cache directory
      (default ``"/var/lib/void/whisper-models"``).

    The model is loaded on ``cuda`` with ``float16`` when
    ``cuda_available()`` is true, otherwise on ``cpu`` with ``int8``.  If a
    CUDA load raises ``RuntimeError`` or ``ValueError`` (a device can be
    visible while the CUDA runtime libraries or the requested compute type
    are unusable), the load is retried once on ``cpu``/``int8`` so the
    worker still comes up.  Failures of a CPU load are not intercepted.

    Returns:
        The cached ``faster_whisper.WhisperModel`` instance.

    Raises:
        Whatever ``WhisperModel(...)`` raises when the CPU load (initial or
        fallback) fails, or when a CUDA load fails with an exception other
        than ``RuntimeError``/``ValueError``.
    """
    global _whisper_model
    if _whisper_model is not None:
        return _whisper_model

    # Imported lazily so importing this module does not require
    # faster_whisper to be installed.
    from faster_whisper import WhisperModel

    name = os.environ.get("WHISPER_MODEL", "small.en")
    cache = os.environ.get("WHISPER_CACHE", "/var/lib/void/whisper-models")
    device = "cuda" if cuda_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "int8"

    log.info("whisper_loading", model=name, device=device,
             compute_type=compute_type, cache=cache)
    try:
        model = WhisperModel(
            name, device=device, compute_type=compute_type, download_root=cache
        )
    except (RuntimeError, ValueError) as e:
        if device != "cuda":
            # Nothing left to fall back to; surface the original error.
            raise
        log.info("whisper_cuda_load_failed", model=name, err=str(e))
        device, compute_type = "cpu", "int8"
        log.info("whisper_loading", model=name, device=device,
                 compute_type=compute_type, cache=cache)
        model = WhisperModel(
            name, device=device, compute_type=compute_type, download_root=cache
        )

    # Only publish the model once a load has fully succeeded, so a failed
    # attempt leaves the cache empty and the next call can retry.
    _whisper_model = model
    return _whisper_model
def whisper_transcribe(audio_path):
    """Transcribe an audio file and return its text as one string.

    The shared model from ``whisper_model()`` is asked to transcribe
    ``audio_path`` with ``vad_filter=True``.  Each returned segment's text
    is stripped of surrounding whitespace, the pieces are joined with
    newlines, and the final result is stripped once more.

    Args:
        audio_path: Passed through unchanged as the first argument to the
            model's ``transcribe`` method.

    Returns:
        The newline-joined segment texts, with no leading or trailing
        whitespace.
    """
    model = whisper_model()
    segments, _ = model.transcribe(audio_path, vad_filter=True)
    lines = [segment.text.strip() for segment in segments]
    transcript = "\n".join(lines)
    return transcript.strip()