Files
Void-Homelab/workers/tests/test_model.py
root 3c028fed5a fix(workers): graceful GPU→CPU fallback for Whisper at load time
cuda_available() only covers "no GPU present". On a shared card the GPU
can exist but fail to load the model (VRAM exhausted by another process,
e.g. Ollama). Try CUDA first, fall back to a CPU model on any load
error instead of crashing the transcription job. Supports HA portability
(node without GPU) and a contended GPU. Adds GPU-path + fallback tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 08:04:14 +10:00

50 lines
1.9 KiB
Python

from unittest.mock import patch, MagicMock
from void_workers import model
def test_model_returns_singleton(monkeypatch):
    """Repeated ``whisper_model()`` calls build the model once and reuse it.

    The patched constructor hands back the same mock on every call, so the
    identity check alone would pass even if nothing were cached. The
    constructor call count is what actually proves the instance is reused.
    """
    m = MagicMock()
    # Start from an empty cache so the first call has to construct a model.
    monkeypatch.setattr(model, "_whisper_model", None)
    with patch("void_workers.model.cuda_available", return_value=False):
        with patch("faster_whisper.WhisperModel", return_value=m) as WM:
            a = model.whisper_model()
            b = model.whisper_model()
    assert a is b
    # The second call must be served from the cache, not a fresh construction.
    assert WM.call_count == 1
def test_uses_gpu_when_available(monkeypatch):
    """With CUDA reported available, the model is built for cuda/float16."""
    monkeypatch.setattr(model, "_whisper_model", None)
    cuda_on = patch("void_workers.model.cuda_available", return_value=True)
    ctor_patch = patch("faster_whisper.WhisperModel", return_value=MagicMock())
    with cuda_on, ctor_patch as whisper_ctor:
        model.whisper_model()
    # Inspect the keyword arguments of the most recent constructor call.
    passed = whisper_ctor.call_args.kwargs
    assert passed["device"] == "cuda"
    assert passed["compute_type"] == "float16"
def test_falls_back_to_cpu_when_cuda_load_fails(monkeypatch):
    """A CUDA load failure yields a CPU model instead of raising.

    The fake constructor raises for ``device == "cuda"`` and returns
    ``cpu_model`` for anything else. It also records every device it is
    asked for, so the test can confirm CUDA was really tried before the
    CPU fallback.
    """
    # GPU is present but the model fails to load (e.g. VRAM exhausted): must
    # not raise — fall back to a CPU model instead of crashing the job.
    monkeypatch.setattr(model, "_whisper_model", None)
    cpu_model = MagicMock()
    attempted = []

    def fake_ctor(name, device, compute_type, download_root):
        attempted.append(device)
        if device == "cuda":
            raise RuntimeError("CUDA failed to allocate memory")
        return cpu_model

    with patch("void_workers.model.cuda_available", return_value=True):
        with patch("faster_whisper.WhisperModel", side_effect=fake_ctor):
            got = model.whisper_model()
    assert got is cpu_model
    # Without this check the test would also pass if CUDA were never tried.
    assert attempted == ["cuda", "cpu"]
def test_transcribe_returns_joined_segments(monkeypatch):
    """Text from every segment the model yields appears in the transcript."""
    segments = [MagicMock(text=" Hello world "), MagicMock(text=" second line")]
    stub_model = MagicMock()
    # transcribe() is stubbed to return a 2-tuple: the segments plus a
    # placeholder mock for the second element.
    stub_model.transcribe.return_value = (segments, MagicMock())
    # Pre-seed the module-level model with the stub.
    monkeypatch.setattr(model, "_whisper_model", stub_model)

    transcript = model.whisper_transcribe("/tmp/whatever.opus")

    for expected in ("Hello world", "second line"):
        assert expected in transcript