# Void-Homelab/workers/tests/test_model.py

from unittest.mock import patch, MagicMock
from void_workers import model


def test_model_returns_singleton(monkeypatch):
    """whisper_model() must hand back the same instance on repeated calls.

    The patched constructor produces a *fresh* mock on every invocation, so
    the identity check can only pass when the second call reuses the first
    result instead of constructing another model. (A fixed ``return_value``
    would make ``a is b`` trivially true even without any caching.)
    """
    # Start from an empty module-level slot so the first call has to build.
    monkeypatch.setattr(model, "_whisper_model", None)
    with patch("void_workers.model.cuda_available", return_value=False):
        with patch(
            "faster_whisper.WhisperModel",
            side_effect=lambda *args, **kwargs: MagicMock(),
        ) as WM:
            a = model.whisper_model()
            b = model.whisper_model()
            assert a is b
            # Two lookups, one construction: the instance was reused.
            assert WM.call_count == 1


def test_uses_gpu_when_available(monkeypatch):
    """With CUDA reported available, the model is built for cuda/float16."""
    monkeypatch.setattr(model, "_whisper_model", None)
    cuda_on = patch("void_workers.model.cuda_available", return_value=True)
    ctor_patch = patch("faster_whisper.WhisperModel", return_value=MagicMock())
    with cuda_on, ctor_patch as whisper_cls:
        model.whisper_model()
        # Inspect the keyword arguments of the most recent constructor call.
        ctor_kwargs = whisper_cls.call_args.kwargs
        assert ctor_kwargs["device"] == "cuda"
        assert ctor_kwargs["compute_type"] == "float16"


def test_falls_back_to_cpu_when_cuda_load_fails(monkeypatch):
    """A failed CUDA load must yield a CPU model rather than an exception.

    Simulates a host where the GPU is detected but constructing the model on
    it fails (e.g. VRAM exhausted); the job should not crash.
    """
    monkeypatch.setattr(model, "_whisper_model", None)
    fallback = MagicMock()

    def ctor_stub(name, device, compute_type, download_root):
        # Only the CUDA attempt fails; any other device gets the stand-in.
        if device != "cuda":
            return fallback
        raise RuntimeError("CUDA failed to allocate memory")

    with patch("void_workers.model.cuda_available", return_value=True), patch(
        "faster_whisper.WhisperModel", side_effect=ctor_stub
    ):
        loaded = model.whisper_model()
    assert loaded is fallback


def test_transcribe_returns_joined_segments(monkeypatch):
    """whisper_transcribe() output contains the text of every segment."""
    segments = [MagicMock(text=" Hello world "), MagicMock(text=" second line")]
    stub_model = MagicMock()
    # transcribe() yields a (segments, info) pair; the info object is a dummy.
    stub_model.transcribe.return_value = (segments, MagicMock())
    monkeypatch.setattr(model, "_whisper_model", stub_model)
    transcript = model.whisper_transcribe("/tmp/whatever.opus")
    for expected in ("Hello world", "second line"):
        assert expected in transcript