# Void-Homelab/workers/tests/test_model.py

from unittest.mock import patch, MagicMock
from void_workers import model


def test_model_returns_singleton(monkeypatch):
    """Repeated ``whisper_model()`` calls must hand back one cached instance.

    The patched constructor returns a *fresh* mock on every invocation, so
    the identity check can only pass when the second call is served from the
    ``_whisper_model`` cache. (With a fixed ``return_value`` the check would
    pass even if the model were rebuilt on each call.)
    """
    # Start from an empty cache so the first call has to construct a model.
    monkeypatch.setattr(model, "_whisper_model", None)
    no_cuda = patch("void_workers.model.cuda_available", return_value=False)
    fresh_ctor = patch(
        "faster_whisper.WhisperModel",
        side_effect=lambda *args, **kwargs: MagicMock(),
    )
    with no_cuda, fresh_ctor:
        a = model.whisper_model()
        b = model.whisper_model()
    # Guard against a vacuous pass where both calls return None.
    assert a is not None
    assert a is b


def test_uses_gpu_when_available(monkeypatch):
    """With CUDA reported available, the model is built for cuda/float16."""
    monkeypatch.setattr(model, "_whisper_model", None)
    cuda_on = patch("void_workers.model.cuda_available", return_value=True)
    free_vram = patch("void_workers.gpu.free_ollama_vram", return_value=[])
    ctor = patch("faster_whisper.WhisperModel", return_value=MagicMock())
    with cuda_on, free_vram, ctor as WM:
        model.whisper_model()
        # Keyword arguments of the most recent constructor call.
        ctor_kwargs = WM.call_args.kwargs
    assert ctor_kwargs["device"] == "cuda"
    assert ctor_kwargs["compute_type"] == "float16"


def test_frees_ollama_before_gpu_load(monkeypatch):
    """Ollama VRAM must be freed BEFORE the cuda model is constructed."""
    monkeypatch.setattr(model, "_whisper_model", None)
    calls = []

    def record_free(*args, **kwargs):
        # Stand-in for free_ollama_vram: only notes that it ran.
        calls.append("free")

    def record_load(*args, **kwargs):
        # Stand-in for the WhisperModel constructor: notes the call and
        # returns a throwaway model object.
        calls.append("load")
        return MagicMock()

    cuda_on = patch("void_workers.model.cuda_available", return_value=True)
    free_vram = patch("void_workers.gpu.free_ollama_vram",
                      side_effect=record_free)
    ctor = patch("faster_whisper.WhisperModel", side_effect=record_load)
    with cuda_on, free_vram, ctor:
        model.whisper_model()
    assert calls == ["free", "load"]


def test_falls_back_to_cpu_when_cuda_load_fails(monkeypatch):
    """A failed cuda load must yield a CPU model rather than an exception.

    Simulates a GPU that is present but cannot host the model (e.g. VRAM
    exhausted): ``whisper_model()`` is expected to return the model built by
    the non-cuda constructor call instead of crashing the job.
    """
    monkeypatch.setattr(model, "_whisper_model", None)
    cpu_model = MagicMock()

    def fake_ctor(name, device, compute_type, download_root):
        # Any non-cuda device succeeds; the cuda attempt always blows up.
        if device != "cuda":
            return cpu_model
        raise RuntimeError("CUDA failed to allocate memory")

    cuda_on = patch("void_workers.model.cuda_available", return_value=True)
    free_vram = patch("void_workers.gpu.free_ollama_vram", return_value=[])
    ctor = patch("faster_whisper.WhisperModel", side_effect=fake_ctor)
    with cuda_on, free_vram, ctor:
        got = model.whisper_model()
    assert got is cpu_model


def test_transcribe_returns_joined_segments(monkeypatch):
    """``whisper_transcribe`` output contains the text of every segment."""
    raw_texts = (" Hello world ", " second line")
    segments = [MagicMock(text=raw) for raw in raw_texts]
    stub_model = MagicMock()
    # transcribe() yields a (segments, info) pair; the info part is a dummy.
    stub_model.transcribe.return_value = (segments, MagicMock())
    # Pre-seed the cache so no real model is constructed.
    monkeypatch.setattr(model, "_whisper_model", stub_model)
    transcript = model.whisper_transcribe("/tmp/whatever.opus")
    for fragment in ("Hello world", "second line"):
        assert fragment in transcript