feat(dross): voice Phase 2a — local whisper transcribe + mic (2.12.0)

Pipeline: faster-whisper (small.en, GPU with CPU fallback) runs on CT 102 → POST /api/voice/transcribe (multer → whisper client) → the mic in the bubble records audio (MediaRecorder), uploads it, and drops the transcript into the input so it can be reviewed before sending. Infra scripts live in deploy/whisper/. Retention (Phase 2b) is next. NOTE: the mic needs a secure context (the https domain); it does not work over the LAN IP.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 01:00:10 +10:00
parent fc1e93a58f
commit e29bacbda1
10 changed files with 196 additions and 3 deletions
--- a/deploy/whisper/server.py
+++ b/deploy/whisper/server.py
@@ -0,0 +1,35 @@
+import os, tempfile
+from fastapi import FastAPI, UploadFile, File, HTTPException
+from faster_whisper import WhisperModel
+
+MODEL = os.environ.get("WHISPER_MODEL", "small.en")  # model name; override with the WHISPER_MODEL env var
+app = FastAPI()
+model = None  # WhisperModel instance; assigned by load()
+device_used = None  # "cuda" or "cpu"; assigned by load()
+
+def load():
+    global model, device_used
+    try:
+        model = WhisperModel(MODEL, device="cuda", compute_type="int8_float16")
+        device_used = "cuda"
+    except Exception:
+        model = WhisperModel(MODEL, device="cpu", compute_type="int8")
+        device_used = "cpu"
+
+load()
+
+@app.get("/health")
+def health():
+    return {"ok": True, "model": MODEL, "device": device_used}
+
+@app.post("/transcribe")
+async def transcribe(file: UploadFile = File(...)):
+    data = await file.read()
+    if not data:
+        raise HTTPException(400, "empty audio")
+    with tempfile.NamedTemporaryFile(suffix=".bin") as f:
+        f.write(data); f.flush()
+        segments, info = model.transcribe(f.name, beam_size=1, vad_filter=True)
+        text = "".join(s.text for s in segments).strip()
+    return {"text": text, "language": info.language,
+            "duration": round(info.duration, 2), "device": device_used}