faster-whisper (small.en, GPU+CPU fallback) on CT 102 → POST /api/voice/transcribe (multer→whisper client) → mic in the bubble records (MediaRecorder), uploads, drops the transcript into the input to review-and-send. Infra scripts in deploy/whisper/. Retention (P2b) next. NOTE: mic needs a secure context (the https domain), not the LAN IP. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
36 lines
1.1 KiB
Python
36 lines
1.1 KiB
Python
import os, tempfile
|
|
from fastapi import FastAPI, UploadFile, File, HTTPException
|
|
from faster_whisper import WhisperModel
|
|
|
|
# Name of the Whisper model to load; the WHISPER_MODEL environment variable
# overrides the "small.en" default.
MODEL = os.getenv("WHISPER_MODEL", "small.en")

# Module-level state filled in by load(): the WhisperModel instance and the
# device string ("cuda" or "cpu") it was created on.
model = None
device_used = None

app = FastAPI()
|
|
|
|
def load():
    """Create the global Whisper model, preferring the GPU.

    First tries ``device="cuda"`` with ``compute_type="int8_float16"``. If
    that constructor raises for any reason, the failure is logged with its
    traceback and the model is created on the CPU with ``compute_type="int8"``
    instead. On return the module globals ``model`` and ``device_used``
    ("cuda" or "cpu") are set.

    Raises:
        Exception: whatever ``WhisperModel`` raises if the CPU load fails too.
    """
    global model, device_used

    import logging  # local import keeps this block self-contained

    try:
        model = WhisperModel(MODEL, device="cuda", compute_type="int8_float16")
        device_used = "cuda"
    except Exception:
        # Deliberate best-effort fallback, but record why the GPU path failed
        # so an unexpected CPU fallback can be diagnosed from the logs.
        logging.getLogger(__name__).warning(
            "Loading Whisper model %r on CUDA failed; falling back to CPU",
            MODEL,
            exc_info=True,
        )
        model = WhisperModel(MODEL, device="cpu", compute_type="int8")
        device_used = "cpu"


# Runs at import time, so the model is loaded before any request is handled.
load()
|
|
|
|
# Health endpoint: reports the configured model name and the device the model
# was loaded on. Documented with a comment rather than a docstring so the
# route's generated API description stays exactly as it was.
@app.get("/health")
def health():
    status = {"ok": True}
    status["model"] = MODEL
    status["device"] = device_used
    return status
|
|
|
|
@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the loaded Whisper model.

    The upload is read fully into memory, written to a temporary file and
    handed to ``model.transcribe`` by path (``beam_size=1``,
    ``vad_filter=True``). The blocking file I/O and inference run on the
    event loop's default executor, so other requests (for example
    ``/health``) keep being served while a transcription is in progress.

    Args:
        file: The uploaded audio, sent as the ``file`` form field.

    Returns:
        A dict with the whitespace-stripped transcript (``text``), the
        ``language`` and ``duration`` (rounded to two decimals) reported by
        the model, and the ``device`` the model was loaded on.

    Raises:
        HTTPException: 400 ``"empty audio"`` when the upload contains no bytes.
    """
    import asyncio  # local import keeps this block self-contained

    data = await file.read()
    if not data:
        raise HTTPException(400, "empty audio")

    def _run_blocking():
        # Everything in here blocks, so it must not run on the event loop.
        with tempfile.NamedTemporaryFile(suffix=".bin") as tmp:
            tmp.write(data)
            tmp.flush()  # the model reopens the file by name; bytes must be on disk
            segments, info = model.transcribe(
                tmp.name, beam_size=1, vad_filter=True
            )
            # faster-whisper yields segments lazily, so joining them is what
            # drives the actual decoding; keep it inside the worker thread.
            text = "".join(segment.text for segment in segments).strip()
        return text, info

    loop = asyncio.get_running_loop()
    text, info = await loop.run_in_executor(None, _run_blocking)

    return {
        "text": text,
        "language": info.language,
        "duration": round(info.duration, 2),
        "device": device_used,
    }
|