feat(dross): voice Phase 2a — local whisper transcribe + mic (2.12.0)

faster-whisper (small.en, GPU with CPU fallback) on CT 102 → POST /api/voice/transcribe (multer → whisper client) → the mic in the bubble records via MediaRecorder, uploads the clip, and drops the transcript into the input for review-and-send. Infra scripts are in deploy/whisper/. Retention (P2b) is next.

NOTE: the mic needs a secure context (the https domain), not the LAN IP.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-10 01:00:10 +10:00
parent fc1e93a58f
commit e29bacbda1
10 changed files with 196 additions and 3 deletions
--- a/lib/api/index.js
+++ b/lib/api/index.js
@@ -39,6 +39,7 @@ import { router as backupsRouter } from './routes/backups.js';
 import { router as kuttRouter } from './routes/kutt.js';
 import { router as themeRouter } from './routes/theme.js';
 import { router as drossRouter } from './routes/dross.js';
+import { router as voiceRouter } from './routes/voice.js';

 export function mountApi(app) {
  const api = Router();
@@ -75,6 +76,7 @@ export function mountApi(app) {
  api.use('/kutt', kuttRouter);
  api.use('/theme', themeRouter);
  api.use('/dross', drossRouter);
+  api.use('/voice', voiceRouter);
  api.use('/pending-changes', pendingChangesRouter);
  api.use('/audit', auditRouter);
  api.use('/search', searchRouter);
--- a/lib/api/routes/voice.js
+++ b/lib/api/routes/voice.js
@@ -0,0 +1,24 @@
+import { Router } from 'express';
+import multer from 'multer';
+import { asyncWrap } from '../errors.js';
+import { requireOwner } from '../cap.js';
+import * as whisper from '../../voice/whisper.js';
+export const router = Router();
+
+// Voice notes are short clips, so multer buffers each upload in memory instead of
+// writing it to disk. `fileSize` caps a single uploaded file at 25 MB.
+const MAX_CLIP_BYTES = 25 * 1024 * 1024;
+const upload = multer({
+  storage: multer.memoryStorage(),
+  limits: { fileSize: MAX_CLIP_BYTES },
+});
+
+// POST /api/voice/transcribe — owner-only. Multipart field `audio`.
+//   200 → { text, duration }  (duration is null when the whisper client reports none)
+//   400 → { error: { code: 'no_audio' } }        upload missing or zero-length
+//   503 → { error: { code: 'stt_unavailable' } } the whisper call threw
+// (Phase 2b will optionally persist the clip + transcript when keepClips is on.)
+router.post('/transcribe', requireOwner, upload.single('audio'), asyncWrap(async (req, res) => {
+  if (!req.file || !req.file.buffer?.length) {
+    return res.status(400).json({ error: { code: 'no_audio', message: 'no audio supplied' } });
+  }
+  try {
+    const r = await whisper.transcribe(
+      req.file.buffer, req.file.originalname || 'clip.webm', req.file.mimetype || 'audio/webm');
+    res.json({ text: r.text, duration: r.duration ?? null });
+  } catch (err) {
+    // The client only ever sees a generic 503, so record the underlying cause here;
+    // otherwise a timeout, a non-2xx from whisper, and an unparseable response body
+    // are indistinguishable when debugging.
+    console.error('voice/transcribe: whisper call failed:', err);
+    res.status(503).json({ error: { code: 'stt_unavailable', message: 'transcription service unavailable' } });
+  }
+}));
--- a/lib/voice/whisper.js
+++ b/lib/voice/whisper.js
@@ -0,0 +1,22 @@
+// Thin client for the local faster-whisper service on CT 102 (the Ollama box).
+// GPU with CPU fallback lives in the service itself; here we just POST the audio
+// buffer and return the transcript. LAN-only endpoint.
+// Base URL of the whisper service; override with the WHISPER_URL env var. Trailing
+// slashes are stripped so `${WHISPER_URL}/transcribe` and `${WHISPER_URL}/health`
+// never turn into `//transcribe` / `//health` when the env value ends in `/`.
+const WHISPER_URL = (process.env.WHISPER_URL || 'http://192.168.1.185:8001').replace(/\/+$/, '');
+
+/**
+ * Upload an audio clip to the whisper service and return its transcript.
+ *
+ * @param {*} buffer - Audio bytes; wrapped as the single part of a Blob.
+ * @param {string} [filename='clip.webm'] - Filename sent with the multipart `file` field.
+ * @param {string} [mime='audio/webm'] - MIME type set on the uploaded Blob.
+ * @returns {Promise<{text: string, duration: *, device: *}>} The trimmed transcript
+ *   (empty string when the response has no `text`), plus the response's `duration`
+ *   and `device` fields passed through unchanged.
+ * @throws {Error} `whisper <status>` on a non-2xx response. Network, timeout and
+ *   JSON-parse errors from fetch propagate unchanged.
+ */
+export async function transcribe(buffer, filename = 'clip.webm', mime = 'audio/webm') {
+  const clip = new Blob([buffer], { type: mime });
+  const form = new FormData();
+  form.append('file', clip, filename);
+
+  // The request is aborted if the service has not answered within 120 s.
+  const response = await fetch(`${WHISPER_URL}/transcribe`, {
+    method: 'POST',
+    body: form,
+    signal: AbortSignal.timeout(120000),
+  });
+  if (!response.ok) {
+    throw new Error(`whisper ${response.status}`);
+  }
+
+  const { text, duration, device } = await response.json();
+  return { text: (text || '').trim(), duration, device };
+}
+
+/**
+ * Probe the whisper service's `/health` endpoint (5 s timeout).
+ *
+ * @returns {Promise<*>} The parsed JSON body on a 2xx response; `null` on a
+ *   non-2xx status or on any thrown error (network failure, timeout, invalid JSON).
+ */
+export async function health() {
+  try {
+    const response = await fetch(`${WHISPER_URL}/health`, { signal: AbortSignal.timeout(5000) });
+    if (!response.ok) return null;
+    // `return await` keeps a JSON-parse rejection inside this try/catch.
+    return await response.json();
+  } catch {
+    return null;
+  }
+}