faster-whisper (small.en, GPU+CPU fallback) on CT 102 → POST /api/voice/transcribe (multer→whisper client) → mic in the bubble records (MediaRecorder), uploads, drops the transcript into the input to review-and-send. Infra scripts in deploy/whisper/. Retention (P2b) next. NOTE: mic needs a secure context (the https domain), not the LAN IP. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
25 lines
1.1 KiB
JavaScript
25 lines
1.1 KiB
JavaScript
import { Router } from 'express';
|
|
import multer from 'multer';
|
|
import { asyncWrap } from '../errors.js';
|
|
import { requireOwner } from '../cap.js';
|
|
import * as whisper from '../../voice/whisper.js';
|
|
// Router instance that the voice routes in this file are registered on.
export const router = Router();

// Uploads are buffered in memory rather than written to disk; clips are small
// voice notes, so a 25 MB ceiling per file is enforced by multer.
const MAX_CLIP_BYTES = 25 * 1024 * 1024;
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: MAX_CLIP_BYTES },
});
|
|
|
|
// POST /api/voice/transcribe — owner-only. multipart field `audio`.
// Responses:
//   200 { text, duration }                      — duration is null when the whisper client reports none
//   400 { error: { code: 'no_audio' } }         — no file, or an empty buffer, was uploaded
//   503 { error: { code: 'stt_unavailable' } }  — the whisper client threw
// (Phase 2b will optionally persist the clip + transcript when keepClips is on.)
router.post('/transcribe', requireOwner, upload.single('audio'), asyncWrap(async (req, res) => {
  const { file } = req;
  if (!file?.buffer?.length) {
    return res.status(400).json({ error: { code: 'no_audio', message: 'no audio supplied' } });
  }

  try {
    // `||` (not `??`) on purpose: an empty filename or mimetype should also fall back.
    const result = await whisper.transcribe(
      file.buffer,
      file.originalname || 'clip.webm',
      file.mimetype || 'audio/webm',
    );
    res.json({ text: result.text, duration: result.duration ?? null });
  } catch (err) {
    // Keep the client-facing message generic, but record the real cause
    // server-side so a failing transcription can actually be diagnosed.
    console.error('voice transcribe failed:', err);
    res.status(503).json({ error: { code: 'stt_unavailable', message: 'transcription service unavailable' } });
  }
}));
|