feat(dross): voice Phase 2b — clip retention (2.13.0)

'Keep voice clips' setting (default off). When on, /api/voice/transcribe
saves the audio (0600) to the owner-only ZFS subvol at /var/lib/void/
voice-clips (CT 311 mp0, replicated to Z3) + a voice_clips row (migration
029, transcript+metadata in void-db). New clips list/play/delete API +
Settings UI. Storage path is configurable (VOICE_CLIPS_DIR).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
root
2026-06-10 01:27:40 +10:00
parent bc55da6b1e
commit 70bdba1a24
8 changed files with 159 additions and 11 deletions

View File

@@ -10,7 +10,7 @@ import * as agents from '../../db/repos/agents.js';
import { runAgentTurn } from '../../ai/agent/run_turn.js';
import { personaFor } from '../../ai/personas/index.js';
const DEFAULT_SETTINGS = { avatar: 'soft-eye', accent: '#a86adf', persona: '', voiceMode: 'review' };
const DEFAULT_SETTINGS = { avatar: 'soft-eye', accent: '#a86adf', persona: '', voiceMode: 'review', keepClips: false };
const COMPANION_SLUG = 'companion';
export const router = Router();
@@ -23,7 +23,8 @@ const settingsBody = z.object({
avatar: z.enum(['soft-eye', 'wisp', 'motes']),
accent: z.string().regex(/^#[0-9a-fA-F]{6}$/),
persona: z.string().max(8000),
voiceMode: z.enum(['review', 'handsfree', 'action'])
voiceMode: z.enum(['review', 'handsfree', 'action']),
keepClips: z.boolean().default(false)
});
// PUT /settings — owner-only. The body is validated against `settingsBody`
// before the handler runs; the response is whatever settings.set('dross', …)
// resolves to.
router.put('/settings', requireOwner, validate({ body: settingsBody }),
asyncWrap(async (req, res) => res.json(await settings.set('dross', req.body))));

View File

@@ -1,24 +1,73 @@
import { Router } from 'express';
import multer from 'multer';
import { randomUUID } from 'node:crypto';
import { createReadStream } from 'node:fs';
import { writeFile, unlink } from 'node:fs/promises';
import path from 'node:path';
import { asyncWrap } from '../errors.js';
import { requireOwner } from '../cap.js';
import * as whisper from '../../voice/whisper.js';
import * as settings from '../../db/repos/app_settings.js';
import * as clips from '../../db/repos/voice_clips.js';
// Express router carrying the voice endpoints registered below.
export const router = Router();
// Directory retained clips are written to; VOICE_CLIPS_DIR overrides the default.
const CLIPS_DIR = process.env.VOICE_CLIPS_DIR || '/var/lib/void/voice-clips';
// In-memory upload; clips are small voice notes. 25 MB ceiling.
// memoryStorage means the whole upload is held in memory and exposed to
// handlers as req.file.buffer.
const upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 25 * 1024 * 1024 } });
// Pick the on-disk file extension for a clip from its MIME type.
//   mime    — MIME type string, parameters allowed (e.g. 'audio/ogg; codecs=opus').
//   returns — '.ogg', '.m4a', '.wav', '.mp3', or '.webm' as the fallback.
// MIME types are case-insensitive, so matching is done on a lowercased copy;
// a missing or non-string value falls back to '.webm' instead of throwing.
function extFor(mime = '') {
  const type = typeof mime === 'string' ? mime.toLowerCase() : '';
  if (type.includes('ogg')) return '.ogg';
  if (type.includes('mp4') || type.includes('m4a')) return '.m4a';
  if (type.includes('wav')) return '.wav';
  // audio/mpeg (and the non-standard audio/mp3) is MP3 data, not WebM.
  if (type.includes('mpeg') || type.includes('mp3')) return '.mp3';
  return '.webm';
}
// POST /api/voice/transcribe — owner-only. multipart field `audio`.
// Returns { text, duration, clip_id }. clip_id is null unless the Dross
// "keepClips" setting is on and the clip was retained successfully.

// Best-effort retention of one transcribed upload.
//   file    — the multer file object (buffer, mimetype).
//   result  — the transcription result ({ text, duration }).
//   returns — the id of the new voice_clips row, or null when retention is
//             off or anything went wrong. Never throws: a retention problem
//             must not cost the caller their transcript.
async function retainClip(file, result) {
  let filePath = null;
  try {
    // The settings lookup lives inside the try so a settings/DB failure
    // degrades to "not retained" rather than failing the whole request.
    const cfg = await settings.get('dross', {});
    if (!cfg?.keepClips) return null;

    const mime = file.mimetype || 'audio/webm';
    filePath = path.join(CLIPS_DIR, randomUUID() + extFor(mime));
    await writeFile(filePath, file.buffer, { mode: 0o600 });
    const row = await clips.create({
      transcript: result.text,
      duration_ms: result.duration != null ? Math.round(result.duration * 1000) : null,
      bytes: file.buffer.length,
      mime,
      path: filePath
    });
    return row.id;
  } catch (err) {
    // Without a row the file can never be listed or deleted through the API,
    // so remove whatever was written (possibly a partial file) before giving up.
    if (filePath) {
      try { await unlink(filePath); } catch { /* nothing was written, or already gone */ }
    }
    console.warn('voice clip retention failed:', err?.message ?? err);
    return null;
  }
}

router.post('/transcribe', requireOwner, upload.single('audio'), asyncWrap(async (req, res) => {
  if (!req.file || !req.file.buffer?.length) {
    return res.status(400).json({ error: { code: 'no_audio', message: 'no audio supplied' } });
  }
  let r;
  try {
    r = await whisper.transcribe(
      req.file.buffer, req.file.originalname || 'clip.webm', req.file.mimetype || 'audio/webm');
  } catch {
    return res.status(503).json({ error: { code: 'stt_unavailable', message: 'transcription service unavailable' } });
  }
  const clip_id = await retainClip(req.file, r);
  res.json({ text: r.text, duration: r.duration ?? null, clip_id });
}));
// GET /api/voice/clips — owner-only; responds with the rows from clips.list().
router.get('/clips', requireOwner, asyncWrap(async (_req, res) => {
  const retained = await clips.list();
  return res.json(retained);
}));
// GET /api/voice/clips/:id/audio — stream the audio file (owner).
// Responds 404 when the id is malformed, the row is unknown, or the file
// cannot be opened.
router.get('/clips/:id/audio', requireOwner, asyncWrap(async (req, res) => {
  const notFound = () => res.status(404).json({ error: { code: 'not_found', message: 'clip not found' } });
  // The id column is a uuid; a malformed value would be rejected by the
  // database with an error instead of simply matching no row.
  const wellFormed = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(req.params.id);
  if (!wellFormed) return notFound();
  const c = await clips.get(req.params.id);
  if (!c) return notFound();

  res.setHeader('Content-Type', c.mime || 'audio/webm');
  const file = createReadStream(c.path);
  file.on('error', () => {
    // Before any bytes went out we can still answer 404; once the response
    // has started the only honest option is to abort the connection.
    if (res.headersSent) res.destroy();
    else res.status(404).end();
  });
  // pipe() does not close the source when the client goes away; destroy it
  // ourselves so the file descriptor is released.
  res.on('close', () => file.destroy());
  file.pipe(res);
}));
// DELETE /api/voice/clips/:id — remove the row + the file (owner).
// Idempotent: always answers 204, whether or not a matching clip existed.
router.delete('/clips/:id', requireOwner, asyncWrap(async (req, res) => {
  const { id } = req.params;
  // A malformed id cannot match any row; skip the query so the uuid column
  // does not reject it with a database error.
  const wellFormed = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(id);
  const removed = wellFormed ? await clips.remove(id) : null;
  if (removed?.path) {
    try {
      await unlink(removed.path);
    } catch (err) {
      // A file that is already gone is fine; anything else leaves audio on
      // disk with no row pointing at it, so make that visible.
      if (err?.code !== 'ENOENT') {
        console.warn('voice clip file could not be removed:', removed.path, err?.message ?? err);
      }
    }
  }
  res.status(204).end();
}));

View File

@@ -0,0 +1,14 @@
-- 029_voice_clips.sql
-- Optional retained Dross voice clips (when the "Keep voice clips" setting is on).
-- Transcript + metadata here (durable, HA-replicated); audio bytes live as files
-- on the owner-only ZFS subvol mounted at /var/lib/void/voice-clips.
-- One row per retained clip. Only the transcript and metadata are stored
-- here; the audio itself is a file whose location is recorded in `path`.
CREATE TABLE voice_clips (
-- Row identifier, generated by the database when not supplied.
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
-- Transcribed text; empty string when none was provided.
transcript text NOT NULL DEFAULT '',
-- Clip length in milliseconds; NULL when the transcriber reported no duration.
duration_ms integer,
-- Size of the stored audio in bytes.
bytes bigint,
-- MIME type of the upload, used as the Content-Type on playback.
mime text,
-- Filesystem path of the audio file.
path text NOT NULL,
created_at timestamptz NOT NULL DEFAULT now()
);
-- Supports the newest-first listing (ORDER BY created_at DESC).
CREATE INDEX idx_voice_clips_created ON voice_clips (created_at DESC);

View File

@@ -0,0 +1,26 @@
import { pool } from '../pool.js';
// Insert one voice_clips row and resolve to the inserted row (RETURNING *).
// Every field except `path` has a default: transcript '', the rest null.
export async function create({ transcript = '', duration_ms = null, bytes = null, mime = null, path }) {
  const sql = `INSERT INTO voice_clips (transcript, duration_ms, bytes, mime, path)
VALUES ($1,$2,$3,$4,$5) RETURNING *`;
  const values = [transcript, duration_ms, bytes, mime, path];
  const result = await pool.query(sql, values);
  const [inserted] = result.rows;
  return inserted;
}
// Resolve to at most `limit` clips (default 100), newest first. The stored
// file path is deliberately not part of the selected columns.
export async function list(limit = 100) {
  const sql = `SELECT id, transcript, duration_ms, bytes, mime, created_at
FROM voice_clips ORDER BY created_at DESC LIMIT $1`;
  const result = await pool.query(sql, [limit]);
  return result.rows;
}
// Fetch the full voice_clips row for `id`; resolves to null when no row matches.
export async function get(id) {
  const result = await pool.query(`SELECT * FROM voice_clips WHERE id = $1`, [id]);
  const [found] = result.rows;
  return found || null;
}
// Delete the voice_clips row for `id`. Resolves to { path } of the deleted
// row so the caller can unlink the file, or null when no row matched.
export async function remove(id) {
  const result = await pool.query(`DELETE FROM voice_clips WHERE id = $1 RETURNING path`, [id]);
  const [deleted] = result.rows;
  return deleted || null;
}