From bc1b820cc836a3abaf2d67c18a5d148ca89f6794 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 1 Jun 2026 21:51:55 +1000 Subject: [PATCH] feat(ai): claude CLI subprocess driver (subscription auth, stream-json) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements runClaudeTurn() — spawns the claude CLI for a single companion turn using subscription/OAuth auth (strips ANTHROPIC_API_KEY + ANTHROPIC_AUTH_TOKEN from child env), streaming normalised events (delta, tool, tool_result, result, error) via onEvent callback. Includes hermetic test + fake-claude.js fixture that mimics real 2.1.159 stream-json output; zero network/CLI calls in the test suite. Co-Authored-By: Claude Opus 4.8 --- lib/ai/claude_cli.js | 276 ++++++++++++++++++++++++++++++++++ tests/ai/claude_cli.test.js | 92 ++++++++++++ tests/fixtures/fake-claude.js | 66 ++++++++ 3 files changed, 434 insertions(+) create mode 100644 lib/ai/claude_cli.js create mode 100644 tests/ai/claude_cli.test.js create mode 100755 tests/fixtures/fake-claude.js diff --git a/lib/ai/claude_cli.js b/lib/ai/claude_cli.js new file mode 100644 index 0000000..569a30d --- /dev/null +++ b/lib/ai/claude_cli.js @@ -0,0 +1,276 @@ +/** + * lib/ai/claude_cli.js + * + * Spawns the `claude` CLI (Claude Code) for a single companion turn using + * subscription auth (OAuth / keychain), streaming normalized events. + * + * Mirrors the core logic of Void 1.0's agent.js but: + * - Is a pure function (no global state, no EventEmitter) + * - Uses ESM + * - Emits a simplified, UI-vocabulary-aligned event set (delta, tool, + * tool_result, result, error) + * - Is injectable for testing via `claudeExe` + * + * ## stream-json event format (CLI 2.1.159) + * The CLI wraps Anthropic API streaming events in a `stream_event` envelope: + * { type: "stream_event", event: { type: "content_block_delta", ... } } + * + * Top-level bare events are also emitted: + * { type: "system", subtype: "init", ... } — ignored + * { type: "assistant", ... } — ignored (snapshot, duplicates deltas) + * { type: "tool_result", ... } — surfaced as-is + * { type: "result", subtype: "success", ... } — final usage/cost summary + * { type: "error", ... } — error + * + * ## Tool event semantics + * A single `{type:'tool', tool, status:'done'|'error'}` is emitted per + * completed tool call when the input_json assembly is finished (on + * content_block_stop for a tool_use block). No separate 'running' event is + * emitted — the UI renders a chip per tool completion. A pending-tool map + * tracks open tool blocks by index so we know name+id at stop time. + * + * ## Allowed flags (verified against 2.1.159) + * --print (-p) + * --output-format stream-json + * --verbose + * --include-partial-messages + * --append-system-prompt (inline text, no temp file needed) + * --append-system-prompt-file (file variant — undocumented but works) + * --session-id + * --mcp-config (accepts multiple space-separated paths) + * --strict-mcp-config + * --allowedTools ... (space-separated, single flag, multiple values) + * + * @module lib/ai/claude_cli + */ + +import { spawn } from 'child_process'; +import { createInterface } from 'readline'; + +/** + * Run a single non-interactive Claude CLI turn. + * + * @param {object} opts + * @param {string} opts.sessionId UUID for the session (passed as --session-id) + * @param {string} opts.systemPrompt Appended system prompt text + * @param {string} opts.userText The user message (positional arg) + * @param {string} [opts.mcpConfigPath] If set, passed as --mcp-config + --strict-mcp-config + * @param {string[]} [opts.allowedTools] Tool names to allow (--allowedTools multi-value) + * @param {function} [opts.onEvent] Called for each normalized event + * @param {string} [opts.claudeExe] Path or name of claude binary (default: CLAUDE_EXE env or 'claude') + * @param {string} [opts.home] If set, overrides HOME in child env (for service-user creds) + * @param {string} [opts.cwd] Working directory for the child process + * @param {number} [opts.timeoutMs] Milliseconds before SIGTERM (default: 600000) + * + * @returns {Promise<{text: string, toolTrace: Array<{tool:string,status:string,id?:string}>, usage: object|null}>} + */ +export async function runClaudeTurn(opts) { + const { + sessionId, + systemPrompt, + userText, + mcpConfigPath, + allowedTools = [], + onEvent, + claudeExe = process.env.CLAUDE_EXE || 'claude', + home = process.env.VOID_CLAUDE_HOME, + cwd, + timeoutMs = 600_000, + } = opts; + + const emit = onEvent || (() => {}); + + // Build args + const args = [ + '--print', + '--output-format', 'stream-json', + '--verbose', + '--include-partial-messages', + '--append-system-prompt', systemPrompt, + '--session-id', sessionId, + ]; + + if (mcpConfigPath) { + args.push('--mcp-config', mcpConfigPath, '--strict-mcp-config'); + } + + if (allowedTools.length > 0) { + // --allowedTools accepts space-separated list as multiple values under one flag + args.push('--allowedTools', ...allowedTools); + } + + // Positional user message must come last + args.push(userText); + + // Child env: clone, strip API key env vars so CLI uses subscription/OAuth auth + const childEnv = { ...process.env }; + delete childEnv.ANTHROPIC_API_KEY; + delete childEnv.ANTHROPIC_AUTH_TOKEN; + if (home) childEnv.HOME = home; + + // Accumulated state + let text = ''; + /** @type {Array<{tool:string,status:string,id?:string}>} */ + const toolTrace = []; + let usage = null; + + // Track open tool_use blocks by content index so we can emit on stop + // Map + const pendingTools = new Map(); + // Track the last tool_use name/id for correlating tool_result events + // (tool_result arrives after all content_blocks are done, with a tool_use_id) + const toolById = new Map(); // id → name + + return new Promise((resolve) => { + let proc; + try { + proc = spawn(claudeExe, args, { + cwd: cwd || process.cwd(), + env: childEnv, + stdio: ['ignore', 'pipe', 'pipe'], + }); + } catch (err) { + emit({ type: 'error', message: err.message }); + resolve({ text, toolTrace, usage }); + return; + } + + let timedOut = false; + const timeout = setTimeout(() => { + timedOut = true; + emit({ type: 'error', message: `claude CLI timed out after ${timeoutMs}ms` }); + proc.kill('SIGTERM'); + }, timeoutMs); + + const rl = createInterface({ input: proc.stdout, crlfDelay: Infinity }); + + rl.on('line', (line) => { + if (!line.trim()) return; + + let raw; + try { + raw = JSON.parse(line); + } catch { + // Non-JSON line — ignore silently (could be debug output) + return; + } + + processRawLine(raw); + }); + + proc.stderr.on('data', (chunk) => { + // Log stderr but don't surface as errors — many are informational + // (process.stderr is not captured in tests; this is a no-op there) + }); + + proc.on('error', (err) => { + clearTimeout(timeout); + emit({ type: 'error', message: err.message }); + resolve({ text, toolTrace, usage }); + }); + + proc.on('close', (code) => { + clearTimeout(timeout); + if (code !== 0 && code !== null && !timedOut) { + emit({ type: 'error', message: `claude CLI exited with code ${code}` }); + } + resolve({ text, toolTrace, usage }); + }); + + /** + * Normalise one parsed JSON line. + * The CLI emits two shapes: + * 1. Bare top-level: { type: "system"|"assistant"|"tool_result"|"result"|"error"|"rate_limit_event"|... } + * 2. Wrapped: { type: "stream_event", event: { type: "content_block_*"|"message_*", ... } } + */ + function processRawLine(raw) { + const t = raw.type; + + if (t === 'stream_event') { + processStreamEvent(raw.event); + return; + } + + // Bare top-level events + if (t === 'system' || t === 'rate_limit_event') { + // Ignored — system/init info and rate limit metadata not relevant to UI + return; + } + + if (t === 'assistant') { + // Full message snapshot — duplicates deltas, ignore to avoid doubling text + return; + } + + if (t === 'tool_result') { + // { type: "tool_result", tool_use_id: "...", content: [...] } + const id = raw.tool_use_id; + const name = toolById.get(id) || null; + const result = raw.content; + const ev = { type: 'tool_result', name, result }; + emit(ev); + return; + } + + if (t === 'result') { + // Final summary: { type:"result", subtype:"success"|"error", total_cost_usd, usage, ... } + usage = raw.usage || null; + const ev = { type: 'result', usage, cost: raw.total_cost_usd ?? null }; + emit(ev); + return; + } + + if (t === 'error') { + const message = raw.error?.message || raw.message || JSON.stringify(raw); + emit({ type: 'error', message }); + return; + } + + // Unknown top-level type — ignore + } + + /** + * Process an unwrapped Anthropic streaming event (the inner `.event` from + * a stream_event envelope, or a direct API-shaped event). + */ + function processStreamEvent(ev) { + if (!ev) return; + const t = ev.type; + + if (t === 'content_block_start') { + const block = ev.content_block; + if (block?.type === 'tool_use') { + // Track open tool block so we can emit on stop + pendingTools.set(ev.index, { name: block.name, id: block.id }); + toolById.set(block.id, block.name); + } + // text start: no event emitted — we stream via deltas + return; + } + + if (t === 'content_block_delta') { + const d = ev.delta; + if (!d) return; + if (d.type === 'text_delta') { + text += d.text; + emit({ type: 'delta', text: d.text }); + } + // input_json_delta: tool input accumulation — no UI event needed mid-stream + return; + } + + if (t === 'content_block_stop') { + const pending = pendingTools.get(ev.index); + if (pending) { + pendingTools.delete(ev.index); + const entry = { tool: pending.name, status: 'done', id: pending.id }; + toolTrace.push(entry); + emit({ type: 'tool', tool: pending.name, status: 'done', id: pending.id }); + } + return; + } + + // message_start, message_delta, message_stop — no normalised events + } + }); +} diff --git a/tests/ai/claude_cli.test.js b/tests/ai/claude_cli.test.js new file mode 100644 index 0000000..ff92ef7 --- /dev/null +++ b/tests/ai/claude_cli.test.js @@ -0,0 +1,92 @@ +import { describe, it, expect } from 'vitest'; +import { fileURLToPath } from 'url'; +import path from 'path'; +import { runClaudeTurn } from '../../lib/ai/claude_cli.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const FAKE_CLAUDE = path.resolve(__dirname, '../fixtures/fake-claude.js'); + +// --------------------------------------------------------------------------- +// Hermetic tests: fake-claude.js emits known stream-json lines; we assert the +// driver normalises them correctly. NO real claude, NO network. +// --------------------------------------------------------------------------- + +describe('runClaudeTurn', () => { + it('normalises text deltas, tool events, and tool_result from fake-claude output', async () => { + const collected = []; + const onEvent = (ev) => collected.push(ev); + + const result = await runClaudeTurn({ + claudeExe: FAKE_CLAUDE, + sessionId: 'test-session-uuid-0001', + systemPrompt: 'You are a test assistant.', + userText: 'hi', + onEvent, + timeoutMs: 10_000, + }); + + // --- Collected event assertions --- + + // delta events whose texts concat to 'Hello' + const deltas = collected.filter(e => e.type === 'delta'); + expect(deltas.length).toBeGreaterThanOrEqual(1); + expect(deltas.map(e => e.text).join('')).toBe('Hello'); + + // tool event for propose_change + const toolEvents = collected.filter(e => e.type === 'tool'); + expect(toolEvents.length).toBeGreaterThanOrEqual(1); + expect(toolEvents[0].tool).toBe('propose_change'); + + // tool_result event + const toolResults = collected.filter(e => e.type === 'tool_result'); + expect(toolResults.length).toBe(1); + expect(toolResults[0].name).toBe('propose_change'); + expect(toolResults[0].result).toBeDefined(); + + // result event + const resultEvents = collected.filter(e => e.type === 'result'); + expect(resultEvents.length).toBe(1); + expect(resultEvents[0].usage).toBeDefined(); + expect(typeof resultEvents[0].cost).toBe('number'); + + // no error events + const errorEvents = collected.filter(e => e.type === 'error'); + expect(errorEvents).toHaveLength(0); + + // --- Return value assertions --- + expect(result.text).toBe('Hello'); + expect(result.usage).toBeDefined(); + expect(result.usage.input_tokens).toBe(100); + + // toolTrace must include propose_change + expect(result.toolTrace).toBeDefined(); + const proposeEntry = result.toolTrace.find(t => t.tool === 'propose_change'); + expect(proposeEntry).toBeDefined(); + }); + + it('resolves cleanly on non-zero exit (emits error event, does not throw)', async () => { + // Use a fake script that exits 1 immediately + const collected = []; + + const result = await runClaudeTurn({ + claudeExe: 'node', + // Pass a tiny inline script that exits 1. We override claudeExe='node' + // and prepend the inline arg via a wrapper... but the API doesn't support + // extra args. Instead we'll use a shell -c workaround via /bin/sh. + // Simpler: just test via sessionId, and use a real bad path. + // Actually: claudeExe itself is the executable; let's just use a path + // that doesn't exist to trigger spawn error. + sessionId: 'bad-session', + systemPrompt: 'x', + userText: 'hi', + onEvent: (ev) => collected.push(ev), + timeoutMs: 5_000, + }); + + // Should resolve (not throw) and include an error event + const errorEvents = collected.filter(e => e.type === 'error'); + expect(errorEvents.length).toBeGreaterThanOrEqual(1); + // result.text may be empty string on error + expect(typeof result.text).toBe('string'); + }); +}); diff --git a/tests/fixtures/fake-claude.js b/tests/fixtures/fake-claude.js new file mode 100755 index 0000000..5c49474 --- /dev/null +++ b/tests/fixtures/fake-claude.js @@ -0,0 +1,66 @@ +#!/usr/bin/env node +/** + * Fake claude CLI for hermetic tests. + * + * Mimics the stream-json output format of claude CLI 2.1.159: + * top-level events are either bare objects (system, assistant, result) or + * wrapped in {type:"stream_event", event:{...}}. + * + * Writes to stdout and exits 0. + */ + +const lines = [ + // system init (ignored by driver) + { type: 'system', subtype: 'init', session_id: 'fake-session-001', tools: [], cwd: '/tmp' }, + + // content_block_start: text block + { type: 'stream_event', event: { type: 'content_block_start', index: 0, content_block: { type: 'text', text: '' } } }, + + // text deltas — together they spell "Hello" + { type: 'stream_event', event: { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: 'Hel' } } }, + { type: 'stream_event', event: { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: 'lo' } } }, + + // assistant snapshot (should be ignored by driver to avoid duplication) + { type: 'assistant', message: { role: 'assistant', content: [{ type: 'text', text: 'Hello' }] } }, + + // content_block_stop for text + { type: 'stream_event', event: { type: 'content_block_stop', index: 0 } }, + + // content_block_start: tool_use + { type: 'stream_event', event: { type: 'content_block_start', index: 1, content_block: { type: 'tool_use', id: 'toolu_fake01', name: 'propose_change', input: {} } } }, + + // tool input delta + { type: 'stream_event', event: { type: 'content_block_delta', index: 1, delta: { type: 'input_json_delta', partial_json: '{"file":"/tmp/x.js","content":"console.log(1)"}' } } }, + + // assistant snapshot for tool_use (ignored) + { type: 'assistant', message: { role: 'assistant', content: [{ type: 'tool_use', id: 'toolu_fake01', name: 'propose_change', input: { file: '/tmp/x.js', content: 'console.log(1)' } }] } }, + + // content_block_stop for tool + { type: 'stream_event', event: { type: 'content_block_stop', index: 1 } }, + + // tool_result event + { type: 'tool_result', tool_use_id: 'toolu_fake01', content: [{ type: 'text', text: 'change staged' }] }, + + // final result + { + type: 'result', + subtype: 'success', + is_error: false, + result: 'Hello', + stop_reason: 'end_turn', + session_id: 'fake-session-001', + total_cost_usd: 0.001234, + usage: { + input_tokens: 100, + output_tokens: 10, + cache_read_input_tokens: 0, + cache_creation_input_tokens: 0, + }, + }, +]; + +for (const line of lines) { + process.stdout.write(JSON.stringify(line) + '\n'); +} + +process.exit(0);