feat(ai): claude CLI subprocess driver (subscription auth, stream-json)

Implements runClaudeTurn() — spawns the claude CLI for a single companion turn using subscription/OAuth auth (strips ANTHROPIC_API_KEY + ANTHROPIC_AUTH_TOKEN from child env), streaming normalised events (delta, tool, tool_result, result, error) via onEvent callback. Includes hermetic test + fake-claude.js fixture that mimics real 2.1.159 stream-json output; zero network/CLI calls in the test suite. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-01 21:51:55 +10:00
parent 1c03d6c277
commit bc1b820cc8
3 changed files with 434 additions and 0 deletions
--- a/lib/ai/claude_cli.js
+++ b/lib/ai/claude_cli.js
@@ -0,0 +1,276 @@
+/**
+ * lib/ai/claude_cli.js
+ *
+ * Spawns the `claude` CLI (Claude Code) for a single companion turn using
+ * subscription auth (OAuth / keychain), streaming normalized events.
+ *
+ * Mirrors the core logic of Void 1.0's agent.js but:
+ *  - Is a pure function (no global state, no EventEmitter)
+ *  - Uses ESM
+ *  - Emits a simplified, UI-vocabulary-aligned event set (delta, tool,
+ *    tool_result, result, error)
+ *  - Is injectable for testing via `claudeExe`
+ *
+ * ## stream-json event format (CLI 2.1.159)
+ * The CLI wraps Anthropic API streaming events in a `stream_event` envelope:
+ *   { type: "stream_event", event: { type: "content_block_delta", ... } }
+ *
+ * Top-level bare events are also emitted:
+ *   { type: "system", subtype: "init", ... }   — ignored
+ *   { type: "assistant", ... }                  — ignored (snapshot, duplicates deltas)
+ *   { type: "tool_result", ... }               — re-emitted as {type:'tool_result', name, result}
+ *   { type: "result", subtype: "success", ... } — final usage/cost summary
+ *   { type: "error", ... }                     — error
+ *
+ * ## Tool event semantics
+ * A single `{type:'tool', tool, status:'done', id}` is emitted per tool call
+ * once its input_json assembly is finished (on content_block_stop for a
+ * tool_use block).  Only status 'done' is produced at present; no separate
+ * 'running' or 'error' status is emitted — the UI renders a chip per tool
+ * completion.  A pending-tool map tracks open tool blocks by index so we know
+ * name+id at stop time.
+ *
+ * ## Allowed flags (verified against 2.1.159)
+ *   --print  (-p)
+ *   --output-format stream-json
+ *   --verbose
+ *   --include-partial-messages
+ *   --append-system-prompt <text>           (inline text, no temp file needed)
+ *   --append-system-prompt-file <path>      (file variant — undocumented but works)
+ *   --session-id <uuid>
+ *   --mcp-config <path>                     (accepts multiple space-separated paths)
+ *   --strict-mcp-config
+ *   --allowedTools <tool1> <tool2> ...      (space-separated, single flag, multiple values)
+ *
+ * @module lib/ai/claude_cli
+ */
+
+import { spawn } from 'child_process';
+import { createInterface } from 'readline';
+
+/**
+ * Run a single non-interactive Claude CLI turn.
+ *
+ * Spawns `claudeExe` with stream-json output, parses stdout line by line and
+ * forwards normalised events (`delta`, `tool`, `tool_result`, `result`,
+ * `error`) to `onEvent`.  Failures (spawn error, non-zero exit, timeout) are
+ * reported as `{type:'error', message}` events and the promise still resolves
+ * with whatever was accumulated up to that point.  The error event for a
+ * non-zero exit additionally carries a `stderr` field (bounded tail of the
+ * child's stderr) when the child wrote anything there.
+ *
+ * @param {object} opts
+ * @param {string}   opts.sessionId        UUID for the session (passed as --session-id)
+ * @param {string}   opts.systemPrompt     Appended system prompt text
+ * @param {string}   opts.userText         The user message (positional arg)
+ * @param {string}   [opts.mcpConfigPath]  If set, passed as --mcp-config + --strict-mcp-config
+ * @param {string[]} [opts.allowedTools]   Tool names to allow (--allowedTools multi-value)
+ * @param {function} [opts.onEvent]        Called for each normalized event
+ * @param {string}   [opts.claudeExe]      Path or name of claude binary (default: CLAUDE_EXE env or 'claude')
+ * @param {string}   [opts.home]           If set, overrides HOME in child env (default: VOID_CLAUDE_HOME env)
+ * @param {string}   [opts.cwd]            Working directory for the child process (default: process.cwd())
+ * @param {number}   [opts.timeoutMs]      Milliseconds before SIGTERM (default: 600000)
+ *
+ * @returns {Promise<{text: string, toolTrace: Array<{tool:string,status:string,id?:string}>, usage: object|null}>}
+ */
+export async function runClaudeTurn(opts) {
+  const {
+    sessionId,
+    systemPrompt,
+    userText,
+    mcpConfigPath,
+    allowedTools = [],
+    onEvent,
+    claudeExe = process.env.CLAUDE_EXE || 'claude',
+    home = process.env.VOID_CLAUDE_HOME,
+    cwd,
+    timeoutMs = 600_000,
+  } = opts;
+
+  // How long a timed-out child gets to honour SIGTERM before SIGKILL.
+  const killGraceMs = 5_000;
+  // Upper bound on the amount of stderr text retained for diagnostics.
+  const stderrTailChars = 4_096;
+
+  const emit = onEvent || (() => {});
+
+  // Build args
+  const args = [
+    '--print',
+    '--output-format', 'stream-json',
+    '--verbose',
+    '--include-partial-messages',
+    '--append-system-prompt', systemPrompt,
+    '--session-id', sessionId,
+  ];
+
+  if (mcpConfigPath) {
+    args.push('--mcp-config', mcpConfigPath, '--strict-mcp-config');
+  }
+
+  if (allowedTools.length > 0) {
+    // --allowedTools accepts space-separated list as multiple values under one flag
+    args.push('--allowedTools', ...allowedTools);
+  }
+
+  // Positional user message must come last.  The `--` separator ends option
+  // parsing so that (a) a preceding variadic flag such as --allowedTools does
+  // not swallow the prompt as one more value, and (b) user text that happens
+  // to start with '-' is not interpreted as a CLI option.
+  args.push('--', userText);
+
+  // Child env: clone, strip API key env vars so CLI uses subscription/OAuth auth
+  const childEnv = { ...process.env };
+  delete childEnv.ANTHROPIC_API_KEY;
+  delete childEnv.ANTHROPIC_AUTH_TOKEN;
+  if (home) childEnv.HOME = home;
+
+  // Accumulated state
+  let text = '';
+  /** @type {Array<{tool:string,status:string,id?:string}>} */
+  const toolTrace = [];
+  let usage = null;
+
+  // Track open tool_use blocks by content index so we can emit on stop
+  // Map<index, { name: string, id: string }>
+  const pendingTools = new Map();
+  // tool_use id → name, used to label tool_result events (which only carry
+  // a tool_use_id)
+  const toolById = new Map();
+
+  return new Promise((resolve) => {
+    let settled = false;
+    let timedOut = false;
+    let timeout = null;
+    let killTimer = null;
+    let stderrTail = '';
+
+    /**
+     * Resolve the turn exactly once, optionally emitting a final error event
+     * first.  A failed spawn fires both 'error' and 'close' on the child;
+     * without this guard the caller would see two error events for one failure.
+     */
+    const settle = (errorEvent) => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timeout);
+      clearTimeout(killTimer);
+      if (errorEvent) emit(errorEvent);
+      resolve({ text, toolTrace, usage });
+    };
+
+    let proc;
+    try {
+      proc = spawn(claudeExe, args, {
+        cwd: cwd || process.cwd(),
+        env: childEnv,
+        stdio: ['ignore', 'pipe', 'pipe'],
+      });
+    } catch (err) {
+      settle({ type: 'error', message: err.message });
+      return;
+    }
+
+    timeout = setTimeout(() => {
+      timedOut = true;
+      emit({ type: 'error', message: `claude CLI timed out after ${timeoutMs}ms` });
+      proc.kill('SIGTERM');
+      // If the child ignores SIGTERM the 'close' event never arrives and the
+      // promise would hang forever; force-kill after a grace period.
+      killTimer = setTimeout(() => proc.kill('SIGKILL'), killGraceMs);
+    }, timeoutMs);
+
+    // The stdio streams can be missing when the spawn failed very early; the
+    // failure itself is reported through the 'error' handler below.
+    if (proc.stdout) {
+      const rl = createInterface({ input: proc.stdout, crlfDelay: Infinity });
+
+      rl.on('line', (line) => {
+        if (!line.trim()) return;
+
+        let raw;
+        try {
+          raw = JSON.parse(line);
+        } catch {
+          // Non-JSON line — ignore silently (could be debug output)
+          return;
+        }
+
+        processRawLine(raw);
+      });
+    }
+
+    if (proc.stderr) {
+      proc.stderr.setEncoding('utf8');
+      proc.stderr.on('data', (chunk) => {
+        // stderr is not surfaced as events (much of it is informational), but
+        // a bounded tail is kept so a non-zero exit can be diagnosed.  Reading
+        // it also keeps the pipe drained so the child never blocks on it.
+        stderrTail = (stderrTail + chunk).slice(-stderrTailChars);
+      });
+    }
+
+    proc.on('error', (err) => {
+      settle({ type: 'error', message: err.message });
+    });
+
+    proc.on('close', (code) => {
+      // code === null means the child was terminated by a signal; a timeout
+      // has already been reported by the timer above.
+      if (code !== 0 && code !== null && !timedOut) {
+        const failure = { type: 'error', message: `claude CLI exited with code ${code}` };
+        const tail = stderrTail.trim();
+        if (tail) failure.stderr = tail;
+        settle(failure);
+        return;
+      }
+      settle();
+    });
+
+    /**
+     * Normalise one parsed JSON line.
+     * The CLI emits two shapes:
+     *   1. Bare top-level: { type: "system"|"assistant"|"tool_result"|"result"|"error"|"rate_limit_event"|... }
+     *   2. Wrapped:        { type: "stream_event", event: { type: "content_block_*"|"message_*", ... } }
+     */
+    function processRawLine(raw) {
+      // A line such as `null` or `42` is valid JSON but not an event object;
+      // reading `.type` off null would throw inside the line handler.
+      if (raw === null || typeof raw !== 'object') return;
+
+      const t = raw.type;
+
+      if (t === 'stream_event') {
+        processStreamEvent(raw.event);
+        return;
+      }
+
+      // Bare top-level events
+      if (t === 'system' || t === 'rate_limit_event') {
+        // Ignored — system/init info and rate limit metadata not relevant to UI
+        return;
+      }
+
+      if (t === 'assistant') {
+        // Full message snapshot — duplicates deltas, ignore to avoid doubling text
+        return;
+      }
+
+      if (t === 'tool_result') {
+        // { type: "tool_result", tool_use_id: "...", content: [...] }
+        // name is null when the id was never seen in a tool_use block.
+        const name = toolById.get(raw.tool_use_id) || null;
+        emit({ type: 'tool_result', name, result: raw.content });
+        return;
+      }
+
+      if (t === 'result') {
+        // Final summary: { type:"result", subtype:"success"|"error", total_cost_usd, usage, ... }
+        usage = raw.usage || null;
+        emit({ type: 'result', usage, cost: raw.total_cost_usd ?? null });
+        return;
+      }
+
+      if (t === 'error') {
+        const message = raw.error?.message || raw.message || JSON.stringify(raw);
+        emit({ type: 'error', message });
+        return;
+      }
+
+      // Unknown top-level type — ignore
+    }
+
+    /**
+     * Process an unwrapped Anthropic streaming event (the inner `.event` from
+     * a stream_event envelope, or a direct API-shaped event).
+     */
+    function processStreamEvent(ev) {
+      if (!ev) return;
+      const t = ev.type;
+
+      if (t === 'content_block_start') {
+        const block = ev.content_block;
+        if (block?.type === 'tool_use') {
+          // Track open tool block so we can emit on stop
+          pendingTools.set(ev.index, { name: block.name, id: block.id });
+          toolById.set(block.id, block.name);
+        }
+        // text start: no event emitted — we stream via deltas
+        return;
+      }
+
+      if (t === 'content_block_delta') {
+        const d = ev.delta;
+        if (!d) return;
+        // Only string payloads are accumulated, so a malformed delta cannot
+        // append the literal "undefined" to the transcript.
+        if (d.type === 'text_delta' && typeof d.text === 'string') {
+          text += d.text;
+          emit({ type: 'delta', text: d.text });
+        }
+        // input_json_delta: tool input accumulation — no UI event needed mid-stream
+        return;
+      }
+
+      if (t === 'content_block_stop') {
+        const pending = pendingTools.get(ev.index);
+        if (pending) {
+          pendingTools.delete(ev.index);
+          toolTrace.push({ tool: pending.name, status: 'done', id: pending.id });
+          emit({ type: 'tool', tool: pending.name, status: 'done', id: pending.id });
+        }
+        return;
+      }
+
+      // message_start, message_delta, message_stop — no normalised events
+    }
+  });
+}
--- a/tests/ai/claude_cli.test.js
+++ b/tests/ai/claude_cli.test.js
@@ -0,0 +1,92 @@
+import { describe, it, expect } from 'vitest';
+import { fileURLToPath } from 'url';
+import path from 'path';
+import { runClaudeTurn } from '../../lib/ai/claude_cli.js';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+// Executable fixture that prints canned stream-json lines and exits 0.
+const FAKE_CLAUDE = path.resolve(__dirname, '../fixtures/fake-claude.js');
+
+// ---------------------------------------------------------------------------
+// Hermetic tests: fake-claude.js emits known stream-json lines; we assert the
+// driver normalises them correctly.  NO real claude, NO network.
+// ---------------------------------------------------------------------------
+
+describe('runClaudeTurn', () => {
+  it('normalises text deltas, tool events, and tool_result from fake-claude output', async () => {
+    const collected = [];
+    const onEvent = (ev) => collected.push(ev);
+
+    const result = await runClaudeTurn({
+      claudeExe: FAKE_CLAUDE,
+      sessionId: 'test-session-uuid-0001',
+      systemPrompt: 'You are a test assistant.',
+      userText: 'hi',
+      onEvent,
+      timeoutMs: 10_000,
+    });
+
+    // --- Collected event assertions ---
+
+    // delta events whose texts concat to 'Hello'
+    const deltas = collected.filter(e => e.type === 'delta');
+    expect(deltas.length).toBeGreaterThanOrEqual(1);
+    expect(deltas.map(e => e.text).join('')).toBe('Hello');
+
+    // tool event for propose_change
+    const toolEvents = collected.filter(e => e.type === 'tool');
+    expect(toolEvents.length).toBeGreaterThanOrEqual(1);
+    expect(toolEvents[0].tool).toBe('propose_change');
+
+    // tool_result event
+    const toolResults = collected.filter(e => e.type === 'tool_result');
+    expect(toolResults.length).toBe(1);
+    expect(toolResults[0].name).toBe('propose_change');
+    expect(toolResults[0].result).toBeDefined();
+
+    // result event
+    const resultEvents = collected.filter(e => e.type === 'result');
+    expect(resultEvents.length).toBe(1);
+    expect(resultEvents[0].usage).toBeDefined();
+    expect(typeof resultEvents[0].cost).toBe('number');
+
+    // no error events
+    const errorEvents = collected.filter(e => e.type === 'error');
+    expect(errorEvents).toHaveLength(0);
+
+    // --- Return value assertions ---
+    expect(result.text).toBe('Hello');
+    expect(result.usage).toBeDefined();
+    expect(result.usage.input_tokens).toBe(100);
+
+    // toolTrace must include propose_change
+    expect(result.toolTrace).toBeDefined();
+    const proposeEntry = result.toolTrace.find(t => t.tool === 'propose_change');
+    expect(proposeEntry).toBeDefined();
+  });
+
+  it('resolves cleanly on non-zero exit (emits error event, does not throw)', async () => {
+    // runClaudeTurn offers no way to inject extra arguments, so the Node
+    // binary itself stands in for a failing CLI: it does not recognise the
+    // driver's `--output-format` flag and exits with a non-zero status before
+    // running anything.  process.execPath avoids depending on PATH lookup.
+    const collected = [];
+
+    const result = await runClaudeTurn({
+      claudeExe: process.execPath,
+      sessionId: 'bad-session',
+      systemPrompt: 'x',
+      userText: 'hi',
+      onEvent: (ev) => collected.push(ev),
+      timeoutMs: 5_000,
+    });
+
+    // Should resolve (not throw) and include an error event
+    const errorEvents = collected.filter(e => e.type === 'error');
+    expect(errorEvents.length).toBeGreaterThanOrEqual(1);
+    // result.text may be empty string on error
+    expect(typeof result.text).toBe('string');
+  });
+
+  it('resolves cleanly when the executable cannot be spawned', async () => {
+    // A path that does not exist makes spawn fail, which is reported through
+    // the child's 'error' event rather than an exit code.
+    const collected = [];
+
+    const result = await runClaudeTurn({
+      claudeExe: path.join(__dirname, 'no-such-claude-binary'),
+      sessionId: 'missing-exe-session',
+      systemPrompt: 'x',
+      userText: 'hi',
+      onEvent: (ev) => collected.push(ev),
+      timeoutMs: 5_000,
+    });
+
+    const errorEvents = collected.filter(e => e.type === 'error');
+    expect(errorEvents.length).toBeGreaterThanOrEqual(1);
+    expect(result.text).toBe('');
+    expect(result.toolTrace).toEqual([]);
+    expect(result.usage).toBeNull();
+  });
+});
--- a/tests/fixtures/fake-claude.js
+++ b/tests/fixtures/fake-claude.js
@@ -0,0 +1,66 @@
+#!/usr/bin/env node
+/**
+ * Fake claude CLI for hermetic tests.
+ *
+ * Mimics the stream-json output format of claude CLI 2.1.159:
+ * top-level events are either bare objects (system, assistant, tool_result,
+ * result) or wrapped in {type:"stream_event", event:{...}}.
+ *
+ * Command-line arguments are ignored.  Writes one JSON object per line to
+ * stdout and exits 0.
+ */
+
+const lines = [
+  // system init (ignored by driver)
+  { type: 'system', subtype: 'init', session_id: 'fake-session-001', tools: [], cwd: '/tmp' },
+
+  // content_block_start: text block
+  { type: 'stream_event', event: { type: 'content_block_start', index: 0, content_block: { type: 'text', text: '' } } },
+
+  // text deltas — together they spell "Hello"
+  { type: 'stream_event', event: { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: 'Hel' } } },
+  { type: 'stream_event', event: { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: 'lo' } } },
+
+  // assistant snapshot (should be ignored by driver to avoid duplication)
+  { type: 'assistant', message: { role: 'assistant', content: [{ type: 'text', text: 'Hello' }] } },
+
+  // content_block_stop for text
+  { type: 'stream_event', event: { type: 'content_block_stop', index: 0 } },
+
+  // content_block_start: tool_use
+  { type: 'stream_event', event: { type: 'content_block_start', index: 1, content_block: { type: 'tool_use', id: 'toolu_fake01', name: 'propose_change', input: {} } } },
+
+  // tool input delta
+  { type: 'stream_event', event: { type: 'content_block_delta', index: 1, delta: { type: 'input_json_delta', partial_json: '{"file":"/tmp/x.js","content":"console.log(1)"}' } } },
+
+  // assistant snapshot for tool_use (ignored)
+  { type: 'assistant', message: { role: 'assistant', content: [{ type: 'tool_use', id: 'toolu_fake01', name: 'propose_change', input: { file: '/tmp/x.js', content: 'console.log(1)' } }] } },
+
+  // content_block_stop for tool
+  { type: 'stream_event', event: { type: 'content_block_stop', index: 1 } },
+
+  // tool_result event
+  { type: 'tool_result', tool_use_id: 'toolu_fake01', content: [{ type: 'text', text: 'change staged' }] },
+
+  // final result
+  {
+    type: 'result',
+    subtype: 'success',
+    is_error: false,
+    result: 'Hello',
+    stop_reason: 'end_turn',
+    session_id: 'fake-session-001',
+    total_cost_usd: 0.001234,
+    usage: {
+      input_tokens: 100,
+      output_tokens: 10,
+      cache_read_input_tokens: 0,
+      cache_creation_input_tokens: 0,
+    },
+  },
+];
+
+for (const line of lines) {
+  process.stdout.write(JSON.stringify(line) + '\n');
+}
+
+// Set the exit status and let the process end on its own instead of calling
+// process.exit(): an immediate exit can discard stdout data that is still
+// queued when stdout is an asynchronous pipe, which would truncate the stream
+// the driver under test is reading.
+process.exitCode = 0;