feat(ai): claude CLI subprocess driver (subscription auth, stream-json)

Implements runClaudeTurn() — spawns the claude CLI for a single companion turn using subscription/OAuth auth (strips ANTHROPIC_API_KEY + ANTHROPIC_AUTH_TOKEN from child env), streaming normalised events (delta, tool, tool_result, result, error) via onEvent callback. Includes hermetic test + fake-claude.js fixture that mimics real 2.1.159 stream-json output; zero network/CLI calls in the test suite. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-01 21:51:55 +10:00
parent 1c03d6c277
commit bc1b820cc8
3 changed files with 434 additions and 0 deletions
--- a/lib/ai/claude_cli.js
+++ b/lib/ai/claude_cli.js
@@ -0,0 +1,276 @@
 /**
 * lib/ai/claude_cli.js
 *
 * Spawns the `claude` CLI (Claude Code) for a single companion turn using
 * subscription auth (OAuth / keychain), streaming normalized events.
 *
 * Mirrors the core logic of Void 1.0's agent.js but:
 *  - Is a pure function (no global state, no EventEmitter)
 *  - Uses ESM
 *  - Emits a simplified, UI-vocabulary-aligned event set (delta, tool,
 *    tool_result, result, error)
 *  - Is injectable for testing via `claudeExe`
 *
 * ## stream-json event format (CLI 2.1.159)
 * The CLI wraps Anthropic API streaming events in a `stream_event` envelope:
 *   { type: "stream_event", event: { type: "content_block_delta", ... } }
 *
 * Top-level bare events are also emitted:
 *   { type: "system", subtype: "init", ... }   — ignored
 *   { type: "assistant", ... }                  — ignored (snapshot, duplicates deltas)
 *   { type: "tool_result", ... }               — normalised to {type:'tool_result', name, result}
 *   { type: "result", subtype: "success", ... } — final usage/cost summary
 *   { type: "error", ... }                     — error
 *
 * ## Tool event semantics
 * A single `{type:'tool', tool, status:'done', id}` event is emitted per tool
 * call when its tool_use content block closes (content_block_stop), i.e. once
 * the input JSON has been fully streamed.  The status is currently always
 * 'done': no separate 'running' event is emitted, and a failed tool call is
 * not reported as 'error' here — the UI renders one chip per completed block.
 * A pending-tool map tracks open tool blocks by content index so the name and
 * id are known at stop time.
 *
 * ## Allowed flags (verified against 2.1.159)
 *   --print  (-p)
 *   --output-format stream-json
 *   --verbose
 *   --include-partial-messages
 *   --append-system-prompt <text>           (inline text, no temp file needed)
 *   --append-system-prompt-file <path>      (file variant — undocumented but works)
 *   --session-id <uuid>
 *   --mcp-config <path>                     (accepts multiple space-separated paths)
 *   --strict-mcp-config
 *   --allowedTools <tool1> <tool2> ...      (space-separated, single flag, multiple values)
 *
 * @module lib/ai/claude_cli
 */
 import { spawn } from 'child_process';
 import { createInterface } from 'readline';
 /**
 * Run a single non-interactive Claude CLI turn.
 *
 * Spawns the CLI with stream-json output, parses stdout line by line and
 * forwards normalized events to `opts.onEvent`:
 *   { type: 'delta', text }
 *   { type: 'tool', tool, status: 'done', id }
 *   { type: 'tool_result', name, result }
 *   { type: 'result', usage, cost }
 *   { type: 'error', message }
 *
 * The returned promise settles exactly once and always resolves: failures
 * (spawn error, non-zero exit, termination by signal, timeout) are reported
 * as 'error' events and the promise resolves with whatever was accumulated
 * up to that point.  The only rejection is the TypeError raised when `opts`
 * itself is missing.
 *
 * ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN are removed from the child
 * environment so the CLI falls back to subscription/OAuth credentials.
 *
 * @param {object} opts
 * @param {string}   opts.sessionId        UUID for the session (passed as --session-id)
 * @param {string}   opts.systemPrompt     Appended system prompt text
 * @param {string}   opts.userText         The user message (positional arg, passed after `--`)
 * @param {string}   [opts.mcpConfigPath]  If set, passed as --mcp-config + --strict-mcp-config
 * @param {string[]} [opts.allowedTools]   Tool names to allow (--allowedTools multi-value)
 * @param {function} [opts.onEvent]        Called for each normalized event
 * @param {string}   [opts.claudeExe]      Path or name of claude binary (default: CLAUDE_EXE env or 'claude')
 * @param {string}   [opts.home]           If set, overrides HOME in child env (default: VOID_CLAUDE_HOME env)
 * @param {string}   [opts.cwd]            Working directory for the child process (default: process.cwd())
 * @param {number}   [opts.timeoutMs]      Milliseconds before SIGTERM (default: 600000); SIGKILL
 *                                         follows after a short grace period if the child is still alive
 *
 * @returns {Promise<{text: string, toolTrace: Array<{tool:string,status:string,id?:string}>, usage: object|null}>}
 */
 export async function runClaudeTurn(opts) {
  const {
    sessionId,
    systemPrompt,
    userText,
    mcpConfigPath,
    allowedTools = [],
    onEvent,
    claudeExe = process.env.CLAUDE_EXE || 'claude',
    home = process.env.VOID_CLAUDE_HOME,
    cwd,
    timeoutMs = 600_000,
  } = opts;
  const emit = onEvent || (() => {});
  // How long a timed-out child gets to honour SIGTERM before SIGKILL.
  const KILL_GRACE_MS = 5_000;
  // Upper bound on the stderr text kept for diagnostics.
  const STDERR_TAIL_CHARS = 2_000;
  // Build args
  const args = [
    '--print',
    '--output-format', 'stream-json',
    '--verbose',
    '--include-partial-messages',
    '--append-system-prompt', systemPrompt,
    '--session-id', sessionId,
  ];
  if (mcpConfigPath) {
    args.push('--mcp-config', mcpConfigPath, '--strict-mcp-config');
  }
  if (allowedTools.length > 0) {
    // --allowedTools accepts space-separated list as multiple values under one flag
    args.push('--allowedTools', ...allowedTools);
  }
  // The positional user message comes last.  The `--` terminator stops option
  // parsing so the message is neither swallowed as one more value of the
  // variadic --allowedTools flag nor mistaken for a flag when it starts
  // with '-'.
  args.push('--', userText);
  // Child env: clone, strip API key env vars so CLI uses subscription/OAuth auth
  const childEnv = { ...process.env };
  delete childEnv.ANTHROPIC_API_KEY;
  delete childEnv.ANTHROPIC_AUTH_TOKEN;
  if (home) childEnv.HOME = home;
  // Accumulated state
  let text = '';
  /** @type {Array<{tool:string,status:string,id?:string}>} */
  const toolTrace = [];
  let usage = null;
  // Open tool_use blocks keyed by content index, so name+id are known at stop
  // time.  Map<index, { name: string, id: string }>
  const pendingTools = new Map();
  // tool_use id → name, for labelling tool_result events that only carry
  // a tool_use_id.
  const toolById = new Map();
  return new Promise((resolve) => {
    let settled = false;
    let timedOut = false;
    let timeout = null;
    let killTimer = null;
    let stderrTail = '';
    // Resolve once and release both timers.  A failed spawn fires 'error'
    // AND 'close', so every exit path funnels through this guard.
    const finish = () => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      clearTimeout(killTimer);
      resolve({ text, toolTrace, usage });
    };
    /**
     * Process an unwrapped Anthropic streaming event (the inner `.event` from
     * a stream_event envelope).
     */
    const processStreamEvent = (ev) => {
      if (!ev) return;
      const t = ev.type;
      if (t === 'content_block_start') {
        const block = ev.content_block;
        if (block?.type === 'tool_use') {
          // Track open tool block so we can emit on stop
          pendingTools.set(ev.index, { name: block.name, id: block.id });
          toolById.set(block.id, block.name);
        }
        // text start: no event emitted — we stream via deltas
        return;
      }
      if (t === 'content_block_delta') {
        const d = ev.delta;
        if (!d) return;
        // Only string payloads are appended; a malformed delta must not add
        // the literal "undefined" to the transcript.
        if (d.type === 'text_delta' && typeof d.text === 'string') {
          text += d.text;
          emit({ type: 'delta', text: d.text });
        }
        // input_json_delta: tool input accumulation — no UI event needed mid-stream
        return;
      }
      if (t === 'content_block_stop') {
        const pending = pendingTools.get(ev.index);
        if (pending) {
          pendingTools.delete(ev.index);
          toolTrace.push({ tool: pending.name, status: 'done', id: pending.id });
          emit({ type: 'tool', tool: pending.name, status: 'done', id: pending.id });
        }
        return;
      }
      // message_start, message_delta, message_stop — no normalised events
    };
    /**
     * Normalise one parsed JSON line.
     * The CLI emits two shapes:
     *   1. Bare top-level: { type: "system"|"assistant"|"tool_result"|"result"|"error"|"rate_limit_event"|... }
     *   2. Wrapped:        { type: "stream_event", event: { type: "content_block_*"|"message_*", ... } }
     */
    const processRawLine = (raw) => {
      // Valid JSON that is not an object (null, number, string) carries no event.
      if (raw === null || typeof raw !== 'object') return;
      const t = raw.type;
      if (t === 'stream_event') {
        processStreamEvent(raw.event);
        return;
      }
      if (t === 'system' || t === 'rate_limit_event') {
        // Ignored — system/init info and rate limit metadata not relevant to UI
        return;
      }
      if (t === 'assistant') {
        // Full message snapshot — duplicates deltas, ignore to avoid doubling text
        return;
      }
      if (t === 'tool_result') {
        // { type: "tool_result", tool_use_id: "...", content: [...] }
        const name = toolById.get(raw.tool_use_id) || null;
        emit({ type: 'tool_result', name, result: raw.content });
        return;
      }
      if (t === 'result') {
        // Final summary: { type:"result", subtype:"success"|"error", total_cost_usd, usage, ... }
        usage = raw.usage || null;
        emit({ type: 'result', usage, cost: raw.total_cost_usd ?? null });
        return;
      }
      if (t === 'error') {
        const message = raw.error?.message || raw.message || JSON.stringify(raw);
        emit({ type: 'error', message });
        return;
      }
      // Unknown top-level type — ignore
    };
    let proc;
    try {
      proc = spawn(claudeExe, args, {
        cwd: cwd || process.cwd(),
        env: childEnv,
        stdio: ['ignore', 'pipe', 'pipe'],
      });
    } catch (err) {
      // Synchronous spawn failure (e.g. invalid argument types).
      emit({ type: 'error', message: err.message });
      finish();
      return;
    }
    timeout = setTimeout(() => {
      timedOut = true;
      emit({ type: 'error', message: `claude CLI timed out after ${timeoutMs}ms` });
      proc.kill('SIGTERM');
      // Escalate if the child ignores SIGTERM; otherwise 'close' would never
      // fire and the promise would hang forever.
      killTimer = setTimeout(() => proc.kill('SIGKILL'), KILL_GRACE_MS);
    }, timeoutMs);
    const rl = createInterface({ input: proc.stdout, crlfDelay: Infinity });
    rl.on('line', (line) => {
      if (!line.trim()) return;
      let raw;
      try {
        raw = JSON.parse(line);
      } catch {
        // Non-JSON line — ignore silently (could be debug output)
        return;
      }
      processRawLine(raw);
    });
    // stderr is drained so the child never blocks on a full pipe.  Its output
    // is mostly informational, so it is not surfaced line by line; only a
    // bounded tail is kept to explain a non-zero exit.
    proc.stderr.setEncoding('utf8');
    proc.stderr.on('data', (chunk) => {
      stderrTail = (stderrTail + chunk).slice(-STDERR_TAIL_CHARS);
    });
    proc.on('error', (err) => {
      if (settled) return;
      emit({ type: 'error', message: err.message });
      finish();
    });
    proc.on('close', (code, signal) => {
      // Already reported through 'error' (e.g. ENOENT): do not report twice.
      if (settled) return;
      if (!timedOut) {
        if (code !== 0 && code !== null) {
          const detail = stderrTail.trim();
          const suffix = detail ? `: ${detail}` : '';
          emit({ type: 'error', message: `claude CLI exited with code ${code}${suffix}` });
        } else if (code === null && signal) {
          emit({ type: 'error', message: `claude CLI terminated by signal ${signal}` });
        }
      }
      finish();
    });
  });
 }
--- a/tests/ai/claude_cli.test.js
+++ b/tests/ai/claude_cli.test.js
@@ -0,0 +1,92 @@
 import { describe, it, expect } from 'vitest';
 import { fileURLToPath } from 'url';
 import path from 'path';
 import { runClaudeTurn } from '../../lib/ai/claude_cli.js';
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const FAKE_CLAUDE = path.resolve(__dirname, '../fixtures/fake-claude.js');
 // ---------------------------------------------------------------------------
 // Hermetic tests: fake-claude.js emits known stream-json lines; we assert the
 // driver normalises them correctly.  NO real claude, NO network.
 // ---------------------------------------------------------------------------
 describe('runClaudeTurn', () => {
  // Happy path: the fixture prints a fixed stream-json transcript and exits 0.
  it('normalises text deltas, tool events, and tool_result from fake-claude output', async () => {
    const collected = [];
    const onEvent = (ev) => collected.push(ev);
    const result = await runClaudeTurn({
      claudeExe: FAKE_CLAUDE,
      sessionId: 'test-session-uuid-0001',
      systemPrompt: 'You are a test assistant.',
      userText: 'hi',
      onEvent,
      timeoutMs: 10_000,
    });
    // --- Collected event assertions ---
    // delta events whose texts concat to 'Hello'
    const deltas = collected.filter(e => e.type === 'delta');
    expect(deltas.length).toBeGreaterThanOrEqual(1);
    expect(deltas.map(e => e.text).join('')).toBe('Hello');
    // tool event for propose_change
    const toolEvents = collected.filter(e => e.type === 'tool');
    expect(toolEvents.length).toBeGreaterThanOrEqual(1);
    expect(toolEvents[0].tool).toBe('propose_change');
    // tool_result event, labelled with the name of the tool_use it answers
    const toolResults = collected.filter(e => e.type === 'tool_result');
    expect(toolResults.length).toBe(1);
    expect(toolResults[0].name).toBe('propose_change');
    expect(toolResults[0].result).toBeDefined();
    // result event
    const resultEvents = collected.filter(e => e.type === 'result');
    expect(resultEvents.length).toBe(1);
    expect(resultEvents[0].usage).toBeDefined();
    expect(typeof resultEvents[0].cost).toBe('number');
    // no error events
    const errorEvents = collected.filter(e => e.type === 'error');
    expect(errorEvents).toHaveLength(0);
    // --- Return value assertions ---
    expect(result.text).toBe('Hello');
    expect(result.usage).toBeDefined();
    expect(result.usage.input_tokens).toBe(100);
    // toolTrace must include propose_change
    expect(result.toolTrace).toBeDefined();
    const proposeEntry = result.toolTrace.find(t => t.tool === 'propose_change');
    expect(proposeEntry).toBeDefined();
  });
  it('resolves cleanly on non-zero exit (emits error event, does not throw)', async () => {
    // runClaudeTurn offers no way to inject extra argv, so the Node binary
    // itself stands in for the CLI: it is handed the claude-only flags
    // (--output-format, ...) and is expected to reject them and exit with a
    // non-zero status.  process.execPath is used instead of 'node' so the
    // test does not depend on PATH.
    const collected = [];
    const result = await runClaudeTurn({
      claudeExe: process.execPath,
      sessionId: 'bad-session',
      systemPrompt: 'x',
      userText: 'hi',
      onEvent: (ev) => collected.push(ev),
      timeoutMs: 5_000,
    });
    // Should resolve (not throw) and include an error event
    const errorEvents = collected.filter(e => e.type === 'error');
    expect(errorEvents.length).toBeGreaterThanOrEqual(1);
    // result.text may be empty string on error
    expect(typeof result.text).toBe('string');
  });
  it('resolves cleanly when the executable cannot be spawned', async () => {
    // A path that does not exist makes the spawn itself fail.
    const collected = [];
    const result = await runClaudeTurn({
      claudeExe: path.resolve(__dirname, '../fixtures/no-such-claude-binary'),
      sessionId: 'missing-binary-session',
      systemPrompt: 'x',
      userText: 'hi',
      onEvent: (ev) => collected.push(ev),
      timeoutMs: 5_000,
    });
    const errorEvents = collected.filter(e => e.type === 'error');
    expect(errorEvents.length).toBeGreaterThanOrEqual(1);
    // Nothing was streamed, so the accumulated result is empty
    expect(result.text).toBe('');
    expect(result.toolTrace).toEqual([]);
    expect(result.usage).toBeNull();
  });
 });
--- a/tests/fixtures/fake-claude.js
+++ b/tests/fixtures/fake-claude.js
@@ -0,0 +1,66 @@
 #!/usr/bin/env node
 /**
 * Fake claude CLI for hermetic tests.
 *
 * Mimics the stream-json output format of claude CLI 2.1.159:
 * top-level events are either bare objects (system, assistant, result) or
 * wrapped in {type:"stream_event", event:{...}}.
 *
 * Writes to stdout and exits 0.
 */
 // Fixed transcript replayed on stdout, one JSON document per line, in the
 // order the driver under test is expected to receive it.
 const lines = [
  // system init (ignored by driver)
  { type: 'system', subtype: 'init', session_id: 'fake-session-001', tools: [], cwd: '/tmp' },
  // content_block_start: text block
  { type: 'stream_event', event: { type: 'content_block_start', index: 0, content_block: { type: 'text', text: '' } } },
  // text deltas — together they spell "Hello"
  { type: 'stream_event', event: { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: 'Hel' } } },
  { type: 'stream_event', event: { type: 'content_block_delta', index: 0, delta: { type: 'text_delta', text: 'lo' } } },
  // assistant snapshot (should be ignored by driver to avoid duplication)
  { type: 'assistant', message: { role: 'assistant', content: [{ type: 'text', text: 'Hello' }] } },
  // content_block_stop for text
  { type: 'stream_event', event: { type: 'content_block_stop', index: 0 } },
  // content_block_start: tool_use
  { type: 'stream_event', event: { type: 'content_block_start', index: 1, content_block: { type: 'tool_use', id: 'toolu_fake01', name: 'propose_change', input: {} } } },
  // tool input delta
  { type: 'stream_event', event: { type: 'content_block_delta', index: 1, delta: { type: 'input_json_delta', partial_json: '{"file":"/tmp/x.js","content":"console.log(1)"}' } } },
  // assistant snapshot for tool_use (ignored)
  { type: 'assistant', message: { role: 'assistant', content: [{ type: 'tool_use', id: 'toolu_fake01', name: 'propose_change', input: { file: '/tmp/x.js', content: 'console.log(1)' } }] } },
  // content_block_stop for tool
  { type: 'stream_event', event: { type: 'content_block_stop', index: 1 } },
  // tool_result event
  { type: 'tool_result', tool_use_id: 'toolu_fake01', content: [{ type: 'text', text: 'change staged' }] },
  // final result
  {
    type: 'result',
    subtype: 'success',
    is_error: false,
    result: 'Hello',
    stop_reason: 'end_turn',
    session_id: 'fake-session-001',
    total_cost_usd: 0.001234,
    usage: {
      input_tokens: 100,
      output_tokens: 10,
      cache_read_input_tokens: 0,
      cache_creation_input_tokens: 0,
    },
  },
 ];
 for (const line of lines) {
  process.stdout.write(`${JSON.stringify(line)}\n`);
 }
 // Set the exit status and let the process end on its own rather than calling
 // process.exit(): a forced exit can drop stdout data that is still queued
 // when stdout is a pipe whose writes are asynchronous.
 process.exitCode = 0;