feat: add Gemini CLI session runner + JSONL parser

Subprocess wrapper for `gemini -p --output-format stream-json --yolo` that spawns the Gemini CLI and parses NDJSON events (init, message, tool_use, tool_result, result) into a structured GeminiResult. Includes 10 unit tests for parseGeminiJSONL covering happy path, malformed input, empty input, missing fields, and multi-tool scenarios. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 08:19:21 -07:00 · 2026-03-20 08:19:21 -07:00 · 5018b59913
parent ae2d841012
commit 5018b59913
2 changed files with 305 additions and 0 deletions
--- a/test/helpers/gemini-session-runner.test.ts
+++ b/test/helpers/gemini-session-runner.test.ts
@ -0,0 +1,104 @@
 import { describe, test, expect } from 'bun:test';
 import { parseGeminiJSONL } from './gemini-session-runner';
 // Fixture: the events of an actual Gemini CLI stream-json run with tool use,
 // written as objects and serialized to one compact JSON line per event.
 const FIXTURE_LINES = [
  {
    type: 'init',
    timestamp: '2026-03-20T15:14:46.455Z',
    session_id: 'test-session-123',
    model: 'auto-gemini-3',
  },
  {
    type: 'message',
    timestamp: '2026-03-20T15:14:46.456Z',
    role: 'user',
    content: 'list the files',
  },
  {
    type: 'message',
    timestamp: '2026-03-20T15:14:49.650Z',
    role: 'assistant',
    content: 'I will list the files.',
    delta: true,
  },
  {
    type: 'tool_use',
    timestamp: '2026-03-20T15:14:49.690Z',
    tool_name: 'run_shell_command',
    tool_id: 'cmd_1',
    parameters: { command: 'ls' },
  },
  {
    type: 'tool_result',
    timestamp: '2026-03-20T15:14:49.931Z',
    tool_id: 'cmd_1',
    status: 'success',
    output: 'file1.ts\nfile2.ts',
  },
  {
    type: 'message',
    timestamp: '2026-03-20T15:14:51.945Z',
    role: 'assistant',
    content: 'Here are the files.',
    delta: true,
  },
  {
    type: 'result',
    timestamp: '2026-03-20T15:14:52.030Z',
    status: 'success',
    stats: {
      total_tokens: 27147,
      input_tokens: 26928,
      output_tokens: 87,
      cached: 0,
      duration_ms: 5575,
      tool_calls: 1,
    },
  },
 ].map((event) => JSON.stringify(event));
 describe('parseGeminiJSONL', () => {
  // --- Happy path against the recorded fixture ---
  test('extracts session ID from init event', () => {
    const { sessionId } = parseGeminiJSONL(FIXTURE_LINES);
    expect(sessionId).toBe('test-session-123');
  });
  test('concatenates assistant message deltas into output', () => {
    const { output } = parseGeminiJSONL(FIXTURE_LINES);
    expect(output).toBe('I will list the files.Here are the files.');
  });
  test('ignores user messages', () => {
    const userEvent = '{"type":"message","role":"user","content":"this should be ignored"}';
    const assistantEvent = '{"type":"message","role":"assistant","content":"this should be kept","delta":true}';
    const { output } = parseGeminiJSONL([userEvent, assistantEvent]);
    expect(output).toBe('this should be kept');
  });
  test('extracts tool names from tool_use events', () => {
    const { toolCalls } = parseGeminiJSONL(FIXTURE_LINES);
    expect(toolCalls).toHaveLength(1);
    expect(toolCalls[0]).toBe('run_shell_command');
  });
  test('extracts total tokens from result stats', () => {
    const { tokens } = parseGeminiJSONL(FIXTURE_LINES);
    expect(tokens).toBe(27147);
  });
  // --- Tolerance for noisy or partial input ---
  test('skips malformed lines without throwing', () => {
    // Two garbage lines are interleaved with three valid events.
    const noisy = [
      '{"type":"init","session_id":"ok"}',
      'this is not json',
      '{"type":"message","role":"assistant","content":"hello","delta":true}',
      '{incomplete json',
      '{"type":"result","status":"success","stats":{"total_tokens":100}}',
    ];
    const { sessionId, output, tokens } = parseGeminiJSONL(noisy);
    expect(sessionId).toBe('ok');
    expect(output).toBe('hello');
    expect(tokens).toBe(100);
  });
  test('skips empty and whitespace-only lines', () => {
    const padded = [
      '',
      '  ',
      '{"type":"init","session_id":"s1"}',
      '\t',
      '{"type":"result","status":"success","stats":{"total_tokens":50}}',
    ];
    const { sessionId, tokens } = parseGeminiJSONL(padded);
    expect(sessionId).toBe('s1');
    expect(tokens).toBe(50);
  });
  test('handles empty input', () => {
    const { output, toolCalls, tokens, sessionId } = parseGeminiJSONL([]);
    expect(output).toBe('');
    expect(toolCalls).toHaveLength(0);
    expect(tokens).toBe(0);
    expect(sessionId).toBeNull();
  });
  test('handles missing fields gracefully', () => {
    // Each event has a recognised type but lacks the field the parser reads.
    const initWithoutSessionId = '{"type":"init"}';
    const messageWithoutContent = '{"type":"message","role":"assistant"}';
    const toolUseWithoutName = '{"type":"tool_use"}';
    const resultWithoutStats = '{"type":"result","status":"success"}';
    const { sessionId, output, toolCalls, tokens } = parseGeminiJSONL([
      initWithoutSessionId,
      messageWithoutContent,
      toolUseWithoutName,
      resultWithoutStats,
    ]);
    expect(sessionId).toBeNull();
    expect(output).toBe('');
    expect(toolCalls).toHaveLength(0);
    expect(tokens).toBe(0);
  });
  // --- Multiple tool invocations keep their order and duplicates ---
  test('handles multiple tool_use events', () => {
    const toolEvents = [
      '{"type":"tool_use","tool_name":"run_shell_command","tool_id":"cmd_1","parameters":{"command":"ls"}}',
      '{"type":"tool_use","tool_name":"read_file","tool_id":"cmd_2","parameters":{"path":"foo.ts"}}',
      '{"type":"tool_use","tool_name":"run_shell_command","tool_id":"cmd_3","parameters":{"command":"cat bar.ts"}}',
    ];
    const { toolCalls } = parseGeminiJSONL(toolEvents);
    expect(toolCalls).toEqual(['run_shell_command', 'read_file', 'run_shell_command']);
  });
 });
--- a/test/helpers/gemini-session-runner.ts
+++ b/test/helpers/gemini-session-runner.ts
@ -0,0 +1,201 @@
 /**
 * Gemini CLI subprocess runner for skill E2E testing.
 *
 * Spawns `gemini -p` as an independent process, parses its stream-json
 * output, and returns structured results. Follows the same pattern as
 * codex-session-runner.ts but adapted for the Gemini CLI.
 *
 * Key differences from Codex session-runner:
 * - Uses `gemini -p` instead of `codex exec`
 * - Output is NDJSON with event types: init, message, tool_use, tool_result, result
 * - Uses `--output-format stream-json --yolo` instead of `--json -s read-only`
 * - No temp HOME needed — Gemini discovers skills from `.agents/skills/` in cwd
 * - Message events are streamed with `delta: true` — must concatenate
 */
 import * as path from 'path';
 // --- Interfaces ---
 /** Structured outcome of one `gemini -p` run, as returned by `runGeminiSkill`. */
 export interface GeminiResult {
  /** Session ID from the init event, or null if none was seen. */
  sessionId: string | null;
  /** Full assistant message text (concatenated deltas). */
  output: string;
  /** Tool names from tool_use events. */
  toolCalls: string[];
  /** Total tokens used. */
  tokens: number;
  /** Process exit code. */
  exitCode: number;
  /** Wall clock time, in milliseconds. */
  durationMs: number;
  /** Raw JSONL lines, kept for debugging. */
  rawLines: string[];
 }
 // --- JSONL parser ---
 /** Fields extracted from a Gemini stream-json transcript by `parseGeminiJSONL`. */
 export interface ParsedGeminiJSONL {
  /** Assistant message contents joined in arrival order, with no separator. */
  output: string;
  /** Tool names from tool_use events, in order, duplicates kept. */
  toolCalls: string[];
  /** `stats.total_tokens` from the last result event; 0 if absent or not a number. */
  tokens: number;
  /** `session_id` from the last init event that carried one; null otherwise. */
  sessionId: string | null;
 }
 /**
 * Parse an array of JSONL lines from `gemini -p --output-format stream-json`.
 * Pure function — no I/O, no side effects.
 *
 * Handles these Gemini event types:
 * - init → extract session_id
 * - message (role=assistant) → concatenate content into output; the `delta`
 *   flag is not inspected
 * - tool_use → extract tool_name
 * - result → extract token usage from stats
 * Every other event type (including tool_result) is ignored.
 *
 * Blank lines, lines that are not valid JSON, and JSON values that are not
 * objects are skipped. A field is only used when it has the expected
 * primitive type (string for session_id / content / tool_name, number for
 * total_tokens), so malformed events cannot put non-string or non-number
 * values into the result.
 *
 * @param lines Raw JSONL lines, one event per entry.
 * @returns The extracted output text, tool names, token count and session ID.
 */
 export function parseGeminiJSONL(lines: string[]): ParsedGeminiJSONL {
  const outputParts: string[] = [];
  const toolCalls: string[] = [];
  let tokens = 0;
  let sessionId: string | null = null;
  for (const line of lines) {
    if (!line.trim()) continue;
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue; // skip malformed lines
    }
    // Valid JSON that is not an object (null, numbers, strings) carries no event.
    if (typeof parsed !== 'object' || parsed === null) continue;
    const event = parsed as Record<string, unknown>;
    switch (event.type) {
      case 'init': {
        const sid = event.session_id;
        if (typeof sid === 'string' && sid) sessionId = sid;
        break;
      }
      case 'message': {
        const content = event.content;
        if (event.role === 'assistant' && typeof content === 'string' && content) {
          outputParts.push(content);
        }
        break;
      }
      case 'tool_use': {
        const name = event.tool_name;
        if (typeof name === 'string' && name) toolCalls.push(name);
        break;
      }
      case 'result': {
        const stats = event.stats;
        const total =
          typeof stats === 'object' && stats !== null
            ? (stats as Record<string, unknown>).total_tokens
            : undefined;
        // A result event always overwrites the count; unusable stats reset it to 0.
        tokens = typeof total === 'number' ? total : 0;
        break;
      }
      default:
        break;
    }
  }
  return {
    output: outputParts.join(''),
    toolCalls,
    tokens,
    sessionId,
  };
 }
 // --- Main runner ---
 /**
 * Best-effort progress line on stderr for one raw JSONL event.
 * Reports tool_use events and assistant messages (first 100 characters);
 * anything unparseable or of another shape is silently ignored because
 * parseGeminiJSONL processes the collected lines afterwards.
 */
 function logGeminiProgress(line: string, startTime: number): void {
  try {
    const parsed: unknown = JSON.parse(line);
    if (typeof parsed !== 'object' || parsed === null) return;
    const { type, role, tool_name: toolName, content } = parsed as Record<string, unknown>;
    const elapsed = Math.round((Date.now() - startTime) / 1000);
    if (type === 'tool_use' && typeof toolName === 'string' && toolName) {
      process.stderr.write(`  [gemini ${elapsed}s] tool: ${toolName}\n`);
    } else if (type === 'message' && role === 'assistant' && typeof content === 'string' && content) {
      process.stderr.write(`  [gemini ${elapsed}s] message: ${content.slice(0, 100)}\n`);
    }
  } catch { /* progress output is best-effort */ }
 }
 /**
 * Run a prompt via `gemini -p` and return structured results.
 *
 * Spawns gemini with stream-json output, collects the non-blank stdout lines,
 * parses them with parseGeminiJSONL, and returns a GeminiResult.
 *
 * - If no `gemini` executable is found on PATH, nothing is spawned and the
 *   result has `exitCode: -1` and `output: 'SKIP: gemini binary not found'`.
 * - If the run exceeds `timeoutMs`, the process is killed and `exitCode` is
 *   reported as 124 regardless of the real exit status.
 * - Progress (tool names, message previews) and the first 200 characters of
 *   the child's stderr are written to this process's stderr.
 *
 * @param opts.prompt    What to ask Gemini.
 * @param opts.timeoutMs Kill the process after this many ms. Default 300000 (5 min).
 * @param opts.cwd       Working directory (where .agents/skills/ lives). Defaults to process.cwd().
 * @returns Parsed output, tool calls, tokens and session ID, plus exit code,
 *          wall-clock duration and the raw JSONL lines.
 */
 export async function runGeminiSkill(opts: {
  prompt: string;           // What to ask Gemini
  timeoutMs?: number;       // Default 300000 (5 min)
  cwd?: string;             // Working directory (where .agents/skills/ lives)
 }): Promise<GeminiResult> {
  const {
    prompt,
    timeoutMs = 300_000,
    cwd,
  } = opts;
  const startTime = Date.now();
  // Resolve the binary in-process: no dependency on an external `which`
  // command, which is missing on Windows and some minimal images.
  if (!Bun.which('gemini')) {
    return {
      output: 'SKIP: gemini binary not found',
      toolCalls: [],
      tokens: 0,
      exitCode: -1,
      durationMs: Date.now() - startTime,
      sessionId: null,
      rawLines: [],
    };
  }
  // Build gemini command
  const args = ['-p', prompt, '--output-format', 'stream-json', '--yolo'];
  // Spawn gemini — uses real HOME for auth, cwd for skill discovery
  const proc = Bun.spawn(['gemini', ...args], {
    cwd: cwd || process.cwd(),
    stdout: 'pipe',
    stderr: 'pipe',
  });
  // Race against timeout
  let timedOut = false;
  const timeoutId = setTimeout(() => {
    timedOut = true;
    proc.kill();
  }, timeoutMs);
  try {
    const collectedLines: string[] = [];
    // Start draining stderr right away so it is read concurrently with stdout.
    const stderrPromise = new Response(proc.stderr).text();
    const reader = proc.stdout.getReader();
    const decoder = new TextDecoder();
    let buf = '';
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buf += decoder.decode(value, { stream: true });
        const lines = buf.split('\n');
        // The last piece is an incomplete line; keep it for the next chunk.
        buf = lines.pop() || '';
        for (const line of lines) {
          if (!line.trim()) continue;
          collectedLines.push(line);
          logGeminiProgress(line, startTime);
        }
      }
      // Flush bytes the streaming decoder is still holding (a multi-byte
      // character cut off at the end of the stream).
      buf += decoder.decode();
    } catch { /* stream read error — fall through to exit code handling */ }
    // Flush remaining buffer (final line without a trailing newline)
    if (buf.trim()) {
      collectedLines.push(buf);
    }
    const stderr = await stderrPromise;
    const exitCode = await proc.exited;
    const durationMs = Date.now() - startTime;
    // Parse all collected JSONL lines
    const parsed = parseGeminiJSONL(collectedLines);
    // Log stderr if non-empty (may contain auth errors, etc.)
    if (stderr.trim()) {
      process.stderr.write(`  [gemini stderr] ${stderr.trim().slice(0, 200)}\n`);
    }
    return {
      output: parsed.output,
      toolCalls: parsed.toolCalls,
      tokens: parsed.tokens,
      exitCode: timedOut ? 124 : exitCode,
      durationMs,
      sessionId: parsed.sessionId,
      rawLines: collectedLines,
    };
  } finally {
    // Always disarm the timer, including when awaiting stderr or the exit
    // code throws, so it cannot keep the event loop alive or fire later.
    clearTimeout(timeoutId);
  }
 }