feat(hooks): PostToolUse capture hook for AskUserQuestion

Plan-tune cathedral T5. Closes the substrate hole that motivated this entire branch: agent-compliance-only logging produced zero events in weeks of dogfood. PostToolUse hook captures every AUQ fire deterministically. What ships: - hosts/claude/hooks/question-log-hook.ts — TS hook that reads Claude Code's hook stdin, walks tool_input.questions[*], extracts user choice + recommended option from tool_response, spawns gstack-question-log per question. - hosts/claude/hooks/question-log-hook — bash shim Claude Code's hook runner invokes; execs bun against the .ts file. - Marker-first question_id extraction (D18 progressive markers): <gstack-qid:foo-bar> stripped from question text, used as the id. Hash fallback hook-<sha1[:10]> for unmarked questions (observed-only, never used as preference key — D18 hash drift mitigation). - (recommended) label parsing for the user_choice/recommended fields, with refuse-on-ambiguous when two labels are present (D2 safety). - Free-text capture: source=auq-other + free_text field when user picks Other and types (Layer 8 dream cycle input). - Matcher covers both native AskUserQuestion and mcp__*__AskUserQuestion (Codex/Conductor catch from outside voice review). - Crash safety: always exits 0; errors land in ~/.gstack/hook-errors.log so the user's session is never blocked by a hook failure. gstack-question-log extended to: - Accept `source` field (default 'agent', new values: hook, auq-other, auto-decided, codex-import-marker, codex-import-pattern). - Accept `tool_use_id` (<=128 chars) for dedup. - Composite dedup on (source, tool_use_id) across the last 100 lines — protects against hook + preamble both firing on the same tool call (D3 belt+suspenders). - Async fire `gstack-developer-profile --derive` after each successful write so inferred.sample_size actually grows (D17 — without this, the cathedral's "before 0, after >0" metric never moves). - GSTACK_QUESTION_LOG_NO_DERIVE=1 escape hatch for tests. 
9 new unit tests covering capture, marker extraction, MCP variant, free-text, dedup, ambiguous-recommended safety, crash paths. All pass plus the existing 88 tests across related files. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 07:42:46 -07:00 · 2026-05-27 07:42:46 -07:00 · a8a0447870
parent 2147532c07
commit a8a0447870
4 changed files with 658 additions and 2 deletions
--- a/bin/gstack-question-log
+++ b/bin/gstack-question-log
@ -50,12 +50,48 @@ if (!j.skill || !/^[a-z0-9-]+\$/.test(j.skill)) {
  process.exit(1);
 }
-// Required: question_id (kebab-case, <=64 chars)
+// Required: question_id (kebab-case, <=64 chars).
 // Cathedral T5: hook-sourced events use 'hook-<10-char-hash>' which is
 // kebab-case-compatible and passes the same regex.
 if (!j.question_id || !/^[a-z0-9-]+\$/.test(j.question_id) || j.question_id.length > 64) {
  process.stderr.write('gstack-question-log: invalid question_id, must be kebab-case <=64 chars\n');
  process.exit(1);
 }
 // Optional: source — tags which writer produced this event.
 //   'agent' (default) — preamble-driven write from inside the running agent
 //   'hook'             — PostToolUse hook captured it deterministically (T5)
 //   'auq-other'        — user picked 'Other' and typed free text (Layer 8)
 //   'auto-decided'     — PreToolUse enforcement hook substituted the answer (T6)
 //   'codex-import-marker' / 'codex-import-pattern' — T9 backfill from Codex
 const ALLOWED_SOURCES = ['agent', 'hook', 'auq-other', 'auto-decided', 'codex-import-marker', 'codex-import-pattern'];
 if (j.source !== undefined) {
  if (!ALLOWED_SOURCES.includes(j.source)) {
    process.stderr.write('gstack-question-log: invalid source, must be one of: ' + ALLOWED_SOURCES.join(', ') + '\n');
    process.exit(1);
  }
 } else {
  j.source = 'agent';
 }
 // Optional: tool_use_id — Claude Code hook stdin field; used for dedup.
 if (j.tool_use_id !== undefined) {
  if (typeof j.tool_use_id !== 'string' || j.tool_use_id.length > 128) {
    process.stderr.write('gstack-question-log: tool_use_id must be string <=128 chars\n');
    process.exit(1);
  }
 }
 // Optional: free_text — sanitize (no newlines, <=300 chars).
 if (j.free_text !== undefined) {
  if (typeof j.free_text !== 'string') {
    process.stderr.write('gstack-question-log: free_text must be string\n');
    process.exit(1);
  }
  if (j.free_text.length > 300) j.free_text = j.free_text.slice(0, 300);
  j.free_text = j.free_text.replace(/\n+/g, ' ');
 }
 // Required: question_summary (non-empty, <=200 chars, no newlines)
 if (typeof j.question_summary !== 'string' || !j.question_summary.length) {
  process.stderr.write('gstack-question-log: question_summary required\n');
@ -165,7 +201,49 @@ if [ $VALIDATE_RC -ne 0 ] || [ -z "$VALIDATED" ]; then
  exit 1
 fi
-echo "$VALIDATED" >> "$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
+LOG_FILE="$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
 # Cathedral T5: composite dedup. If this exact (source, tool_use_id,
 # question_id) triple was already logged within the last 100 lines, skip —
 # protects against the same fire being written twice (D3 plan-tune cathedral
 # decision).
 #
 # question_id is part of the key because a single AskUserQuestion call can
 # carry several questions that all share one tool_use_id; keying on
 # (source, tool_use_id) alone would drop every question after the first.
 #
 # Only the tail of the log is read (tail -n 100), so the lookup stays bounded
 # as the file grows. The check fails open: if the lookup itself errors, the
 # event is still written rather than aborting the script.
 DEDUP_SKIP=""
 if [ -f "$LOG_FILE" ]; then
  DEDUP_SKIP=$(tail -n 100 "$LOG_FILE" 2>/dev/null | VALIDATED_JSON="$VALIDATED" bun -e '
    const j = JSON.parse(process.env.VALIDATED_JSON);
    // Events without a tool_use_id are never deduplicated.
    if (!j.tool_use_id) process.exit(0);
    const recent = (await Bun.stdin.text()).split("\n");
    for (const ln of recent) {
      if (!ln.trim()) continue;
      try {
        const p = JSON.parse(ln);
        if (
          p.source === j.source &&
          p.tool_use_id === j.tool_use_id &&
          p.question_id === j.question_id
        ) {
          console.log("dup");
          process.exit(0);
        }
      } catch {}
    }
  ' 2>/dev/null) || DEDUP_SKIP=""
 fi
 if [ "$DEDUP_SKIP" = "dup" ]; then
  echo "DEDUP: skipped (source=$(echo "$VALIDATED" | bun -e 'const j=JSON.parse(await Bun.stdin.text()); console.log(j.source);'), tool_use_id duplicate)"
  exit 0
 fi
 # Append the validated event to the project's log as one JSONL line.
 echo "$VALIDATED" >> "$LOG_FILE"
 # Cathedral T5: fire-and-forget --derive so inferred dimensions stay current
 # without per-event latency (D17). Sub-second op; output suppressed; never
 # blocks the hook caller. Skipped via GSTACK_QUESTION_LOG_NO_DERIVE=1 for
 # tests that don't want the side effect.
 if [ -z "${GSTACK_QUESTION_LOG_NO_DERIVE:-}" ]; then
  # Subshell + nohup + trailing & detach the derive run; stdout/stderr of both
  # the command and the subshell go to /dev/null, so this script neither waits
  # for it nor surfaces its output.
  (
    nohup "$SCRIPT_DIR/gstack-developer-profile" --derive >/dev/null 2>&1 &
  ) >/dev/null 2>&1
 fi
 # NOTE: question-log.jsonl is deliberately NOT enqueued for gbrain-sync.
 # Per Codex v2 review, audit/derivation data stays local alongside the
--- a/hosts/claude/hooks/question-log-hook
+++ b/hosts/claude/hooks/question-log-hook
@ -0,0 +1,7 @@
 #!/usr/bin/env bash
 # Bash shim — Claude Code hooks run `command` strings via /bin/sh, so this
 # wrapper makes the TypeScript hook executable via bun. Settings.json
 # references this file directly.
 #
 # Fail-open: the hook contract is "always exit 0", so an unresolvable script
 # directory or a missing bun binary must not surface as a hook failure. Both
 # cases exit 0 here; a missing bun is recorded (best-effort) in the same
 # hook-errors.log the TypeScript hook writes to.
 set -e
 HERE="$(cd "$(dirname "$0")" 2>/dev/null && pwd)" || exit 0
 if ! command -v bun >/dev/null 2>&1; then
  STATE_ROOT="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
  {
    mkdir -p "$STATE_ROOT" &&
      printf '%s question-log-hook: bun not found on PATH\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$STATE_ROOT/hook-errors.log"
  } 2>/dev/null || true
  exit 0
 fi
 exec bun "$HERE/question-log-hook.ts"
--- a/hosts/claude/hooks/question-log-hook.ts
+++ b/hosts/claude/hooks/question-log-hook.ts
@ -0,0 +1,286 @@
 #!/usr/bin/env bun
 /**
 * PostToolUse hook for AskUserQuestion (Claude Code, plan-tune cathedral T5).
 *
 * Reads hook stdin JSON, extracts every AUQ question + user choice from the
 * tool_input/tool_response, and writes them via gstack-question-log so the
 * substrate captures fires deterministically — no agent compliance required.
 *
 * Triggered by ~/.claude/settings.json:
 *   {
 *     "hooks": {
 *       "PostToolUse": [
 *         {
 *           "matcher": "(AskUserQuestion|mcp__.*__AskUserQuestion)",
 *           "hooks": [
 *             { "type": "command",
 *               "command": "$CLAUDE_PROJECT_DIR/.claude/skills/gstack/hosts/claude/hooks/question-log-hook",
 *               "timeout": 5 }
 *           ]
 *         }
 *       ]
 *     }
 *   }
 *
 * Invariants:
 *   - Always exits 0. A failing hook MUST NOT block the user's session.
 *     Errors land in ~/.gstack/hook-errors.log for postmortem.
 *   - Spawns gstack-question-log as a subprocess; that bin handles
 *     validation, dedup (source+tool_use_id), async derive.
 *   - Marker-first question_id (`<gstack-qid:foo-bar>`), hash fallback
 *     (D18 progressive markers).
 *
 * See docs/spikes/claude-code-hook-mutation.md for the protocol contract.
 */
 import * as crypto from 'crypto';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
 import { spawnSync } from 'child_process';
 import { fileURLToPath } from 'url';
 /**
 * Subset of the hook stdin JSON that this script reads. Every field is
 * optional: the payload is parsed straight from stdin with no schema
 * validation, so nothing can be assumed present.
 */
 interface HookStdin {
  /** Copied onto each logged event, truncated to 64 chars. */
  session_id?: string;
  /** Declared for completeness; not read by this script. */
  hook_event_name?: string;
  /** Must be `AskUserQuestion` or `mcp__<server>__AskUserQuestion`, else the hook no-ops. */
  tool_name?: string;
  /** Copied onto each logged event, truncated to 128 chars. */
  tool_use_id?: string;
  tool_input?: {
    /** One log event is written per entry that has non-empty `question` text. */
    questions?: Array<{
      question?: string;
      /** Options may be plain strings or objects; see optionLabels for how a label is chosen. */
      options?: Array<string | { label?: string; description?: string }>;
      /** Declared for completeness; not read by this script. */
      multiSelect?: boolean;
    }>;
  };
  /** Shape is not fixed; extractUserChoices probes it for the user's answers. */
  tool_response?: unknown;
  /** Passed to detectSkill, which currently ignores it. */
  cwd?: string;
 }
 /**
 * Shape of one extracted question record.
 * NOTE(review): no function in this file references this interface — main()
 * builds its payload as a plain Record<string, unknown>. Confirm whether it
 * is still needed or should type that payload.
 */
 interface ExtractedQuestion {
  question_id: string;
  question_summary: string;
  options_count: number;
  user_choice: string;
  recommended?: string;
  free_text?: string;
  category?: string;
  door_type?: string;
 }
 // Matches the first `<gstack-qid:ID>` marker in a question; group 1 is the
 // id (1-64 chars of letters, digits, hyphens). The `i` flag makes both the
 // tag and the captured id case-insensitive.
 const MARKER_RE = /<gstack-qid:([a-z0-9-]{1,64})>/i;
 // Matches a trailing "(recommended)" suffix on an option label, ignoring
 // case and any whitespace after it.
 const RECOMMENDED_LABEL_RE = /\(recommended\)\s*$/i;
 /**
 * Append one timestamped line to `hook-errors.log`.
 *
 * The log directory is the first non-empty of GSTACK_STATE_ROOT,
 * GSTACK_HOME, or `~/.gstack`, and is created if missing. Any failure while
 * writing is swallowed so the hook can never fail because of its own logging.
 *
 * @param msg Text appended after the `question-log-hook: ` prefix.
 */
 function logHookError(msg: string): void {
  try {
    const env = process.env;
    let logDir = env.GSTACK_STATE_ROOT;
    if (!logDir) logDir = env.GSTACK_HOME;
    if (!logDir) logDir = path.join(os.homedir(), '.gstack');
    fs.mkdirSync(logDir, { recursive: true });
    const stamp = new Date().toISOString();
    const line = `${stamp} question-log-hook: ${msg}\n`;
    fs.appendFileSync(path.join(logDir, 'hook-errors.log'), line);
  } catch {
    // Deliberately ignored: error reporting is best-effort and must not block.
  }
 }
 /**
 * Collect everything written to stdin as a UTF-8 string.
 *
 * Resolves with whatever has been read so far on the stream's `end` event,
 * on its `error` event, or 2 seconds after the call — whichever happens
 * first. The promise never rejects.
 */
 function readStdin(): Promise<string> {
  return new Promise<string>((resolve) => {
    const input = process.stdin;
    let collected = '';
    const finish = (): void => resolve(collected);
    input.setEncoding('utf-8');
    input.on('data', (piece) => {
      collected += piece;
    });
    input.on('end', finish);
    input.on('error', finish);
    // Hard cutoff so a stdin that never closes cannot hang the user's session.
    setTimeout(finish, 2000);
  });
 }
 /**
 * Build the fallback id for a question that carries no marker.
 *
 * The id is `hook-` plus the first 10 hex chars of the SHA-1 of
 * `skill::question::options`, where the options are sorted and joined with
 * `|` so that option order does not change the id. The caller's array is
 * not mutated.
 */
 function hashQuestionId(skill: string, question: string, options: string[]): string {
  const canonicalOptions = options.slice().sort().join('|');
  const hasher = crypto.createHash('sha1');
  hasher.update(`${skill}::${question}::${canonicalOptions}`);
  const hex = hasher.digest('hex');
  return 'hook-' + hex.substring(0, 10);
 }
 /**
 * Marker-first id extraction. Returns the marker id (stripped of the
 * <gstack-qid:...> wrapper) when present, else a hash-based hook- id.
 * Per D18 progressive markers — hash ids are observed-only, never used
 * as preference keys.
 *
 * The marker regex is case-insensitive, so the captured id is lowercased
 * before it is returned: gstack-question-log only accepts lowercase
 * kebab-case ids, and an id such as `Ship-Test` would otherwise be rejected
 * and the event lost.
 *
 * @param skill Skill name, used only for the hash fallback.
 * @param questionText Raw question text, possibly containing a marker.
 * @param options Option labels, used only for the hash fallback.
 * @returns The id, whether a marker was found, and the question text with
 *   the first marker removed and surrounding whitespace trimmed (unchanged
 *   when no marker is present).
 */
 function extractQuestionId(
  skill: string,
  questionText: string,
  options: string[],
 ): { id: string; marker_present: boolean; stripped_question: string } {
  const match = MARKER_RE.exec(questionText);
  if (!match) {
    return {
      id: hashQuestionId(skill, questionText, options),
      marker_present: false,
      stripped_question: questionText,
    };
  }
  return {
    id: match[1].toLowerCase(),
    marker_present: true,
    stripped_question: questionText.replace(MARKER_RE, '').trim(),
  };
 }
 /**
 * Reduce each option to a display string: a string option is used as-is;
 * an object option yields its `label`, else its `description`, else ''.
 */
 function optionLabels(opts: Array<string | { label?: string; description?: string }>): string[] {
  const labels: string[] = [];
  for (const opt of opts) {
    if (typeof opt === 'string') {
      labels.push(opt);
      continue;
    }
    labels.push(opt.label || opt.description || '');
  }
  return labels;
 }
 /**
 * Parse "(recommended)" label-first per D2; fall back to "Recommendation: X"
 * prose match; refuse (return undefined) if ambiguous.
 *
 * Label path: exactly one option ending in "(recommended)" wins and is
 * returned with that suffix removed; two or more is ambiguous.
 *
 * Prose path: the first 12 characters of the text after "Recommendation:"
 * are compared, case-insensitively, against the start of each option. The
 * option is returned unmodified only when exactly one option matches. An
 * empty phrase is refused outright — every string starts with '', so it
 * would otherwise "match" the first option.
 *
 * @param questionText Question text (marker already stripped by the caller).
 * @param opts Option labels.
 * @returns The recommended option, or undefined when none or ambiguous.
 */
 function extractRecommended(questionText: string, opts: string[]): string | undefined {
  const labelMatches = opts.filter((o) => RECOMMENDED_LABEL_RE.test(o));
  if (labelMatches.length === 1) return labelMatches[0].replace(RECOMMENDED_LABEL_RE, '').trim();
  if (labelMatches.length > 1) return undefined; // ambiguous
  const m = questionText.match(/Recommendation:\s*([^\n]+)/i);
  if (!m) return undefined;
  const prefix = m[1].trim().toLowerCase().slice(0, 12);
  if (!prefix) return undefined;
  const prefixMatches = opts.filter((o) => o.toLowerCase().startsWith(prefix));
  // Same refuse-on-ambiguous rule as the label path.
  return prefixMatches.length === 1 ? prefixMatches[0] : undefined;
 }
 /**
 * Best-effort extraction of which option the user picked per question.
 * AUQ tool_response shape varies by Claude Code variant (native vs MCP),
 * and the hook stdin docs don't pin a single canonical shape. We handle
 * the common cases gracefully.
 *
 * Shapes probed, in order:
 *   A: { answers: [{ option_label | label | choice | answer, free_text? | other_text? }] }
 *      (a plain string entry is taken as the choice itself)
 *   B: { questions: [{ user_answer | answer | choice }] }
 *      If an entry has none of those keys but `answers` is an object keyed
 *      by question text, the answer is looked up by the entry's `question`.
 *      NOTE(review): the keyed-map form is believed to be what the native
 *      tool returns — confirm against a captured payload.
 *   Anything else: every question gets a `__response-shape-unknown:…__`
 *      placeholder carrying the first 80 chars of the stringified response.
 *
 * Malformed entries (null, numbers, …) become `__unknown__` instead of
 * throwing, so one bad entry cannot discard the whole tool call.
 *
 * @param response Raw `tool_response` from the hook payload.
 * @param questionCount Number of questions asked; the result is padded with
 *   `__unknown__` up to this length (it is not truncated if longer).
 * @returns One entry per answer, with `free_text` present only when the
 *   response supplied a non-empty value.
 */
 function extractUserChoices(
  response: unknown,
  questionCount: number,
 ): Array<{ choice: string; free_text?: string }> {
  type Choice = { choice: string; free_text?: string };
  const UNKNOWN = '__unknown__';
  const out: Choice[] = [];
  const padded = (): Choice[] => {
    while (out.length < questionCount) out.push({ choice: UNKNOWN });
    return out;
  };
  const asRecord = (v: unknown): Record<string, unknown> | undefined =>
    v !== null && typeof v === 'object' && !Array.isArray(v)
      ? (v as Record<string, unknown>)
      : undefined;
  const firstTruthy = (src: Record<string, unknown>, keys: string[]): unknown => {
    for (const k of keys) {
      if (src[k]) return src[k];
    }
    return undefined;
  };

  if (!response) return padded();
  const rec = asRecord(response) ?? {};

  if (Array.isArray(rec.answers)) {
    for (const entry of rec.answers) {
      const a = asRecord(entry);
      if (!a) {
        out.push({ choice: typeof entry === 'string' && entry ? entry : UNKNOWN });
        continue;
      }
      const choice = (firstTruthy(a, ['option_label', 'label', 'choice', 'answer']) || UNKNOWN) as string;
      const freeText = (a.free_text || a.other_text) as string | undefined;
      out.push(freeText ? { choice, free_text: freeText } : { choice });
    }
    return padded();
  }

  if (Array.isArray(rec.questions)) {
    const answersByQuestion = asRecord(rec.answers);
    for (const entry of rec.questions) {
      const q = asRecord(entry);
      let choice = q ? firstTruthy(q, ['user_answer', 'answer', 'choice']) : undefined;
      if (!choice && q && answersByQuestion && typeof q.question === 'string') {
        choice = answersByQuestion[q.question];
      }
      out.push({ choice: (choice || UNKNOWN) as string });
    }
    return padded();
  }

  // Fall back: embed the first 80 chars of the stringified response to help
  // future debugging. Computed once; it is identical for every question.
  const preview = String(JSON.stringify(response) ?? '').slice(0, 80);
  for (let i = 0; i < questionCount; i++) {
    out.push({ choice: `__response-shape-unknown:${preview}__` });
  }
  return out;
 }
 /**
 * Name of the skill that raised the question. Always 'unknown' for now.
 *
 * `cwd` is accepted but unused: it tends to identify the project, not the
 * running skill, and there is no session-state mechanism to consult. The
 * durable path is a `<gstack-skill:NAME>` marker embedded in question text,
 * planned as future plan-tune work.
 */
 function detectSkill(cwd: string | undefined): string {
  const FALLBACK_SKILL = 'unknown';
  void cwd; // Kept in the signature for the future marker-based lookup.
  return FALLBACK_SKILL;
 }
 /**
 * Run `bin/gstack-question-log` synchronously with the payload passed as a
 * single JSON argument, and record any failure via logHookError.
 *
 * The bin is located relative to this script (hosts/claude/hooks/ ->
 * ../../../bin/). The child gets no stdin and is given 3 seconds to finish.
 * Nothing is thrown or returned; failures are only logged.
 *
 * @param payload Event fields to serialize and hand to the bin.
 */
 function spawnLog(payload: Record<string, unknown>): void {
  // fileURLToPath rather than URL.pathname: pathname stays percent-encoded,
  // so a checkout under a directory containing spaces (or other escaped
  // characters) would resolve to a path that does not exist.
  const here = path.dirname(fileURLToPath(import.meta.url));
  // hosts/claude/hooks/ -> ../../../bin/
  const bin = path.resolve(here, '..', '..', '..', 'bin', 'gstack-question-log');
  const res = spawnSync(bin, [JSON.stringify(payload)], {
    encoding: 'utf-8',
    stdio: ['ignore', 'pipe', 'pipe'],
    timeout: 3000,
  });
  if (res.status === 0) return;
  // status is null when the process never ran or was killed; report the
  // actual cause instead of "exited null".
  if (res.error) {
    logHookError(`gstack-question-log failed to run: ${res.error.message}`);
    return;
  }
  if (res.signal) {
    logHookError(`gstack-question-log killed by ${res.signal}: ${res.stderr || res.stdout}`);
    return;
  }
  logHookError(`gstack-question-log exited ${res.status}: ${res.stderr || res.stdout}`);
 }
 /**
 * Hook entry point: read the hook payload from stdin and write one
 * question-log event per AskUserQuestion question.
 *
 * Always terminates the process with exit code 0. The payload is untrusted,
 * so its shape is checked before use:
 *   - empty stdin, unparseable JSON, or JSON that is not an object -> exit
 *     (the latter two are recorded in hook-errors.log);
 *   - a tool_name other than AskUserQuestion / mcp__*__AskUserQuestion -> exit;
 *   - `questions` that is missing, not an array, or empty -> exit;
 *   - question entries that are not objects or have no text are skipped;
 *   - non-string session_id / tool_use_id are omitted rather than sliced.
 * A failure while processing one question is logged and does not prevent the
 * remaining questions from being written.
 */
 async function main(): Promise<void> {
  const raw = await readStdin();
  if (!raw.trim()) {
    process.exit(0);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (e) {
    logHookError(`stdin parse failed: ${e instanceof Error ? e.message : String(e)}`);
    process.exit(0);
  }
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    logHookError('stdin was valid JSON but not an object; nothing to log');
    process.exit(0);
  }
  const stdin = parsed as HookStdin;

  const toolName = typeof stdin.tool_name === 'string' ? stdin.tool_name : '';
  if (toolName !== 'AskUserQuestion' && !/^mcp__.+__AskUserQuestion$/.test(toolName)) {
    // Matcher should have filtered this out; defensive no-op.
    process.exit(0);
  }

  const rawQuestions = stdin.tool_input?.questions;
  const questions = Array.isArray(rawQuestions) ? rawQuestions : [];
  if (questions.length === 0) {
    process.exit(0);
  }

  const skill = detectSkill(stdin.cwd);
  const choices = extractUserChoices(stdin.tool_response, questions.length);
  const sessionId = typeof stdin.session_id === 'string' ? stdin.session_id.slice(0, 64) : undefined;
  const toolUseId =
    typeof stdin.tool_use_id === 'string' ? stdin.tool_use_id.slice(0, 128) : undefined;

  for (let i = 0; i < questions.length; i++) {
    try {
      const q = questions[i];
      if (q === null || typeof q !== 'object') continue;
      const qText = typeof q.question === 'string' ? q.question : '';
      if (!qText) continue;
      const opts = optionLabels(Array.isArray(q.options) ? q.options : []);
      const { id, stripped_question } = extractQuestionId(skill, qText, opts);
      const recommended = extractRecommended(stripped_question, opts);
      const choice = choices[i] || { choice: '__unknown__' };
      const payload: Record<string, unknown> = {
        skill,
        question_id: id,
        question_summary: stripped_question.slice(0, 200),
        options_count: opts.length,
        user_choice: String(choice.choice).slice(0, 64),
        // A typed free-text answer is tagged separately from a plain pick.
        source: choice.free_text ? 'auq-other' : 'hook',
        session_id: sessionId,
        tool_use_id: toolUseId,
      };
      if (recommended) payload.recommended = recommended.slice(0, 64);
      if (choice.free_text) payload.free_text = String(choice.free_text);
      spawnLog(payload);
    } catch (e) {
      // Isolate failures per question so the rest of the call is still logged.
      logHookError(`question ${i} skipped: ${e instanceof Error ? e.message : String(e)}`);
    }
  }
  process.exit(0);
 }
 // Last line of defence: anything that escapes main() is logged, and the
 // process still exits 0 so the user's session is never blocked.
 main().catch((e) => {
  logHookError(`main crash: ${e instanceof Error ? e.message : String(e)}`);
  process.exit(0);
 });
--- a/test/question-log-hook.test.ts
+++ b/test/question-log-hook.test.ts
@ -0,0 +1,285 @@
 /**
 * PostToolUse hook (plan-tune cathedral T5) — unit tests.
 *
 * Feeds the hook synthetic Claude Code hook payloads via stdin and asserts
 * the resulting question-log.jsonl reflects the right schema. Covers:
 *   - Marker-first question_id (D18 progressive markers)
 *   - Hash fallback when no marker
 *   - source=hook tagging
 *   - source=auq-other when free_text present
 *   - Dedup on (source, tool_use_id) composite (D3)
 *   - Hook exits 0 even on malformed input (never blocks user session)
 *   - mcp__*__AskUserQuestion matcher acceptance
 *   - "(recommended)" label parse → recommended field populated
 *   - Refuse-on-ambiguous: two (recommended) labels → recommended omitted
 */
 import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
 import { spawnSync } from 'child_process';
 const ROOT = path.resolve(import.meta.dir, '..');
 const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-log-hook');
 // Per-test scratch directory; runHook points GSTACK_STATE_ROOT at it and
 // readLog reads the resulting logs back from it.
 let stateRoot: string;
 beforeEach(() => {
  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-hooklog-'));
  // Nothing is pre-created under it: readLog scans whatever directories
  // appear under <stateRoot>/projects after the hook has run.
 });
 afterEach(() => {
  // force: true so cleanup does not fail if the directory is already gone.
  fs.rmSync(stateRoot, { recursive: true, force: true });
 });
 /**
 * Run the hook shim with `stdin` serialized as its JSON input.
 *
 * The child inherits the current environment except that GSTACK_HOME is
 * removed, GSTACK_STATE_ROOT points at the per-test directory, and
 * GSTACK_QUESTION_LOG_NO_DERIVE=1 is set. It runs with the repo root as cwd.
 *
 * @returns Captured stdout/stderr (empty string if absent) and the exit
 *   status (-1 if the process produced none).
 */
 function runHook(stdin: object): { stdout: string; stderr: string; status: number } {
  const inherited = Object.entries(process.env).filter(
    (pair): pair is [string, string] => pair[1] !== undefined && pair[0] !== 'GSTACK_HOME',
  );
  const env: Record<string, string> = {
    ...Object.fromEntries(inherited),
    GSTACK_STATE_ROOT: stateRoot,
    GSTACK_QUESTION_LOG_NO_DERIVE: '1',
  };
  const { stdout, stderr, status } = spawnSync(HOOK, [], {
    env,
    input: JSON.stringify(stdin),
    encoding: 'utf-8',
    cwd: ROOT,
  });
  return { stdout: stdout ?? '', stderr: stderr ?? '', status: status ?? -1 };
 }
 /**
 * Collect every event from every `question-log.jsonl` found under
 * `<stateRoot>/projects/<dir>/`. Blank and unparseable lines are skipped;
 * returns an empty array when no projects directory exists.
 */
 function readLog(): Array<Record<string, unknown>> {
  const events: Array<Record<string, unknown>> = [];
  const projectsRoot = path.join(stateRoot, 'projects');
  if (!fs.existsSync(projectsRoot)) return events;
  for (const slug of fs.readdirSync(projectsRoot)) {
    const logPath = path.join(stateRoot, 'projects', slug, 'question-log.jsonl');
    if (!fs.existsSync(logPath)) continue;
    const rawLines = fs.readFileSync(logPath, 'utf-8').trim().split('\n');
    for (const rawLine of rawLines) {
      if (!rawLine) continue;
      try {
        events.push(JSON.parse(rawLine));
      } catch {
        // Malformed line: ignore it and keep reading.
      }
    }
  }
  return events;
 }
 // ----------------------------------------------------------------------
 // Native AskUserQuestion capture
 // ----------------------------------------------------------------------
 // Native tool name: basic capture, plus marker-derived question ids.
 describe('PostToolUse hook (native AskUserQuestion)', () => {
  test('captures one event per question with source=hook and tool_use_id', () => {
    const acceptLabel = 'A) Accept (recommended)';
    const hookInput = {
      session_id: 'sess1',
      hook_event_name: 'PostToolUse',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-1',
      tool_input: {
        questions: [
          {
            question: 'D1 — Test capture\nRecommendation: A',
            options: [acceptLabel, 'B) Reject'],
            multiSelect: false,
          },
        ],
      },
      tool_response: {
        answers: [{ option_label: acceptLabel }],
      },
      cwd: ROOT,
    };
    const { status } = runHook(hookInput);
    expect(status).toBe(0);
    const logged = readLog();
    expect(logged.length).toBe(1);
    const [event] = logged;
    expect(event.source).toBe('hook');
    expect(event.tool_use_id).toBe('tu-1');
    expect(event.session_id).toBe('sess1');
    // No marker in the question, so the id is the hash fallback.
    expect(typeof event.question_id).toBe('string');
    expect((event.question_id as string).startsWith('hook-')).toBe(true);
    expect(event.user_choice).toContain('Accept');
    // Recommended parsed from (recommended) label
    expect(event.recommended).toContain('Accept');
  });
  test('marker-first question_id when <gstack-qid:foo> present', () => {
    const fixNow = 'A) Fix now (recommended)';
    runHook({
      session_id: 'sess2',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-2',
      tool_input: {
        questions: [
          {
            question: 'D2 — Marker test <gstack-qid:ship-test-failure-triage>\nRecommendation: A',
            options: [fixNow, 'B) Investigate', 'C) Ack and ship'],
          },
        ],
      },
      tool_response: { answers: [{ option_label: fixNow }] },
      cwd: ROOT,
    });
    const logged = readLog();
    expect(logged.length).toBe(1);
    const [event] = logged;
    expect(event.question_id).toBe('ship-test-failure-triage');
    // Marker stripped from summary
    const summary = event.question_summary as string;
    expect(summary.includes('<gstack-qid:')).toBe(false);
  });
 });
 // ----------------------------------------------------------------------
 // MCP AskUserQuestion variant (Conductor)
 // ----------------------------------------------------------------------
 // MCP-namespaced tool names are accepted; unrelated tools are ignored.
 describe('PostToolUse hook (mcp__*__AskUserQuestion variant)', () => {
  test('accepts mcp__conductor__AskUserQuestion tool_name', () => {
    const mcpInput = {
      session_id: 'sess3',
      tool_name: 'mcp__conductor__AskUserQuestion',
      tool_use_id: 'tu-3',
      tool_input: {
        questions: [{ question: 'Test', options: ['A', 'B'] }],
      },
      tool_response: { answers: [{ option_label: 'A' }] },
      cwd: ROOT,
    };
    const { status } = runHook(mcpInput);
    expect(status).toBe(0);
    const logged = readLog();
    expect(logged.length).toBe(1);
  });
  test('ignores unrelated tool_name (defensive)', () => {
    const bashInput = {
      session_id: 'sess4',
      tool_name: 'Bash',
      tool_use_id: 'tu-4',
      tool_input: {},
      cwd: ROOT,
    };
    const { status } = runHook(bashInput);
    expect(status).toBe(0);
    const logged = readLog();
    expect(logged.length).toBe(0);
  });
 });
 // ----------------------------------------------------------------------
 // Free-text capture (Layer 8 dream cycle)
 // ----------------------------------------------------------------------
 // An answer carrying free_text is tagged auq-other and keeps the typed text.
 describe('PostToolUse hook (free-text "Other" responses)', () => {
  test('source=auq-other and free_text populated when user types free text', () => {
    const otherAnswer = {
      option_label: 'Other',
      free_text: 'I always include tests with new features',
    };
    runHook({
      session_id: 'sess5',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-5',
      tool_input: {
        questions: [{ question: 'D5 — Other test', options: ['A', 'B'] }],
      },
      tool_response: { answers: [otherAnswer] },
      cwd: ROOT,
    });
    const logged = readLog();
    expect(logged.length).toBe(1);
    const [event] = logged;
    expect(event.source).toBe('auq-other');
    expect(event.free_text).toContain('always include tests');
  });
 });
 // ----------------------------------------------------------------------
 // Dedup
 // ----------------------------------------------------------------------
 // Replaying an identical payload must not produce a second log line.
 describe('PostToolUse hook (dedup on source + tool_use_id)', () => {
  test('second fire with same (source, tool_use_id) is dropped', () => {
    const repeatedFire = {
      session_id: 'sess6',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-6',
      tool_input: { questions: [{ question: 'Dedup test', options: ['A'] }] },
      tool_response: { answers: [{ option_label: 'A' }] },
      cwd: ROOT,
    };
    for (let attempt = 0; attempt < 2; attempt++) {
      runHook(repeatedFire);
    }
    const logged = readLog();
    expect(logged.length).toBe(1);
  });
 });
 // ----------------------------------------------------------------------
 // Refuse-on-ambiguous (D2 safety)
 // ----------------------------------------------------------------------
 // Two options both labelled (recommended): the parser must not pick one.
 describe('PostToolUse hook (recommended parser safety)', () => {
  test('two (recommended) labels → recommended field omitted', () => {
    const bothRecommended = ['A) Foo (recommended)', 'B) Bar (recommended)'];
    runHook({
      session_id: 'sess7',
      tool_name: 'AskUserQuestion',
      tool_use_id: 'tu-7',
      tool_input: {
        questions: [{ question: 'Ambiguous test', options: bothRecommended }],
      },
      tool_response: { answers: [{ option_label: 'A) Foo (recommended)' }] },
      cwd: ROOT,
    });
    const logged = readLog();
    expect(logged.length).toBe(1);
    const [event] = logged;
    expect(event.recommended).toBeUndefined();
  });
 });
 // ----------------------------------------------------------------------
 // Crash safety
 // ----------------------------------------------------------------------
 // Bad input must never make the hook exit non-zero.
 describe('PostToolUse hook (crash safety)', () => {
  // Spawn the hook with raw (not JSON-encoded) stdin. The environment is the
  // current one plus the per-test state root and the no-derive switch.
  const spawnWithRawInput = (input: string) => {
    const env: Record<string, string> = {};
    Object.entries(process.env).forEach(([name, value]) => {
      if (value !== undefined) env[name] = value;
    });
    env.GSTACK_STATE_ROOT = stateRoot;
    env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
    return spawnSync(HOOK, [], { env, input, encoding: 'utf-8' });
  };
  test('exits 0 on empty stdin', () => {
    const { status } = spawnWithRawInput('');
    expect(status).toBe(0);
  });
  test('exits 0 on malformed JSON', () => {
    const { status } = spawnWithRawInput('not json');
    expect(status).toBe(0);
    // Error logged to hook-errors.log
    const errLog = path.join(stateRoot, 'hook-errors.log');
    expect(fs.existsSync(errLog)).toBe(true);
    const errText = fs.readFileSync(errLog, 'utf-8');
    expect(errText).toContain('stdin parse failed');
  });
 });