diff --git a/bin/gstack-question-log b/bin/gstack-question-log
index c5d664008..b8b266e8e 100755
--- a/bin/gstack-question-log
+++ b/bin/gstack-question-log
@@ -50,12 +50,48 @@ if (!j.skill || !/^[a-z0-9-]+\$/.test(j.skill)) {
   process.exit(1);
 }
 
-// Required: question_id (kebab-case, <=64 chars)
+// Required: question_id (kebab-case, <=64 chars).
+// Cathedral T5: hook-sourced events use 'hook-<10-char-hash>' which is
+// kebab-case-compatible and passes the same regex.
 if (!j.question_id || !/^[a-z0-9-]+\$/.test(j.question_id) || j.question_id.length > 64) {
   process.stderr.write('gstack-question-log: invalid question_id, must be kebab-case <=64 chars\n');
   process.exit(1);
 }
 
+// Optional: source — tags which writer produced this event; values outside ALLOWED_SOURCES are rejected.
+//   'agent' (default) — preamble-driven write from inside the running agent
+//   'hook' — PostToolUse hook captured it deterministically (T5)
+//   'auq-other' — user picked 'Other' and typed free text (Layer 8)
+//   'auto-decided' — PreToolUse enforcement hook substituted the answer (T6)
+//   'codex-import-marker' / 'codex-import-pattern' — T9 backfill from Codex
+const ALLOWED_SOURCES = ['agent', 'hook', 'auq-other', 'auto-decided', 'codex-import-marker', 'codex-import-pattern'];
+if (j.source !== undefined) {
+  if (!ALLOWED_SOURCES.includes(j.source)) {
+    process.stderr.write('gstack-question-log: invalid source, must be one of: ' + ALLOWED_SOURCES.join(', ') + '\n');
+    process.exit(1);
+  }
+} else {
+  j.source = 'agent';
+}
+
+// Optional: tool_use_id — Claude Code hook stdin field (string, <=128 chars); part of the dedup key.
+if (j.tool_use_id !== undefined) {
+  if (typeof j.tool_use_id !== 'string' || j.tool_use_id.length > 128) {
+    process.stderr.write('gstack-question-log: tool_use_id must be string <=128 chars\n');
+    process.exit(1);
+  }
+}
+
+// Optional: free_text — must be a string; capped at 300 chars, then CR/LF runs become single spaces.
+if (j.free_text !== undefined) {
+  if (typeof j.free_text !== 'string') {
+    process.stderr.write('gstack-question-log: free_text must be string\n');
+    process.exit(1);
+  }
+  if (j.free_text.length > 300) j.free_text = j.free_text.slice(0, 300);
+  j.free_text = j.free_text.replace(/[\r\n]+/g, ' '); // CR as well as LF, so CRLF input leaves no stray carriage return
+}
+
 // Required: question_summary (non-empty, <=200 chars, no newlines)
 if (typeof j.question_summary !== 'string' || !j.question_summary.length) {
   process.stderr.write('gstack-question-log: question_summary required\n');
@@ -165,7 +201,49 @@ if [ $VALIDATE_RC -ne 0 ] || [ -z "$VALIDATED" ]; then
   exit 1
 fi
 
-echo "$VALIDATED" >> "$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
+LOG_FILE="$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
+
+# Cathedral T5: composite-key dedup. If this exact (source, tool_use_id,
+# question_id) was already logged within the last 100 lines, skip. Source is
+# part of the key, so only repeats from the same writer are dropped (D3
+# plan-tune cathedral decision). Only the log tail is compared, to stay cheap.
+DEDUP_SKIP=""
+if [ -f "$LOG_FILE" ]; then
+  DEDUP_SKIP=$(VALIDATED_JSON="$VALIDATED" LOG_FILE_PATH="$LOG_FILE" bun -e '
+    const fs = require("fs");
+    const j = JSON.parse(process.env.VALIDATED_JSON);
+    if (!j.tool_use_id) { console.log(""); process.exit(0); }
+    // Key on question_id too: one AskUserQuestion call can carry several questions that all share a tool_use_id.
+    const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").slice(-100);
+    for (const ln of lines) {
+      try {
+        const p = JSON.parse(ln);
+        if (p.source === j.source && p.tool_use_id === j.tool_use_id && p.question_id === j.question_id) {
+          console.log("dup");
+          process.exit(0);
+        }
+      } catch {}
+    }
+    console.log("");
+  ' 2>/dev/null)
+fi
+
+if [ "$DEDUP_SKIP" = "dup" ]; then
+  echo "DEDUP: skipped (source=$(echo "$VALIDATED" | bun -e 'const j=JSON.parse(await Bun.stdin.text()); console.log(j.source);'), tool_use_id duplicate)"
+  exit 0
+fi
+
+echo "$VALIDATED" >> "$LOG_FILE"
+
+# Cathedral T5: fire-and-forget --derive so inferred dimensions stay current
+# without per-event latency (D17). Sub-second op; output suppressed; never
+# blocks the hook caller. Skipped via GSTACK_QUESTION_LOG_NO_DERIVE=1 for
+# tests that don't want the side effect.
+if [ -z "${GSTACK_QUESTION_LOG_NO_DERIVE:-}" ]; then
+  (
+    nohup "$SCRIPT_DIR/gstack-developer-profile" --derive >/dev/null 2>&1 &
+  ) >/dev/null 2>&1
+fi
 
 # NOTE: question-log.jsonl is deliberately NOT enqueued for gbrain-sync.
 # Per Codex v2 review, audit/derivation data stays local alongside the
diff --git a/hosts/claude/hooks/question-log-hook b/hosts/claude/hooks/question-log-hook
new file mode 100755
index 000000000..3dfcd29f9
--- /dev/null
+++ b/hosts/claude/hooks/question-log-hook
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Bash shim — Claude Code hooks run `command` strings via /bin/sh, so this
+# wrapper makes the TypeScript hook executable via bun. Settings.json
+# references this file directly.
+set -e +HERE="$(cd "$(dirname "$0")" && pwd)" +exec bun "$HERE/question-log-hook.ts" diff --git a/hosts/claude/hooks/question-log-hook.ts b/hosts/claude/hooks/question-log-hook.ts new file mode 100644 index 000000000..4a0bab6de --- /dev/null +++ b/hosts/claude/hooks/question-log-hook.ts @@ -0,0 +1,286 @@ +#!/usr/bin/env bun +/** + * PostToolUse hook for AskUserQuestion (Claude Code, plan-tune cathedral T5). + * + * Reads hook stdin JSON, extracts every AUQ question + user choice from the + * tool_input/tool_response, and writes them via gstack-question-log so the + * substrate captures fires deterministically — no agent compliance required. + * + * Triggered by ~/.claude/settings.json: + * { + * "hooks": { + * "PostToolUse": [ + * { + * "matcher": "(AskUserQuestion|mcp__.*__AskUserQuestion)", + * "hooks": [ + * { "type": "command", + * "command": "$CLAUDE_PROJECT_DIR/.claude/skills/gstack/hosts/claude/hooks/question-log-hook", + * "timeout": 5 } + * ] + * } + * ] + * } + * } + * + * Invariants: + * - Always exits 0. A failing hook MUST NOT block the user's session. + * Errors land in hook-errors.log under $GSTACK_STATE_ROOT, $GSTACK_HOME or ~/.gstack (first one set) for postmortem. + * - Spawns gstack-question-log as a subprocess, once per question; that bin handles + * validation, dedup of repeated writes, async derive. + * - Marker-first question_id (id embedded in the question text and matched by MARKER_RE), hash fallback + * (D18 progressive markers). + * + * See docs/spikes/claude-code-hook-mutation.md for the protocol contract. 
+ */ +import * as crypto from 'crypto'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { spawnSync } from 'child_process'; + +interface HookStdin { + session_id?: string; + hook_event_name?: string; + tool_name?: string; + tool_use_id?: string; + tool_input?: { + questions?: Array<{ + question?: string; + options?: Array; + multiSelect?: boolean; + }>; + }; + tool_response?: unknown; + cwd?: string; +} + +interface ExtractedQuestion { + question_id: string; + question_summary: string; + options_count: number; + user_choice: string; + recommended?: string; + free_text?: string; + category?: string; + door_type?: string; +} + +const MARKER_RE = //i; +const RECOMMENDED_LABEL_RE = /\(recommended\)\s*$/i; + +function logHookError(msg: string): void { + try { + const stateRoot = + process.env.GSTACK_STATE_ROOT || + process.env.GSTACK_HOME || + path.join(os.homedir(), '.gstack'); + fs.mkdirSync(stateRoot, { recursive: true }); + fs.appendFileSync( + path.join(stateRoot, 'hook-errors.log'), + `${new Date().toISOString()} question-log-hook: ${msg}\n`, + ); + } catch { + // Last-resort: swallow. If even the error log cannot be written there is nowhere left to report, and the hook must not block. + } +} + +function readStdin(): Promise { + return new Promise((resolve) => { + let buf = ''; + process.stdin.setEncoding('utf-8'); + process.stdin.on('data', (chunk) => (buf += chunk)); + process.stdin.on('end', () => resolve(buf)); + process.stdin.on('error', () => resolve(buf)); + // Hard cutoff so we don't hang the user's session waiting for stdin: after 2s, resolve with whatever has been buffered. + setTimeout(() => resolve(buf), 2000); + }); +} + +function hashQuestionId(skill: string, question: string, options: string[]): string { + const sorted = [...options].sort().join('|'); + const h = crypto + .createHash('sha1') + .update(`${skill}::${question}::${sorted}`) + .digest('hex'); + return `hook-${h.slice(0, 10)}`; +} + +/** + * Marker-first id extraction. Returns the marker id (stripped of the + * wrapper) when present, else the hash-based id from hashQuestionId ('hook-' plus 10 hex chars). 
+ * Per D18 progressive markers — hash ids are observed-only, never used
+ * as preference keys. Marker ids are lower-cased because MARKER_RE is case-insensitive while gstack-question-log rejects anything outside [a-z0-9-].
+ */
+function extractQuestionId(
+  skill: string,
+  questionText: string,
+  options: string[],
+): { id: string; marker_present: boolean; stripped_question: string } {
+  const match = questionText.match(MARKER_RE);
+  if (match) {
+    return {
+      id: match[1].toLowerCase(),
+      marker_present: true,
+      stripped_question: questionText.replace(MARKER_RE, '').trim(),
+    };
+  }
+  return {
+    id: hashQuestionId(skill, questionText, options),
+    marker_present: false,
+    stripped_question: questionText,
+  };
+}
+// Flatten raw AUQ options to display strings: strings pass through, objects use label, then description, else ''. Null entries become ''.
+function optionLabels(opts: Array<string | { label?: string; description?: string } | null>): string[] {
+  return opts.map((o) => (typeof o === 'string' ? o : String(o?.label || o?.description || '')));
+}
+
+/**
+ * Find the recommended option. Exactly one option ending in "(recommended)" wins (D2), returned without that suffix;
+ * otherwise a "Recommendation: X" line is prefix-matched (first 12 chars, case-insensitive). Zero or several candidates → undefined.
+ */
+function extractRecommended(questionText: string, opts: string[]): string | undefined {
+  const labelMatches = opts.filter((o) => RECOMMENDED_LABEL_RE.test(o));
+  if (labelMatches.length === 1) return labelMatches[0].replace(RECOMMENDED_LABEL_RE, '').trim();
+  if (labelMatches.length > 1) return undefined; // ambiguous
+
+  const m = questionText.match(/Recommendation:\s*([^\n]+)/i);
+  const recPrefix = m ? m[1].trim().toLowerCase().slice(0, 12) : '';
+  if (!recPrefix) return undefined; // no usable phrase: '' would prefix-match every option
+  const prefixMatches = opts.filter((o) => o.toLowerCase().startsWith(recPrefix));
+  return prefixMatches.length === 1 ? prefixMatches[0] : undefined; // several hits are ambiguous, so refuse
+}
+
+/**
+ * Best-effort extraction of which option the user picked per question.
+ * AUQ tool_response shape varies by Claude Code variant (native vs MCP),
+ * and the hook stdin docs don't pin a single canonical shape. We handle
+ * the common cases gracefully.
+ */
+function extractUserChoices(
+  response: unknown,
+  questionCount: number,
+): Array<{ choice: string; free_text?: string }> {
+  const out: Array<{ choice: string; free_text?: string }> = [];
+  if (!response) {
+    for (let i = 0; i < questionCount; i++) out.push({ choice: '__unknown__' });
+    return out;
+  }
+  // Shape A: { answers: [{option_label, free_text?}] }
+  // Shape B: { questions: [{user_answer}] }
+  // Shape C: { content: [...] } or array — not parsed; reported through the fallback at the end.
+  // We probe lazily. Entries may be null or non-objects, hence the optional chaining.
+  const rec = response as Record<string, unknown>;
+  if (Array.isArray(rec.answers)) {
+    for (const a of rec.answers as Array<Record<string, unknown> | null>) {
+      const choice = (a?.option_label || a?.label || a?.choice || a?.answer || '__unknown__') as string;
+      const freeText = (a?.free_text || a?.other_text) as string | undefined;
+      out.push(freeText ? { choice, free_text: freeText } : { choice });
+    }
+    while (out.length < questionCount) out.push({ choice: '__unknown__' });
+    return out;
+  }
+  if (Array.isArray(rec.questions)) {
+    for (const q of rec.questions as Array<Record<string, unknown> | null>) {
+      const choice = (q?.user_answer || q?.answer || q?.choice || '__unknown__') as string;
+      out.push({ choice });
+    }
+    while (out.length < questionCount) out.push({ choice: '__unknown__' });
+    return out;
+  }
+  // Fall back: embed the first 80 chars of the stringified response in the choice to help future debugging.
+  const shapeHint = `__response-shape-unknown:${JSON.stringify(response).slice(0, 80)}__`;
+  for (let i = 0; i < questionCount; i++) out.push({ choice: shapeHint });
+
+  return out;
+}
+
+function detectSkill(cwd: string | undefined): string {
+  // Best-effort: cwd often contains the project slug but rarely the running
+  // skill. Without a session-state mechanism, leave as 'unknown' — the
+  // skill marker embedded in question text per
+  // future plan-tune work is the durable path.
+  void cwd;
+  return 'unknown';
+}
+// Run bin/gstack-question-log with the payload as its single JSON argument; a non-zero exit or spawn failure is recorded in hook-errors.log.
+function spawnLog(payload: Record<string, unknown>): void {
+  // Locate the bin relative to this script's directory. import.meta.dir (Bun) is a decoded filesystem path, whereas URL.pathname stays percent-encoded (a space becomes %20).
+  const here = import.meta.dir;
+  // hosts/claude/hooks/ -> ../../../bin/
+  const repoRoot = path.resolve(here, '..', '..', '..');
+  const bin = path.join(repoRoot, 'bin', 'gstack-question-log');
+  const res = spawnSync(bin, [JSON.stringify(payload)], {
+    encoding: 'utf-8',
+    stdio: ['ignore', 'pipe', 'pipe'],
+    timeout: 3000,
+  });
+  if (res.status !== 0) {
+    logHookError(`gstack-question-log exited ${res.status}: ${res.error?.message || res.stderr || res.stdout}`);
+  }
+}
+// Entry point: parse the hook stdin, write one log event per question, and exit 0 on every path.
+async function main(): Promise<void> {
+  const raw = await readStdin();
+  if (!raw.trim()) {
+    process.exit(0);
+  }
+  let stdin: HookStdin;
+  try {
+    stdin = JSON.parse(raw);
+  } catch (e) {
+    logHookError(`stdin parse failed: ${(e as Error).message}`);
+    process.exit(0);
+  }
+
+  const toolName = stdin.tool_name || '';
+  if (
+    toolName !== 'AskUserQuestion' &&
+    !toolName.match(/^mcp__.+__AskUserQuestion$/)
+  ) {
+    // Matcher should have filtered this out; defensive no-op.
+    process.exit(0);
+  }
+
+  const questions = stdin.tool_input?.questions || [];
+  if (questions.length === 0) {
+    process.exit(0);
+  }
+
+  const skill = detectSkill(stdin.cwd);
+  const choices = extractUserChoices(stdin.tool_response, questions.length);
+
+  for (let i = 0; i < questions.length; i++) {
+    const q = questions[i];
+    const qText = typeof q?.question === 'string' ? q.question : ''; // a null or malformed entry is skipped instead of aborting the remaining questions
+    if (!qText) continue;
+
+    const opts = optionLabels(Array.isArray(q.options) ? q.options : []);
+    const { id, stripped_question } = extractQuestionId(skill, qText, opts);
+    const recommended = extractRecommended(stripped_question, opts);
+    const summary = stripped_question.slice(0, 200);
+    const choice = choices[i] || { choice: '__unknown__' };
+
+    const payload: Record<string, unknown> = {
+      skill,
+      question_id: id,
+      question_summary: summary,
+      options_count: opts.length,
+      user_choice: String(choice.choice).slice(0, 64),
+      source: choice.free_text ? 'auq-other' : 'hook',
+      session_id: typeof stdin.session_id === 'string' ? stdin.session_id.slice(0, 64) : undefined,
+      tool_use_id: typeof stdin.tool_use_id === 'string' ? stdin.tool_use_id.slice(0, 128) : undefined,
+    };
+    if (recommended) payload.recommended = recommended.slice(0, 64);
+    if (choice.free_text) payload.free_text = String(choice.free_text);
+
+    spawnLog(payload);
+  }
+
+  process.exit(0);
+}
+
+main().catch((e) => {
+  logHookError(`main crash: ${(e as Error).message}`);
+  process.exit(0);
+});
diff --git a/test/question-log-hook.test.ts b/test/question-log-hook.test.ts
new file mode 100644
index 000000000..43b75d0ff
--- /dev/null
+++ b/test/question-log-hook.test.ts
@@ -0,0 +1,285 @@
+/**
+ * PostToolUse hook (plan-tune cathedral T5) — unit tests.
+ *
+ * Feeds the hook synthetic Claude Code hook payloads via stdin and asserts
+ * the resulting question-log.jsonl reflects the right schema. Covers:
+ *   - Marker-first question_id (D18 progressive markers)
+ *   - Hash fallback when no marker
+ *   - source=hook tagging
+ *   - source=auq-other when free_text present
+ *   - Dedup on (source, tool_use_id) composite (D3)
+ *   - Hook exits 0 even on malformed input (never blocks user session)
+ *   - mcp__*__AskUserQuestion matcher acceptance
+ *   - "(recommended)" label parse → recommended field populated
+ *   - Refuse-on-ambiguous: two (recommended) labels → recommended omitted
+ */
+
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { spawnSync } from 'child_process';
+
+const ROOT = path.resolve(import.meta.dir, '..');
+const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-log-hook');
+
+let stateRoot: string;
+
+beforeEach(() => {
+  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-hooklog-'));
+  // Every test gets a brand-new, empty state root (removed again in afterEach);
+  // nothing is pre-created here.
+}); + +afterEach(() => { + fs.rmSync(stateRoot, { recursive: true, force: true }); +}); + +function runHook(stdin: object): { stdout: string; stderr: string; status: number } { + const env: Record = {}; + for (const [k, v] of Object.entries(process.env)) { + if (v !== undefined) env[k] = v; + } + env.GSTACK_STATE_ROOT = stateRoot; + delete env.GSTACK_HOME; + env.GSTACK_QUESTION_LOG_NO_DERIVE = '1'; + const res = spawnSync(HOOK, [], { + env, + input: JSON.stringify(stdin), + encoding: 'utf-8', + cwd: ROOT, + }); + return { + stdout: res.stdout ?? '', + stderr: res.stderr ?? '', + status: res.status ?? -1, + }; +} + +function readLog(): Array> { + const projectDirs = fs.existsSync(path.join(stateRoot, 'projects')) + ? fs.readdirSync(path.join(stateRoot, 'projects')) + : []; + const all: Array> = []; + for (const d of projectDirs) { + const f = path.join(stateRoot, 'projects', d, 'question-log.jsonl'); + if (!fs.existsSync(f)) continue; + const lines = fs.readFileSync(f, 'utf-8').trim().split('\n').filter(Boolean); + for (const l of lines) { + try { + all.push(JSON.parse(l)); + } catch { + // skip malformed lines instead of failing the whole read + } + } + } + return all; +} + +// ---------------------------------------------------------------------- +// Native AskUserQuestion capture +// ---------------------------------------------------------------------- + +describe('PostToolUse hook (native AskUserQuestion)', () => { + test('captures one event per question with source=hook and tool_use_id', () => { + const r = runHook({ + session_id: 'sess1', + hook_event_name: 'PostToolUse', + tool_name: 'AskUserQuestion', + tool_use_id: 'tu-1', + tool_input: { + questions: [ + { + question: 'D1 — Test capture\nRecommendation: A', + options: ['A) Accept (recommended)', 'B) Reject'], + multiSelect: false, + }, + ], + }, + tool_response: { + answers: [{ option_label: 'A) Accept (recommended)' }], + }, + cwd: ROOT, + }); + expect(r.status).toBe(0); + const events = readLog(); + expect(events.length).toBe(1); 
+ expect(events[0].source).toBe('hook'); + expect(events[0].tool_use_id).toBe('tu-1'); + expect(events[0].session_id).toBe('sess1'); + expect(typeof events[0].question_id).toBe('string'); + expect((events[0].question_id as string).startsWith('hook-')).toBe(true); + expect(events[0].user_choice).toContain('Accept'); + // Recommended parsed from the single (recommended) option label + expect(events[0].recommended).toContain('Accept'); + }); + + test('marker-first question_id when present', () => { + runHook({ + session_id: 'sess2', + tool_name: 'AskUserQuestion', + tool_use_id: 'tu-2', + tool_input: { + questions: [ + { + question: 'D2 — Marker test \nRecommendation: A', + options: ['A) Fix now (recommended)', 'B) Investigate', 'C) Ack and ship'], + }, + ], + }, + tool_response: { answers: [{ option_label: 'A) Fix now (recommended)' }] }, + cwd: ROOT, + }); + const events = readLog(); + expect(events.length).toBe(1); + expect(events[0].question_id).toBe('ship-test-failure-triage'); + // Marker stripped from summary + expect((events[0].question_summary as string).includes(' { + test('accepts mcp__conductor__AskUserQuestion tool_name', () => { + const r = runHook({ + session_id: 'sess3', + tool_name: 'mcp__conductor__AskUserQuestion', + tool_use_id: 'tu-3', + tool_input: { + questions: [{ question: 'Test', options: ['A', 'B'] }], + }, + tool_response: { answers: [{ option_label: 'A' }] }, + cwd: ROOT, + }); + expect(r.status).toBe(0); + expect(readLog().length).toBe(1); + }); + + test('ignores unrelated tool_name (defensive)', () => { + const r = runHook({ + session_id: 'sess4', + tool_name: 'Bash', + tool_use_id: 'tu-4', + tool_input: {}, + cwd: ROOT, + }); + expect(r.status).toBe(0); + expect(readLog().length).toBe(0); + }); +}); + +// ---------------------------------------------------------------------- +// Free-text capture (Layer 8 dream cycle) +// ---------------------------------------------------------------------- + +describe('PostToolUse hook (free-text "Other" responses)', () 
=> { + test('source=auq-other and free_text populated when user types free text', () => { + runHook({ + session_id: 'sess5', + tool_name: 'AskUserQuestion', + tool_use_id: 'tu-5', + tool_input: { + questions: [{ question: 'D5 — Other test', options: ['A', 'B'] }], + }, + tool_response: { + answers: [ + { + option_label: 'Other', + free_text: 'I always include tests with new features', + }, + ], + }, + cwd: ROOT, + }); + const events = readLog(); + expect(events.length).toBe(1); + expect(events[0].source).toBe('auq-other'); + expect(events[0].free_text).toContain('always include tests'); + }); +}); + +// ---------------------------------------------------------------------- +// Dedup +// ---------------------------------------------------------------------- + +describe('PostToolUse hook (dedup on source + tool_use_id)', () => { + test('second fire with same (source, tool_use_id) is dropped', () => { + const payload = { + session_id: 'sess6', + tool_name: 'AskUserQuestion', + tool_use_id: 'tu-6', + tool_input: { questions: [{ question: 'Dedup test', options: ['A'] }] }, + tool_response: { answers: [{ option_label: 'A' }] }, + cwd: ROOT, + }; + runHook(payload); + runHook(payload); + expect(readLog().length).toBe(1); + }); +}); + +// ---------------------------------------------------------------------- +// Refuse-on-ambiguous (D2 safety) +// ---------------------------------------------------------------------- + +describe('PostToolUse hook (recommended parser safety)', () => { + test('two (recommended) labels → recommended field omitted', () => { + runHook({ + session_id: 'sess7', + tool_name: 'AskUserQuestion', + tool_use_id: 'tu-7', + tool_input: { + questions: [ + { + question: 'Ambiguous test', + options: ['A) Foo (recommended)', 'B) Bar (recommended)'], + }, + ], + }, + tool_response: { answers: [{ option_label: 'A) Foo (recommended)' }] }, + cwd: ROOT, + }); + const events = readLog(); + expect(events.length).toBe(1); + 
+ expect(events[0].recommended).toBeUndefined(); + }); +}); + +// ---------------------------------------------------------------------- +// Crash safety +// ---------------------------------------------------------------------- + +describe('PostToolUse hook (crash safety)', () => { + test('exits 0 on empty stdin', () => { + const env: Record = {}; + for (const [k, v] of Object.entries(process.env)) { + if (v !== undefined) env[k] = v; + } + env.GSTACK_STATE_ROOT = stateRoot; + env.GSTACK_QUESTION_LOG_NO_DERIVE = '1'; + const res = spawnSync(HOOK, [], { env, input: '', encoding: 'utf-8' }); + expect(res.status).toBe(0); + }); + + test('exits 0 on malformed JSON', () => { + const env: Record = {}; + for (const [k, v] of Object.entries(process.env)) { + if (v !== undefined) env[k] = v; + } + env.GSTACK_STATE_ROOT = stateRoot; + env.GSTACK_QUESTION_LOG_NO_DERIVE = '1'; + const res = spawnSync(HOOK, [], { + env, + input: 'not json', + encoding: 'utf-8', + }); + expect(res.status).toBe(0); + // Error logged to hook-errors.log under the state root + const errLog = path.join(stateRoot, 'hook-errors.log'); + expect(fs.existsSync(errLog)).toBe(true); + expect(fs.readFileSync(errLog, 'utf-8')).toContain('stdin parse failed'); + }); +});