mirror of https://github.com/garrytan/gstack.git
feat(hooks): PostToolUse capture hook for AskUserQuestion
Plan-tune cathedral T5. Closes the substrate hole that motivated this entire branch: agent-compliance-only logging produced zero events in weeks of dogfood. PostToolUse hook captures every AUQ fire deterministically. What ships: - hosts/claude/hooks/question-log-hook.ts — TS hook that reads Claude Code's hook stdin, walks tool_input.questions[*], extracts user choice + recommended option from tool_response, spawns gstack-question-log per question. - hosts/claude/hooks/question-log-hook — bash shim Claude Code's hook runner invokes; execs bun against the .ts file. - Marker-first question_id extraction (D18 progressive markers): <gstack-qid:foo-bar> stripped from question text, used as the id. Hash fallback hook-<sha1[:10]> for unmarked questions (observed-only, never used as preference key — D18 hash drift mitigation). - (recommended) label parsing for the user_choice/recommended fields, with refuse-on-ambiguous when two labels are present (D2 safety). - Free-text capture: source=auq-other + free_text field when user picks Other and types (Layer 8 dream cycle input). - Matcher covers both native AskUserQuestion and mcp__*__AskUserQuestion (Codex/Conductor catch from outside voice review). - Crash safety: always exits 0; errors land in ~/.gstack/hook-errors.log so the user's session is never blocked by a hook failure. gstack-question-log extended to: - Accept `source` field (default 'agent', new values: hook, auq-other, auto-decided, codex-import-marker, codex-import-pattern). - Accept `tool_use_id` (<=128 chars) for dedup. - Composite dedup on (source, tool_use_id) across the last 100 lines — protects against hook + preamble both firing on the same tool call (D3 belt+suspenders). - Async fire `gstack-developer-profile --derive` after each successful write so inferred.sample_size actually grows (D17 — without this, the cathedral's "before 0, after >0" metric never moves). - GSTACK_QUESTION_LOG_NO_DERIVE=1 escape hatch for tests. 
9 new unit tests covering capture, marker extraction, MCP variant, free-text, dedup, ambiguous-recommended safety, crash paths. All pass plus the existing 88 tests across related files. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2147532c07
commit
a8a0447870
|
|
@ -50,12 +50,48 @@ if (!j.skill || !/^[a-z0-9-]+\$/.test(j.skill)) {
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Required: question_id (kebab-case, <=64 chars)
|
// Required: question_id (kebab-case, <=64 chars).
|
||||||
|
// Cathedral T5: hook-sourced events use 'hook-<10-char-hash>' which is
|
||||||
|
// kebab-case-compatible and passes the same regex.
|
||||||
if (!j.question_id || !/^[a-z0-9-]+\$/.test(j.question_id) || j.question_id.length > 64) {
|
if (!j.question_id || !/^[a-z0-9-]+\$/.test(j.question_id) || j.question_id.length > 64) {
|
||||||
process.stderr.write('gstack-question-log: invalid question_id, must be kebab-case <=64 chars\n');
|
process.stderr.write('gstack-question-log: invalid question_id, must be kebab-case <=64 chars\n');
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Optional: source — tags which writer produced this event.
|
||||||
|
// 'agent' (default) — preamble-driven write from inside the running agent
|
||||||
|
// 'hook' — PostToolUse hook captured it deterministically (T5)
|
||||||
|
// 'auq-other' — user picked 'Other' and typed free text (Layer 8)
|
||||||
|
// 'auto-decided' — PreToolUse enforcement hook substituted the answer (T6)
|
||||||
|
// 'codex-import-marker' / 'codex-import-pattern' — T9 backfill from Codex
|
||||||
|
const ALLOWED_SOURCES = ['agent', 'hook', 'auq-other', 'auto-decided', 'codex-import-marker', 'codex-import-pattern'];
|
||||||
|
if (j.source !== undefined) {
|
||||||
|
if (!ALLOWED_SOURCES.includes(j.source)) {
|
||||||
|
process.stderr.write('gstack-question-log: invalid source, must be one of: ' + ALLOWED_SOURCES.join(', ') + '\n');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
j.source = 'agent';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Optional: tool_use_id — Claude Code hook stdin field; used for dedup.
|
||||||
|
if (j.tool_use_id !== undefined) {
|
||||||
|
if (typeof j.tool_use_id !== 'string' || j.tool_use_id.length > 128) {
|
||||||
|
process.stderr.write('gstack-question-log: tool_use_id must be string <=128 chars\n');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Optional: free_text — sanitize (no newlines, <=300 chars).
|
||||||
|
if (j.free_text !== undefined) {
|
||||||
|
if (typeof j.free_text !== 'string') {
|
||||||
|
process.stderr.write('gstack-question-log: free_text must be string\n');
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
if (j.free_text.length > 300) j.free_text = j.free_text.slice(0, 300);
|
||||||
|
j.free_text = j.free_text.replace(/\n+/g, ' ');
|
||||||
|
}
|
||||||
|
|
||||||
// Required: question_summary (non-empty, <=200 chars, no newlines)
|
// Required: question_summary (non-empty, <=200 chars, no newlines)
|
||||||
if (typeof j.question_summary !== 'string' || !j.question_summary.length) {
|
if (typeof j.question_summary !== 'string' || !j.question_summary.length) {
|
||||||
process.stderr.write('gstack-question-log: question_summary required\n');
|
process.stderr.write('gstack-question-log: question_summary required\n');
|
||||||
|
|
@ -165,7 +201,49 @@ if [ $VALIDATE_RC -ne 0 ] || [ -z "$VALIDATED" ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "$VALIDATED" >> "$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
|
LOG_FILE="$GSTACK_HOME/projects/$SLUG/question-log.jsonl"
|
||||||
|
|
||||||
|
# Cathedral T5: composite dedup. If this exact (source, tool_use_id,
# question_id) triple was already logged within the last 100 lines, skip —
# protects against the same writer recording the same fire twice (D3
# plan-tune cathedral decision).
#
# question_id is part of the key on purpose: a single AskUserQuestion call can
# carry several questions, and the hook writes one event per question with the
# same tool_use_id. Keying on (source, tool_use_id) alone would silently drop
# every question after the first one.
#
# Only the last 100 lines are compared. Events without a tool_use_id are never
# deduplicated. Any failure inside the lookup leaves DEDUP_SKIP empty, so the
# event is written.
DEDUP_SKIP=""
if [ -f "$LOG_FILE" ]; then
  DEDUP_SKIP=$(VALIDATED_JSON="$VALIDATED" LOG_FILE_PATH="$LOG_FILE" bun -e '
    const fs = require("fs");
    const j = JSON.parse(process.env.VALIDATED_JSON);
    if (!j.tool_use_id) { console.log(""); process.exit(0); }
    // JSON-encode the triple so separators inside a field cannot collide.
    const keyOf = (e) => JSON.stringify([e.source, e.tool_use_id, e.question_id]);
    const want = keyOf(j);
    const lines = fs.readFileSync(process.env.LOG_FILE_PATH, "utf-8").trim().split("\n").slice(-100);
    for (const ln of lines) {
      try {
        const p = JSON.parse(ln);
        if (p.source && p.tool_use_id && keyOf(p) === want) {
          console.log("dup");
          process.exit(0);
        }
      } catch {}
    }
    console.log("");
  ' 2>/dev/null)
fi
|
||||||
|
|
||||||
|
if [ "$DEDUP_SKIP" = "dup" ]; then
|
||||||
|
echo "DEDUP: skipped (source=$(echo "$VALIDATED" | bun -e 'const j=JSON.parse(await Bun.stdin.text()); console.log(j.source);'), tool_use_id duplicate)"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "$VALIDATED" >> "$LOG_FILE"
|
||||||
|
|
||||||
|
# Cathedral T5: fire-and-forget --derive so inferred dimensions stay current
|
||||||
|
# without per-event latency (D17). Sub-second op; output suppressed; never
|
||||||
|
# blocks the hook caller. Skipped via GSTACK_QUESTION_LOG_NO_DERIVE=1 for
|
||||||
|
# tests that don't want the side effect.
|
||||||
|
if [ -z "${GSTACK_QUESTION_LOG_NO_DERIVE:-}" ]; then
|
||||||
|
(
|
||||||
|
nohup "$SCRIPT_DIR/gstack-developer-profile" --derive >/dev/null 2>&1 &
|
||||||
|
) >/dev/null 2>&1
|
||||||
|
fi
|
||||||
|
|
||||||
# NOTE: question-log.jsonl is deliberately NOT enqueued for gbrain-sync.
|
# NOTE: question-log.jsonl is deliberately NOT enqueued for gbrain-sync.
|
||||||
# Per Codex v2 review, audit/derivation data stays local alongside the
|
# Per Codex v2 review, audit/derivation data stays local alongside the
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
#!/usr/bin/env bash
# Bash shim — Claude Code hooks run `command` strings via /bin/sh, so this
# wrapper makes the TypeScript hook executable via bun. Settings.json
# references this file directly.
#
# The hook's invariant is "always exit 0": a broken install must never surface
# as a failing hook in the user's session. So there is no `set -e` here, and
# every step that can fail before bun takes over falls back to `exit 0`.

# Directory containing this shim; the .ts hook lives next to it.
HERE="$(cd "$(dirname "$0")" 2>/dev/null && pwd)" || exit 0

if ! command -v bun >/dev/null 2>&1; then
  # Best-effort breadcrumb in the same file the TypeScript hook logs to.
  ERR_DIR="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-$HOME/.gstack}}"
  mkdir -p "$ERR_DIR" 2>/dev/null &&
    printf '%s question-log-hook: bun not found on PATH\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      >> "$ERR_DIR/hook-errors.log" 2>/dev/null
  exit 0
fi

exec bun "$HERE/question-log-hook.ts"
|
||||||
|
|
@ -0,0 +1,286 @@
|
||||||
|
#!/usr/bin/env bun
|
||||||
|
/**
|
||||||
|
* PostToolUse hook for AskUserQuestion (Claude Code, plan-tune cathedral T5).
|
||||||
|
*
|
||||||
|
* Reads hook stdin JSON, extracts every AUQ question + user choice from the
|
||||||
|
* tool_input/tool_response, and writes them via gstack-question-log so the
|
||||||
|
* substrate captures fires deterministically — no agent compliance required.
|
||||||
|
*
|
||||||
|
* Triggered by ~/.claude/settings.json:
|
||||||
|
* {
|
||||||
|
* "hooks": {
|
||||||
|
* "PostToolUse": [
|
||||||
|
* {
|
||||||
|
* "matcher": "(AskUserQuestion|mcp__.*__AskUserQuestion)",
|
||||||
|
* "hooks": [
|
||||||
|
* { "type": "command",
|
||||||
|
* "command": "$CLAUDE_PROJECT_DIR/.claude/skills/gstack/hosts/claude/hooks/question-log-hook",
|
||||||
|
* "timeout": 5 }
|
||||||
|
* ]
|
||||||
|
* }
|
||||||
|
* ]
|
||||||
|
* }
|
||||||
|
* }
|
||||||
|
*
|
||||||
|
* Invariants:
|
||||||
|
* - Always exits 0. A failing hook MUST NOT block the user's session.
|
||||||
|
* Errors land in ~/.gstack/hook-errors.log for postmortem.
|
||||||
|
* - Spawns gstack-question-log as a subprocess; that bin handles
|
||||||
|
* validation, dedup (source+tool_use_id), async derive.
|
||||||
|
* - Marker-first question_id (`<gstack-qid:foo-bar>`), hash fallback
|
||||||
|
* (D18 progressive markers).
|
||||||
|
*
|
||||||
|
* See docs/spikes/claude-code-hook-mutation.md for the protocol contract.
|
||||||
|
*/
|
||||||
|
import * as crypto from 'crypto';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
import { fileURLToPath } from 'url';
|
||||||
|
|
||||||
|
/**
 * Shape of the JSON document the hook reads from stdin.
 *
 * Every field is optional because the payload is external input and is only
 * parsed, never schema-validated, before use.
 */
interface HookStdin {
  /** Session identifier; forwarded (truncated) into each logged event. */
  session_id?: string;
  hook_event_name?: string;
  /** Name of the tool that fired; checked against the AskUserQuestion names. */
  tool_name?: string;
  /** Identifier of the tool call; forwarded for dedup in the log bin. */
  tool_use_id?: string;
  /** Tool arguments; only `questions` is read by this hook. */
  tool_input?: {
    questions?: {
      question?: string;
      /** Either bare label strings or objects carrying a label/description. */
      options?: (string | { label?: string; description?: string })[];
      multiSelect?: boolean;
    }[];
  };
  /** Tool result; its shape is probed at runtime, hence `unknown`. */
  tool_response?: unknown;
  cwd?: string;
}
|
||||||
|
|
||||||
|
/**
 * One question reduced to the fields of a question-log event.
 *
 * The first four fields are always present; the rest are filled in only when
 * the corresponding information could be extracted.
 */
interface ExtractedQuestion {
  /** Marker id or hash-based fallback id. */
  question_id: string;
  /** Question text as it should appear in the log. */
  question_summary: string;
  /** Number of options that were offered. */
  options_count: number;
  /** Label of the option the user picked. */
  user_choice: string;

  /** Option flagged as recommended, when one could be determined. */
  recommended?: string;
  /** Text typed by the user, when present. */
  free_text?: string;
  category?: string;
  door_type?: string;
}
|
||||||
|
|
||||||
|
const MARKER_RE = /<gstack-qid:([a-z0-9-]{1,64})>/i;
|
||||||
|
const RECOMMENDED_LABEL_RE = /\(recommended\)\s*$/i;
|
||||||
|
|
||||||
|
/**
 * Append a timestamped line to `hook-errors.log` under the gstack state root.
 *
 * The root is the first non-empty of `GSTACK_STATE_ROOT`, `GSTACK_HOME`, and
 * `~/.gstack`; it is created if missing. Any failure while logging is
 * deliberately swallowed: this function must never throw.
 *
 * @param msg Human-readable description of what went wrong.
 */
function logHookError(msg: string): void {
  try {
    let stateRoot = process.env.GSTACK_STATE_ROOT;
    if (!stateRoot) stateRoot = process.env.GSTACK_HOME;
    if (!stateRoot) stateRoot = path.join(os.homedir(), '.gstack');

    fs.mkdirSync(stateRoot, { recursive: true });

    const logFile = path.join(stateRoot, 'hook-errors.log');
    const entry = `${new Date().toISOString()} question-log-hook: ${msg}\n`;
    fs.appendFileSync(logFile, entry);
  } catch {
    // Nothing left to report to: stay silent so the hook cannot block.
  }
}
|
||||||
|
|
||||||
|
/**
 * Collect everything written to stdin as a UTF-8 string.
 *
 * Resolves (never rejects) on whichever comes first: end of stream, a stream
 * error, or a 2-second cutoff. In the latter two cases the text received so
 * far is returned.
 */
function readStdin(): Promise<string> {
  return new Promise((resolve) => {
    const pieces: string[] = [];
    const finish = (): void => resolve(pieces.join(''));

    process.stdin.setEncoding('utf-8');
    process.stdin.on('data', (chunk) => {
      pieces.push(String(chunk));
    });
    process.stdin.on('end', finish);
    process.stdin.on('error', finish);

    // Upper bound on the wait so a stdin that never closes cannot hang us.
    setTimeout(finish, 2000);
  });
}
|
||||||
|
|
||||||
|
/**
 * Build the fallback id `hook-<first 10 hex chars of sha1>` for a question.
 *
 * The digest covers the skill, the question text, and the options sorted and
 * joined with `|`, so the order in which options are listed does not change
 * the id. The caller's array is not mutated.
 */
function hashQuestionId(skill: string, question: string, options: string[]): string {
  const canonicalOptions = options.slice().sort().join('|');
  const hasher = crypto.createHash('sha1');
  hasher.update(`${skill}::${question}::${canonicalOptions}`);
  const hex = hasher.digest('hex');
  return `hook-${hex.substring(0, 10)}`;
}
|
||||||
|
|
||||||
|
/**
 * Marker-first id extraction. Returns the marker id (stripped of the
 * <gstack-qid:...> wrapper) when present, else a hash-based hook- id.
 * Per D18 progressive markers — hash ids are observed-only, never used
 * as preference keys.
 *
 * The marker pattern is matched case-insensitively, so the captured id is
 * lower-cased before it is returned: the log bin only accepts ids matching
 * `^[a-z0-9-]+$`, and a mixed-case marker would otherwise cause the whole
 * event to be rejected.
 *
 * @param skill        Skill name, used only for the hash fallback.
 * @param questionText Raw question text, possibly containing a marker.
 * @param options      Option labels, used only for the hash fallback.
 * @returns The id, whether it came from a marker, and the question text with
 *          the first marker removed and surrounding whitespace trimmed (the
 *          text is returned untouched when there is no marker).
 */
function extractQuestionId(
  skill: string,
  questionText: string,
  options: string[],
): { id: string; marker_present: boolean; stripped_question: string } {
  const match = questionText.match(MARKER_RE);
  if (!match) {
    return {
      id: hashQuestionId(skill, questionText, options),
      marker_present: false,
      stripped_question: questionText,
    };
  }
  return {
    id: match[1].toLowerCase(),
    marker_present: true,
    stripped_question: questionText.replace(MARKER_RE, '').trim(),
  };
}
|
||||||
|
|
||||||
|
/**
 * Flatten AUQ options to display strings.
 *
 * String options pass through unchanged. Object options contribute their
 * `label`, falling back to `description`, then to the empty string.
 */
function optionLabels(opts: Array<string | { label?: string; description?: string }>): string[] {
  const labels: string[] = [];
  for (const opt of opts) {
    if (typeof opt === 'string') {
      labels.push(opt);
    } else {
      labels.push(opt.label || opt.description || '');
    }
  }
  return labels;
}
|
||||||
|
|
||||||
|
/**
 * Parse "(recommended)" label-first per D2; fall back to "Recommendation: X"
 * prose match; refuse (return undefined) if ambiguous.
 *
 * Label path: exactly one option ending in "(recommended)" wins and is
 * returned with that suffix removed. Two or more such options are ambiguous.
 *
 * Prose path: the text after "Recommendation:" is trimmed, lower-cased, and
 * cut to 12 characters, then compared as a prefix against each option. The
 * result is returned only when exactly one option matches. An empty phrase
 * would prefix-match every option, so it is rejected instead.
 *
 * @param questionText Question text (marker already stripped by the caller).
 * @param opts         Option labels as shown to the user.
 * @returns The recommended option, or undefined when none or several qualify.
 */
function extractRecommended(questionText: string, opts: string[]): string | undefined {
  const labelMatches = opts.filter((o) => RECOMMENDED_LABEL_RE.test(o));
  if (labelMatches.length === 1) return labelMatches[0].replace(RECOMMENDED_LABEL_RE, '').trim();
  if (labelMatches.length > 1) return undefined; // ambiguous

  const m = questionText.match(/Recommendation:\s*([^\n]+)/i);
  if (!m) return undefined;

  const prefix = m[1].trim().toLowerCase().slice(0, 12);
  if (!prefix) return undefined; // whitespace-only phrase carries no signal

  const candidates = opts.filter((o) => o.toLowerCase().startsWith(prefix));
  return candidates.length === 1 ? candidates[0] : undefined; // >1 is ambiguous
}
|
||||||
|
|
||||||
|
/**
 * Best-effort extraction of which option the user picked per question.
 * AUQ tool_response shape varies by Claude Code variant (native vs MCP),
 * and the hook stdin docs don't pin a single canonical shape. We handle
 * the common cases gracefully.
 *
 * Shapes probed, in order:
 *   A: { answers: [...] }   — entries are objects carrying option_label /
 *      label / choice / answer (plus optional free_text / other_text), or
 *      plain strings that are themselves the choice.
 *   B: { questions: [...] } — entries carry user_answer / answer / choice.
 *   otherwise: every question gets a `__response-shape-unknown:…__` choice
 *      embedding the first 80 characters of the serialized response.
 *
 * Entries that are null or otherwise unusable become `__unknown__` instead
 * of throwing, so one bad entry cannot lose the rest of the batch. For
 * shapes A and B the result is padded with `__unknown__` up to
 * `questionCount`.
 *
 * @param response      Raw `tool_response` from the hook payload.
 * @param questionCount Number of questions that were asked.
 * @returns One entry per answer found, at least `questionCount` long.
 */
function extractUserChoices(
  response: unknown,
  questionCount: number,
): Array<{ choice: string; free_text?: string }> {
  const UNKNOWN = '__unknown__';
  const out: Array<{ choice: string; free_text?: string }> = [];
  const padded = (): Array<{ choice: string; free_text?: string }> => {
    while (out.length < questionCount) out.push({ choice: UNKNOWN });
    return out;
  };
  const asRecord = (v: unknown): Record<string, unknown> | undefined =>
    v !== null && typeof v === 'object' ? (v as Record<string, unknown>) : undefined;

  if (!response) return padded();

  const rec = response as Record<string, unknown>;

  if (Array.isArray(rec.answers)) {
    for (const entry of rec.answers as unknown[]) {
      if (typeof entry === 'string') {
        out.push({ choice: entry || UNKNOWN });
        continue;
      }
      const a = asRecord(entry);
      if (!a) {
        out.push({ choice: UNKNOWN });
        continue;
      }
      const choice = String(a.option_label || a.label || a.choice || a.answer || UNKNOWN);
      const freeText = a.free_text || a.other_text;
      out.push(freeText ? { choice, free_text: String(freeText) } : { choice });
    }
    return padded();
  }

  if (Array.isArray(rec.questions)) {
    for (const entry of rec.questions as unknown[]) {
      const q = asRecord(entry);
      out.push({ choice: q ? String(q.user_answer || q.answer || q.choice || UNKNOWN) : UNKNOWN });
    }
    return padded();
  }

  // Unrecognized shape: keep the first 80 chars of the serialized response in
  // the choice itself to help future debugging.
  const preview = JSON.stringify(response).slice(0, 80);
  for (let i = 0; i < questionCount; i++) {
    out.push({ choice: `__response-shape-unknown:${preview}__` });
  }
  return out;
}
|
||||||
|
|
||||||
|
/**
 * Report which skill is running. Currently a placeholder that always answers
 * 'unknown': the working directory does not reveal the active skill, and no
 * session-state mechanism exists yet. The parameter is accepted (and
 * intentionally ignored) so callers need not change once detection lands.
 */
function detectSkill(cwd: string | undefined): string {
  const placeholderSkill = 'unknown';
  void cwd; // intentionally unused for now
  return placeholderSkill;
}
|
||||||
|
|
||||||
|
/**
 * Hand one event to the `gstack-question-log` bin as a JSON argument.
 *
 * The bin is located relative to this file (hosts/claude/hooks/ ->
 * ../../../bin/). The call is synchronous with a 3-second timeout. Failures
 * never throw; they are recorded through logHookError.
 *
 * @param payload Event fields; serialized with JSON.stringify.
 */
function spawnLog(payload: Record<string, unknown>): void {
  // fileURLToPath decodes percent-escapes (e.g. spaces in the install path),
  // which the raw URL pathname leaves encoded.
  const here = path.dirname(fileURLToPath(import.meta.url));
  // hosts/claude/hooks/ -> ../../../bin/
  const repoRoot = path.resolve(here, '..', '..', '..');
  const bin = path.join(repoRoot, 'bin', 'gstack-question-log');

  const res = spawnSync(bin, [JSON.stringify(payload)], {
    encoding: 'utf-8',
    stdio: ['ignore', 'pipe', 'pipe'],
    timeout: 3000,
  });

  // Spawn failure or timeout: status is null and the cause is in res.error.
  if (res.error) {
    logHookError(`gstack-question-log failed to run: ${res.error.message}`);
    return;
  }
  if (res.status !== 0) {
    const outcome = res.status === null ? `was killed by ${res.signal}` : `exited ${res.status}`;
    logHookError(`gstack-question-log ${outcome}: ${res.stderr || res.stdout}`);
  }
}
|
||||||
|
|
||||||
|
/**
 * Hook entry point: read the PostToolUse payload from stdin and log one
 * event per AskUserQuestion question via gstack-question-log.
 *
 * Exits 0 on every path. Empty input, non-object JSON, an unrelated tool
 * name, and an empty question list are silent no-ops. Unparseable JSON is
 * recorded in the hook error log. Each question is processed in isolation so
 * that one malformed entry is logged as an error without discarding the
 * questions that follow it.
 */
async function main(): Promise<void> {
  const raw = await readStdin();
  if (!raw.trim()) {
    process.exit(0);
  }

  let stdin: HookStdin;
  try {
    stdin = JSON.parse(raw);
  } catch (e) {
    logHookError(`stdin parse failed: ${(e as Error).message}`);
    process.exit(0);
  }
  // Valid JSON such as `null` or `42` carries no payload to inspect.
  if (!stdin || typeof stdin !== 'object') {
    process.exit(0);
  }

  const toolName = typeof stdin.tool_name === 'string' ? stdin.tool_name : '';
  if (toolName !== 'AskUserQuestion' && !/^mcp__.+__AskUserQuestion$/.test(toolName)) {
    // Matcher should have filtered this out; defensive no-op.
    process.exit(0);
  }

  const rawQuestions = stdin.tool_input?.questions;
  const questions = Array.isArray(rawQuestions) ? rawQuestions : [];
  if (questions.length === 0) {
    process.exit(0);
  }

  const skill = detectSkill(stdin.cwd);
  const choices = extractUserChoices(stdin.tool_response, questions.length);
  // The payload is untrusted: only call string methods on actual strings.
  const sessionId = typeof stdin.session_id === 'string' ? stdin.session_id.slice(0, 64) : undefined;
  const toolUseId =
    typeof stdin.tool_use_id === 'string' ? stdin.tool_use_id.slice(0, 128) : undefined;

  for (let i = 0; i < questions.length; i++) {
    try {
      const q = questions[i];
      const qText = q && typeof q.question === 'string' ? q.question : '';
      if (!qText) continue;

      const opts = optionLabels(Array.isArray(q.options) ? q.options : []);
      const { id, stripped_question } = extractQuestionId(skill, qText, opts);
      const recommended = extractRecommended(stripped_question, opts);
      const summary = stripped_question.slice(0, 200);
      const choice = choices[i] || { choice: '__unknown__' };

      const payload: Record<string, unknown> = {
        skill,
        question_id: id,
        question_summary: summary,
        options_count: opts.length,
        user_choice: String(choice.choice).slice(0, 64),
        // Free text present means the user typed an answer of their own.
        source: choice.free_text ? 'auq-other' : 'hook',
        session_id: sessionId,
        tool_use_id: toolUseId,
      };
      if (recommended) payload.recommended = recommended.slice(0, 64);
      if (choice.free_text) payload.free_text = String(choice.free_text);

      spawnLog(payload);
    } catch (e) {
      // Keep going: a single bad question must not cost us the others.
      logHookError(`question ${i} failed: ${e instanceof Error ? e.message : String(e)}`);
    }
  }

  process.exit(0);
}
|
||||||
|
|
||||||
|
main().catch((e) => {
|
||||||
|
logHookError(`main crash: ${(e as Error).message}`);
|
||||||
|
process.exit(0);
|
||||||
|
});
|
||||||
|
|
@ -0,0 +1,285 @@
|
||||||
|
/**
|
||||||
|
* PostToolUse hook (plan-tune cathedral T5) — unit tests.
|
||||||
|
*
|
||||||
|
* Feeds the hook synthetic Claude Code hook payloads via stdin and asserts
|
||||||
|
* the resulting question-log.jsonl reflects the right schema. Covers:
|
||||||
|
* - Marker-first question_id (D18 progressive markers)
|
||||||
|
* - Hash fallback when no marker
|
||||||
|
* - source=hook tagging
|
||||||
|
* - source=auq-other when free_text present
|
||||||
|
* - Dedup on (source, tool_use_id) composite (D3)
|
||||||
|
* - Hook exits 0 even on malformed input (never blocks user session)
|
||||||
|
* - mcp__*__AskUserQuestion matcher acceptance
|
||||||
|
* - "(recommended)" label parse → recommended field populated
|
||||||
|
* - Refuse-on-ambiguous: two (recommended) labels → recommended omitted
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import * as path from 'path';
|
||||||
|
import * as os from 'os';
|
||||||
|
import { spawnSync } from 'child_process';
|
||||||
|
|
||||||
|
const ROOT = path.resolve(import.meta.dir, '..');
|
||||||
|
const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-log-hook');
|
||||||
|
|
||||||
|
let stateRoot: string;
|
||||||
|
|
||||||
|
beforeEach(() => {
  // Fresh, empty state root per test so log assertions only ever see events
  // written by that test. Nothing is pre-created inside it; readLog() scans
  // whatever project directories appear under it.
  stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-hooklog-'));
});
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
fs.rmSync(stateRoot, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
 * Run the hook shim once with `stdin` serialized as its JSON input.
 *
 * The child inherits the current environment, except that GSTACK_STATE_ROOT
 * points at the per-test temp dir, GSTACK_HOME is removed, and the derive
 * side effect is disabled. Missing stdout/stderr become '' and a missing
 * exit status becomes -1.
 */
function runHook(stdin: object): { stdout: string; stderr: string; status: number } {
  const definedVars = Object.entries(process.env).filter(
    (pair): pair is [string, string] => pair[1] !== undefined,
  );
  const env: Record<string, string> = Object.fromEntries(definedVars);
  delete env.GSTACK_HOME;
  env.GSTACK_STATE_ROOT = stateRoot;
  env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';

  const { stdout, stderr, status } = spawnSync(HOOK, [], {
    env,
    input: JSON.stringify(stdin),
    encoding: 'utf-8',
    cwd: ROOT,
  });

  return { stdout: stdout ?? '', stderr: stderr ?? '', status: status ?? -1 };
}
|
||||||
|
|
||||||
|
/**
 * Gather every event from every `projects/<dir>/question-log.jsonl` under the
 * per-test state root. Returns [] when no projects directory exists; blank
 * and unparseable lines are skipped.
 */
function readLog(): Array<Record<string, unknown>> {
  const projectsDir = path.join(stateRoot, 'projects');
  if (!fs.existsSync(projectsDir)) return [];

  const events: Array<Record<string, unknown>> = [];
  for (const dirName of fs.readdirSync(projectsDir)) {
    const logPath = path.join(projectsDir, dirName, 'question-log.jsonl');
    if (!fs.existsSync(logPath)) continue;

    const rows = fs.readFileSync(logPath, 'utf-8').trim().split('\n');
    for (const row of rows) {
      if (!row) continue;
      try {
        events.push(JSON.parse(row));
      } catch {
        // skip malformed
      }
    }
  }
  return events;
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// Native AskUserQuestion capture
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('PostToolUse hook (native AskUserQuestion)', () => {
|
||||||
|
test('captures one event per question with source=hook and tool_use_id', () => {
|
||||||
|
const r = runHook({
|
||||||
|
session_id: 'sess1',
|
||||||
|
hook_event_name: 'PostToolUse',
|
||||||
|
tool_name: 'AskUserQuestion',
|
||||||
|
tool_use_id: 'tu-1',
|
||||||
|
tool_input: {
|
||||||
|
questions: [
|
||||||
|
{
|
||||||
|
question: 'D1 — Test capture\nRecommendation: A',
|
||||||
|
options: ['A) Accept (recommended)', 'B) Reject'],
|
||||||
|
multiSelect: false,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
tool_response: {
|
||||||
|
answers: [{ option_label: 'A) Accept (recommended)' }],
|
||||||
|
},
|
||||||
|
cwd: ROOT,
|
||||||
|
});
|
||||||
|
expect(r.status).toBe(0);
|
||||||
|
const events = readLog();
|
||||||
|
expect(events.length).toBe(1);
|
||||||
|
expect(events[0].source).toBe('hook');
|
||||||
|
expect(events[0].tool_use_id).toBe('tu-1');
|
||||||
|
expect(events[0].session_id).toBe('sess1');
|
||||||
|
expect(typeof events[0].question_id).toBe('string');
|
||||||
|
expect((events[0].question_id as string).startsWith('hook-')).toBe(true);
|
||||||
|
expect(events[0].user_choice).toContain('Accept');
|
||||||
|
// Recommended parsed from (recommended) label
|
||||||
|
expect(events[0].recommended).toContain('Accept');
|
||||||
|
});
|
||||||
|
|
||||||
|
test('marker-first question_id when <gstack-qid:foo> present', () => {
|
||||||
|
runHook({
|
||||||
|
session_id: 'sess2',
|
||||||
|
tool_name: 'AskUserQuestion',
|
||||||
|
tool_use_id: 'tu-2',
|
||||||
|
tool_input: {
|
||||||
|
questions: [
|
||||||
|
{
|
||||||
|
question: 'D2 — Marker test <gstack-qid:ship-test-failure-triage>\nRecommendation: A',
|
||||||
|
options: ['A) Fix now (recommended)', 'B) Investigate', 'C) Ack and ship'],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
tool_response: { answers: [{ option_label: 'A) Fix now (recommended)' }] },
|
||||||
|
cwd: ROOT,
|
||||||
|
});
|
||||||
|
const events = readLog();
|
||||||
|
expect(events.length).toBe(1);
|
||||||
|
expect(events[0].question_id).toBe('ship-test-failure-triage');
|
||||||
|
// Marker stripped from summary
|
||||||
|
expect((events[0].question_summary as string).includes('<gstack-qid:')).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// MCP AskUserQuestion variant (Conductor)
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('PostToolUse hook (mcp__*__AskUserQuestion variant)', () => {
|
||||||
|
test('accepts mcp__conductor__AskUserQuestion tool_name', () => {
|
||||||
|
const r = runHook({
|
||||||
|
session_id: 'sess3',
|
||||||
|
tool_name: 'mcp__conductor__AskUserQuestion',
|
||||||
|
tool_use_id: 'tu-3',
|
||||||
|
tool_input: {
|
||||||
|
questions: [{ question: 'Test', options: ['A', 'B'] }],
|
||||||
|
},
|
||||||
|
tool_response: { answers: [{ option_label: 'A' }] },
|
||||||
|
cwd: ROOT,
|
||||||
|
});
|
||||||
|
expect(r.status).toBe(0);
|
||||||
|
expect(readLog().length).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('ignores unrelated tool_name (defensive)', () => {
|
||||||
|
const r = runHook({
|
||||||
|
session_id: 'sess4',
|
||||||
|
tool_name: 'Bash',
|
||||||
|
tool_use_id: 'tu-4',
|
||||||
|
tool_input: {},
|
||||||
|
cwd: ROOT,
|
||||||
|
});
|
||||||
|
expect(r.status).toBe(0);
|
||||||
|
expect(readLog().length).toBe(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// Free-text capture (Layer 8 dream cycle)
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('PostToolUse hook (free-text "Other" responses)', () => {
|
||||||
|
test('source=auq-other and free_text populated when user types free text', () => {
|
||||||
|
runHook({
|
||||||
|
session_id: 'sess5',
|
||||||
|
tool_name: 'AskUserQuestion',
|
||||||
|
tool_use_id: 'tu-5',
|
||||||
|
tool_input: {
|
||||||
|
questions: [{ question: 'D5 — Other test', options: ['A', 'B'] }],
|
||||||
|
},
|
||||||
|
tool_response: {
|
||||||
|
answers: [
|
||||||
|
{
|
||||||
|
option_label: 'Other',
|
||||||
|
free_text: 'I always include tests with new features',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
cwd: ROOT,
|
||||||
|
});
|
||||||
|
const events = readLog();
|
||||||
|
expect(events.length).toBe(1);
|
||||||
|
expect(events[0].source).toBe('auq-other');
|
||||||
|
expect(events[0].free_text).toContain('always include tests');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// Dedup
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('PostToolUse hook (dedup on source + tool_use_id)', () => {
|
||||||
|
test('second fire with same (source, tool_use_id) is dropped', () => {
|
||||||
|
const payload = {
|
||||||
|
session_id: 'sess6',
|
||||||
|
tool_name: 'AskUserQuestion',
|
||||||
|
tool_use_id: 'tu-6',
|
||||||
|
tool_input: { questions: [{ question: 'Dedup test', options: ['A'] }] },
|
||||||
|
tool_response: { answers: [{ option_label: 'A' }] },
|
||||||
|
cwd: ROOT,
|
||||||
|
};
|
||||||
|
runHook(payload);
|
||||||
|
runHook(payload);
|
||||||
|
expect(readLog().length).toBe(1);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// Refuse-on-ambiguous (D2 safety)
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('PostToolUse hook (recommended parser safety)', () => {
|
||||||
|
test('two (recommended) labels → recommended field omitted', () => {
|
||||||
|
runHook({
|
||||||
|
session_id: 'sess7',
|
||||||
|
tool_name: 'AskUserQuestion',
|
||||||
|
tool_use_id: 'tu-7',
|
||||||
|
tool_input: {
|
||||||
|
questions: [
|
||||||
|
{
|
||||||
|
question: 'Ambiguous test',
|
||||||
|
options: ['A) Foo (recommended)', 'B) Bar (recommended)'],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
tool_response: { answers: [{ option_label: 'A) Foo (recommended)' }] },
|
||||||
|
cwd: ROOT,
|
||||||
|
});
|
||||||
|
const events = readLog();
|
||||||
|
expect(events.length).toBe(1);
|
||||||
|
expect(events[0].recommended).toBeUndefined();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// Crash safety
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('PostToolUse hook (crash safety)', () => {
|
||||||
|
test('exits 0 on empty stdin', () => {
|
||||||
|
const env: Record<string, string> = {};
|
||||||
|
for (const [k, v] of Object.entries(process.env)) {
|
||||||
|
if (v !== undefined) env[k] = v;
|
||||||
|
}
|
||||||
|
env.GSTACK_STATE_ROOT = stateRoot;
|
||||||
|
env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
|
||||||
|
const res = spawnSync(HOOK, [], { env, input: '', encoding: 'utf-8' });
|
||||||
|
expect(res.status).toBe(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('exits 0 on malformed JSON', () => {
|
||||||
|
const env: Record<string, string> = {};
|
||||||
|
for (const [k, v] of Object.entries(process.env)) {
|
||||||
|
if (v !== undefined) env[k] = v;
|
||||||
|
}
|
||||||
|
env.GSTACK_STATE_ROOT = stateRoot;
|
||||||
|
env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
|
||||||
|
const res = spawnSync(HOOK, [], {
|
||||||
|
env,
|
||||||
|
input: 'not json',
|
||||||
|
encoding: 'utf-8',
|
||||||
|
});
|
||||||
|
expect(res.status).toBe(0);
|
||||||
|
// Error logged to hook-errors.log
|
||||||
|
const errLog = path.join(stateRoot, 'hook-errors.log');
|
||||||
|
expect(fs.existsSync(errLog)).toBe(true);
|
||||||
|
expect(fs.readFileSync(errLog, 'utf-8')).toContain('stdin parse failed');
|
||||||
|
});
|
||||||
|
});
|
||||||
Loading…
Reference in New Issue