mirror of https://github.com/garrytan/gstack.git
482 lines
17 KiB
TypeScript
482 lines
17 KiB
TypeScript
#!/usr/bin/env bun
|
|
/**
 * PreToolUse hook for AskUserQuestion (Claude Code, plan-tune cathedral T6).
 *
 * Enforces never-ask / always-ask / ask-only-for-one-way preferences
 * deterministically — no agent compliance required.
 *
 * Decision tree (per question in tool_input.questions):
 *   1. Extract question_id via marker (<gstack-qid:foo-bar>). If no marker,
 *      enforcement is skipped for this question (D18 — hash IDs are
 *      observed-only, never used as preference keys).
 *   2. Look up door_type from scripts/question-registry.ts (default two-way).
 *   3. Read preferences with precedence: project-local > global (D8).
 *   4. Apply:
 *        never-ask + one-way → defer (safety override; one-way always asks).
 *        never-ask + two-way + marker → deny with auto-decided recommendation
 *          in reason. Mark tool_use_id as auto-decided and log the event from
 *          this hook, since a denied call never reaches PostToolUse.
 *        ask-only-for-one-way + two-way + marker → same as never-ask.
 *        always-ask, or no preference → defer.
 *
 * Why deny+reason instead of allow+updatedInput:
 *   AskUserQuestion's `updatedInput` shape for "pre-resolve this question"
 *   isn't structurally pinned in Claude Code docs (spike T4 left as open
 *   question). `deny` with a reason that names the auto-decided option is
 *   conservative + reliable: the model receives the rejection feedback,
 *   reads the recommended option from the reason, and proceeds without
 *   re-firing AUQ. When the spike around input mutation lands, we can
 *   swap to allow+updatedInput without changing the contract.
 *
 * Recommended-option extraction (per D2):
 *   - First: (recommended) label suffix on an option.
 *   - Fall back: "Recommendation: X" prose match against option labels.
 *   - Refuse to auto-decide if ambiguous (multiple labels OR no parseable
 *     recommendation): defer instead of silent-wrong.
 *
 * Always exits 0. Hook errors land in ~/.gstack/hook-errors.log.
 * See docs/spikes/claude-code-hook-mutation.md for the protocol contract.
 */
|
|
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
import { fileURLToPath } from 'url';
|
|
|
|
/**
 * Shape of the JSON payload this hook reads from stdin. Every field is
 * optional because the payload is parsed without schema validation.
 */
interface HookStdin {
  /** Session identifier; used to bucket per-session files under the state root. */
  session_id?: string;
  hook_event_name?: string;
  /** Name of the tool being invoked; the hook only acts on AskUserQuestion variants. */
  tool_name?: string;
  /** Identifier of this tool call; used to name the auto-decided marker file. */
  tool_use_id?: string;
  tool_input?: {
    /** Questions bundled into a single AskUserQuestion call. */
    questions?: {
      question?: string;
      /** Each option is either a bare label or an object carrying label/description. */
      options?: (string | { label?: string; description?: string })[];
      multiSelect?: boolean;
    }[];
  };
  /** Working directory of the originating tool call. */
  cwd?: string;
}
|
|
|
|
/**
 * Finds the `<gstack-qid:...>` marker inside question text. Capture group 1
 * is the question id (1-64 characters of letters, digits and dashes; the
 * match is case-insensitive).
 */
const MARKER_RE = /<gstack-qid:([a-z0-9-]{1,64})>/i;

/** Matches a trailing "(recommended)" suffix on an option label, ignoring case. */
const RECOMMENDED_LABEL_RE = /\(recommended\)\s*$/i;
|
|
|
|
/**
 * Resolve the directory that holds gstack state.
 *
 * Precedence: `GSTACK_STATE_ROOT`, then `GSTACK_HOME`, then `~/.gstack`.
 * Empty environment values are treated as unset.
 */
function stateRoot(): string {
  const { GSTACK_STATE_ROOT, GSTACK_HOME } = process.env;
  if (GSTACK_STATE_ROOT) return GSTACK_STATE_ROOT;
  if (GSTACK_HOME) return GSTACK_HOME;
  return path.join(os.homedir(), '.gstack');
}
|
|
|
|
/**
 * Append a timestamped line to `hook-errors.log` under the state root.
 *
 * Best-effort: any failure while creating the directory or writing the file
 * is swallowed so that error reporting can never break the hook itself.
 *
 * @param msg Human-readable description of what went wrong.
 */
function logHookError(msg: string): void {
  try {
    const root = stateRoot();
    const logFile = path.join(root, 'hook-errors.log');
    const line = `${new Date().toISOString()} question-preference-hook: ${msg}\n`;
    fs.mkdirSync(root, { recursive: true });
    fs.appendFileSync(logFile, line);
  } catch {
    // Nothing sensible left to do if even the error log is unwritable.
  }
}
|
|
|
|
/**
 * Collect everything written to stdin as a UTF-8 string.
 *
 * The promise never rejects: it resolves with whatever has been buffered so
 * far on `end`, on a stream error, or after a 2 second timeout, whichever
 * comes first.
 */
function readStdin(): Promise<string> {
  return new Promise<string>((resolve) => {
    const input = process.stdin;
    let collected = '';
    const finish = (): void => resolve(collected);

    input.setEncoding('utf-8');
    input.on('data', (piece) => {
      collected += piece;
    });
    input.on('end', finish);
    input.on('error', finish);
    // Safety net so a stdin that is never closed cannot hang the hook.
    setTimeout(finish, 2000);
  });
}
|
|
|
|
/**
 * Express "no permission opinion" and exit 0, so the tool runs and the
 * question renders normally.
 *
 * @param additionalContext Optional Layer 8 memory context to surface.
 */
function defer(additionalContext?: string): void {
  // The Claude Code hook spec defines permissionDecision as
  // "allow" | "deny" | "ask" only; "defer" is not a valid value. Native
  // Claude Code silently ignored the bogus value and fell through to the
  // normal flow, so emitting it appeared to work. Conductor's
  // mcp__conductor__AskUserQuestion bridge does NOT ignore it: an
  // unrecognized permissionDecision on its own injected tool hangs the
  // round-trip, the question never renders, and no tool_result is returned
  // (the harness then substitutes "[Tool result missing due to internal
  // error]"). Because defer() fires on every ordinary question without
  // never-ask enforcement, that broke AskUserQuestion entirely under
  // Conductor.
  //
  // The spec-correct way to say "no opinion" is to emit NO
  // permissionDecision at all:
  //   - without context: empty stdout + exit 0 (canonical "no decision");
  //   - with context: additionalContext only, still no permissionDecision.
  if (additionalContext) {
    const payload = {
      hookSpecificOutput: {
        hookEventName: 'PreToolUse',
        additionalContext,
      },
    };
    process.stdout.write(JSON.stringify(payload));
  }
  process.exit(0);
}
|
|
|
|
/**
 * Emit a PreToolUse `deny` decision carrying `reason`, then exit 0.
 *
 * @param reason Text placed in `permissionDecisionReason`.
 */
function deny(reason: string): void {
  const payload = {
    hookSpecificOutput: {
      hookEventName: 'PreToolUse',
      permissionDecision: 'deny',
      permissionDecisionReason: reason,
    },
  };
  process.stdout.write(JSON.stringify(payload));
  process.exit(0);
}
|
|
|
|
/**
 * Read and parse a JSON file that is expected to hold a plain object.
 *
 * @param filePath Path of the JSON file.
 * @returns The parsed object, or `null` when the file is missing, unreadable,
 *   not valid JSON, or valid JSON that is not a plain object (a string,
 *   number, boolean, array or `null`).
 */
function readJsonSafe(filePath: string): Record<string, unknown> | null {
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    // JSON.parse happily returns primitives and arrays. Handing those back
    // under a Record type would let callers index into e.g. a string and
    // read single characters as if they were stored values.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return null;
    }
    return parsed as Record<string, unknown>;
  } catch {
    return null;
  }
}
|
|
|
|
/** Outcome of resolving a stored preference for one question id. */
interface PreferenceLookup {
  /** The stored preference string, or `undefined` when none was found. */
  preference: string | undefined;
  /** Which preference file supplied the value; `'none'` when nothing matched. */
  source: 'project' | 'global' | 'none';
}
|
|
|
|
/**
 * Resolve the stored preference for `questionId`.
 *
 * The project-local file (`projects/<slug>/question-preferences.json`) is
 * consulted first; the global file (`global-question-preferences.json`) is
 * only read when the project file has no string value for the id.
 *
 * @param slug Project bucket name under the state root.
 * @param questionId Question id used as the key in both files.
 * @returns The preference and where it came from, or `source: 'none'`.
 */
function lookupPreference(slug: string, questionId: string): PreferenceLookup {
  const root = stateRoot();
  const candidates: Array<{ file: string; source: 'project' | 'global' }> = [
    { file: path.join(root, 'projects', slug, 'question-preferences.json'), source: 'project' },
    { file: path.join(root, 'global-question-preferences.json'), source: 'global' },
  ];

  for (const { file, source } of candidates) {
    const prefs = readJsonSafe(file);
    const value = prefs ? prefs[questionId] : undefined;
    if (typeof value === 'string') {
      return { preference: value, source };
    }
  }
  return { preference: undefined, source: 'none' };
}
|
|
|
|
/** One question definition as extracted from `scripts/question-registry.ts`. */
interface RegistryEntry {
  /** Question id; also the key the entry is stored under. */
  id: string;
  /** One-way questions are always asked; callers default a missing value to two-way. */
  door_type?: 'one-way' | 'two-way';
  /** Key used to match memory nuggets to this question. */
  signal_key?: string;
}
|
|
|
|
/** One remembered free-text note and the signal keys it is relevant to. */
interface MemoryNugget {
  /** The text surfaced to the agent as additional context. */
  nugget: string;
  /** Signal keys for which this nugget should be surfaced. */
  applies_to_signal_keys: string[];
  /** Timestamp string used to order nuggets by recency (compared as text). */
  applied_at?: string;
}
|
|
|
|
/**
 * Load memory nuggets, preferring the per-session cache over the canonical
 * `free-text-memory.json` file.
 *
 * A cache hit (a readable `sessions/<id>/memory-cache.json` whose `nuggets`
 * is an array) is returned as-is. On a miss the canonical file is read and,
 * when a session id is available, written through to the cache so later
 * hook invocations for the same session take the fast path. The cache is
 * only ever refreshed by being absent. Read and write failures are
 * swallowed; an unreadable canonical file yields an empty list.
 *
 * @param sessionId Session whose cache should be used, if any.
 * @returns The nuggets array (possibly empty).
 */
function loadMemoryNuggets(sessionId: string | undefined): MemoryNugget[] {
  const root = stateRoot();
  const canonicalFile = path.join(root, 'free-text-memory.json');
  const sessionDir = sessionId ? path.join(root, 'sessions', sessionId) : undefined;

  // Fast path: per-session cache.
  if (sessionDir) {
    try {
      const cached = JSON.parse(fs.readFileSync(path.join(sessionDir, 'memory-cache.json'), 'utf-8'));
      if (Array.isArray(cached.nuggets)) {
        return cached.nuggets;
      }
    } catch {
      // Cache miss: fall through to the canonical file.
    }
  }

  // Slow path: canonical file is the source of truth.
  let loaded: MemoryNugget[];
  try {
    const canonical = JSON.parse(fs.readFileSync(canonicalFile, 'utf-8'));
    loaded = Array.isArray(canonical.nuggets) ? canonical.nuggets : [];
  } catch {
    loaded = [];
  }

  // Best-effort write-through; a failure here must never fail the hook.
  if (sessionDir) {
    try {
      fs.mkdirSync(sessionDir, { recursive: true });
      const snapshot = { nuggets: loaded, cached_at: new Date().toISOString() };
      fs.writeFileSync(path.join(sessionDir, 'memory-cache.json'), JSON.stringify(snapshot, null, 2));
    } catch {
      // Ignore: the next invocation simply re-reads the canonical file.
    }
  }

  return loaded;
}
|
|
|
|
/**
 * Select the nugget texts that apply to `signalKey`.
 *
 * Entries come from a JSON file on disk, so malformed ones (non-objects,
 * a non-string `nugget`, a non-array `applies_to_signal_keys`) are skipped
 * rather than allowed to throw: an exception here would abort the whole
 * hook run. A missing or non-string `applied_at` sorts as the oldest.
 *
 * @param nuggets Candidate nuggets; the array is not mutated.
 * @param signalKey Signal key to match against `applies_to_signal_keys`.
 * @param max Maximum number of texts to return (default 3).
 * @returns Up to `max` nugget texts, most recently applied first.
 */
function nuggetsForSignal(nuggets: MemoryNugget[], signalKey: string, max = 3): string[] {
  const recency = (n: MemoryNugget): string =>
    typeof n.applied_at === 'string' ? n.applied_at : '';

  return nuggets
    .filter(
      (n) =>
        n !== null &&
        typeof n === 'object' &&
        typeof n.nugget === 'string' &&
        Array.isArray(n.applies_to_signal_keys) &&
        n.applies_to_signal_keys.includes(signalKey),
    )
    // filter() returned a fresh array, so sorting in place is safe.
    .sort((a, b) => recency(b).localeCompare(recency(a)))
    .slice(0, max)
    .map((n) => n.nugget);
}
|
|
|
|
/** Per-process memo of the parsed registry; `null` until the first load. */
let registryCache: Record<string, RegistryEntry> | null = null;

/**
 * Load question definitions from `scripts/question-registry.ts`, keyed by id.
 *
 * The registry source is scanned with a regex instead of being imported, so
 * the hook does not need bun to resolve the module at invocation time. Only
 * entries that declare a `door_type` are picked up. The result is memoized
 * for the lifetime of the process. A missing registry file or any load error
 * yields an empty map (errors are written to the hook error log).
 *
 * @returns Map of question id to registry entry; possibly empty.
 */
function loadRegistry(): Record<string, RegistryEntry> {
  if (registryCache) return registryCache;
  const loaded: Record<string, RegistryEntry> = {};
  registryCache = loaded;
  try {
    // Hook lives at hosts/claude/hooks/; registry at scripts/question-registry.ts.
    // fileURLToPath decodes percent-escapes; URL.pathname would leave an
    // install path containing spaces as ".../John%20Doe/..." and the registry
    // would silently never be found.
    const here = path.dirname(fileURLToPath(import.meta.url));
    const repoRoot = path.resolve(here, '..', '..', '..');
    const regPath = path.join(repoRoot, 'scripts', 'question-registry.ts');
    if (!fs.existsSync(regPath)) return loaded;
    const src = fs.readFileSync(regPath, 'utf-8');
    // Matches entries like:
    //   'ship-test-failure-triage': {
    //     id: 'ship-test-failure-triage',
    //     ...
    //     door_type: 'one-way',
    //     signal_key: 'test-discipline',
    //     ...
    //   },
    // The pattern stops at the first '}', so it relies on entries not
    // containing nested braces.
    const blockRe =
      /'([a-z0-9-]+)':\s*\{[^}]*?door_type:\s*'(one-way|two-way)'[^}]*?\}/g;
    let m: RegExpExecArray | null;
    while ((m = blockRe.exec(src))) {
      const [block, id, door_type] = m;
      const sk = block.match(/signal_key:\s*'([a-z0-9-]+)'/);
      loaded[id] = {
        id,
        door_type: door_type as 'one-way' | 'two-way',
        signal_key: sk ? sk[1] : undefined,
      };
    }
  } catch (e) {
    logHookError(`registry load failed: ${(e as Error).message}`);
  }
  return loaded;
}
|
|
|
|
/**
 * Reduce each option to a single display string.
 *
 * String options are used as-is. Object options use `label`, falling back
 * to `description`, then to the empty string.
 *
 * @param opts Options as they appear in the tool input.
 * @returns One string per option, in the same order.
 */
function optionLabels(opts: Array<string | { label?: string; description?: string }>): string[] {
  const labels: string[] = [];
  for (const opt of opts) {
    if (typeof opt === 'string') {
      labels.push(opt);
    } else {
      labels.push(opt.label || opt.description || '');
    }
  }
  return labels;
}
|
|
|
|
/**
 * Work out which option the question recommends.
 *
 * Strategy:
 *   1. A single option ending in "(recommended)" wins; the suffix is
 *      stripped from the returned label. Several such options are ambiguous.
 *   2. Otherwise look for "Recommendation: X" in the question text and match
 *      the first 12 characters of X (case-insensitively) as a prefix of the
 *      option labels. Exactly one match wins; several are ambiguous.
 *
 * @param questionText Full question text.
 * @param opts Option labels.
 * @returns `recommended` when exactly one option was identified; `ambiguous`
 *   is true when more than one option matched.
 */
function extractRecommended(
  questionText: string,
  opts: string[],
): { recommended: string | undefined; ambiguous: boolean } {
  const labelMatches = opts.filter((o) => RECOMMENDED_LABEL_RE.test(o));
  if (labelMatches.length === 1) {
    return { recommended: labelMatches[0].replace(RECOMMENDED_LABEL_RE, '').trim(), ambiguous: false };
  }
  if (labelMatches.length > 1) return { recommended: undefined, ambiguous: true };

  const m = questionText.match(/Recommendation:\s*([^\n]+)/i);
  if (!m) return { recommended: undefined, ambiguous: false };
  const needle = m[1].trim().toLowerCase().slice(0, 12);
  // "Recommendation:" followed only by whitespace captures a blank phrase.
  // An empty needle is a prefix of every label, which would auto-select the
  // sole option of a single-option question with no recommendation given.
  if (!needle) return { recommended: undefined, ambiguous: false };

  const prefixMatches = opts.filter((o) => o.toLowerCase().startsWith(needle));
  if (prefixMatches.length === 1) return { recommended: prefixMatches[0], ambiguous: false };
  if (prefixMatches.length > 1) return { recommended: undefined, ambiguous: true };
  return { recommended: undefined, ambiguous: false };
}
|
|
|
|
/**
 * Derive the project bucket name from the working directory.
 *
 * Uses the directory basename, mirroring gstack-slug's basename fallback.
 * The full slug resolver shells out to git, which is too slow for a hook on
 * the hot path; the slug only selects the directory bucket, while
 * preferences themselves are keyed by question id.
 *
 * @param cwd Working directory of the tool call, if known.
 * @returns The basename of `cwd`, or `'unknown'` when `cwd` is missing or empty.
 */
function slugFromCwd(cwd: string | undefined): string {
  return cwd ? path.basename(cwd) : 'unknown';
}
|
|
|
|
/**
 * Drop an empty `.auto-decided-<toolUseId>` marker file into the session's
 * state directory. Does nothing when either id is missing; failures are
 * written to the hook error log and never thrown.
 *
 * @param sessionId Session the tool call belongs to.
 * @param toolUseId Identifier of the tool call being auto-decided.
 */
function markAutoDecided(sessionId: string | undefined, toolUseId: string | undefined): void {
  if (!(sessionId && toolUseId)) return;
  try {
    const sessionDir = path.join(stateRoot(), 'sessions', sessionId);
    fs.mkdirSync(sessionDir, { recursive: true });
    const markerFile = path.join(sessionDir, `.auto-decided-${toolUseId}`);
    fs.writeFileSync(markerFile, '');
  } catch (e) {
    logHookError(`markAutoDecided failed: ${(e as Error).message}`);
  }
}
|
|
|
|
/**
 * Log an auto-decided event directly from PreToolUse, since `deny` prevents
 * the tool from running and PostToolUse never fires. Without this, /plan-tune
 * Recent auto-decisions would be blind to enforcement hits.
 *
 * The event is passed as a single JSON argument to `bin/gstack-question-log`.
 * Best-effort: failures are written to the hook error log and never thrown.
 *
 * @param questionId Question id from the marker.
 * @param questionSummary Question text; truncated to 200 characters.
 * @param recommended Auto-selected option; truncated to 64 characters and
 *   recorded as both `user_choice` and `recommended`.
 * @param optionsCount Number of options the question offered.
 * @param sessionId Session id; truncated to 64 characters.
 * @param toolUseId Tool call id; truncated to 128 characters.
 * @param cwd Working directory of the originating tool call.
 */
function logAutoDecided(
  questionId: string,
  questionSummary: string,
  recommended: string,
  optionsCount: number,
  sessionId: string | undefined,
  toolUseId: string | undefined,
  cwd: string | undefined,
): void {
  try {
    // fileURLToPath decodes percent-escapes; URL.pathname would turn an
    // install path containing spaces into ".../John%20Doe/..." and the
    // logger binary would never be found.
    const here = path.dirname(fileURLToPath(import.meta.url));
    const repoRoot = path.resolve(here, '..', '..', '..');
    const bin = path.join(repoRoot, 'bin', 'gstack-question-log');
    const payload: Record<string, unknown> = {
      skill: 'unknown',
      question_id: questionId,
      question_summary: questionSummary.slice(0, 200),
      options_count: optionsCount,
      user_choice: recommended.slice(0, 64),
      recommended: recommended.slice(0, 64),
      source: 'auto-decided',
      session_id: sessionId?.slice(0, 64),
      tool_use_id: toolUseId?.slice(0, 128),
    };
    const result = spawnSync(bin, [JSON.stringify(payload)], {
      encoding: 'utf-8',
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 3000,
      // cwd of the originating tool call so gstack-slug resolves to the
      // project the user is actually in, not the hook script's location.
      cwd: cwd && fs.existsSync(cwd) ? cwd : undefined,
    });
    // spawnSync reports a missing binary, a timeout or a non-zero exit via
    // its return value instead of throwing, so the catch below never sees
    // them; surface them explicitly.
    if (result.error) {
      logHookError(`logAutoDecided failed: ${result.error.message}`);
    } else if (result.status !== 0) {
      const stderr = (result.stderr || '').trim().slice(0, 200);
      logHookError(
        `logAutoDecided failed: exit ${result.status}${stderr ? `: ${stderr}` : ''}`,
      );
    }
  } catch (e) {
    logHookError(`logAutoDecided failed: ${(e as Error).message}`);
  }
}
|
|
|
|
/**
 * Hook entry point. Reads the hook payload from stdin and either defers
 * (the question is asked normally) or denies the call with the auto-decided
 * option named in the reason.
 *
 * Enforcement is all-or-nothing per AskUserQuestion call: the call is denied
 * only when EVERY question carries a marker, has an enforcing preference
 * (`never-ask` or `ask-only-for-one-way`), is not a one-way door, and has an
 * unambiguous recommended option. Any other case defers.
 */
async function main(): Promise<void> {
  // Only these stored values trigger auto-deciding. Anything else —
  // 'always-ask', a typo, or an unknown value — fails safe and asks the user.
  const enforcingPreferences = new Set(['never-ask', 'ask-only-for-one-way']);

  const raw = await readStdin();
  if (!raw.trim()) {
    defer();
    return;
  }
  let stdin: HookStdin;
  try {
    stdin = JSON.parse(raw);
  } catch (e) {
    logHookError(`stdin parse failed: ${(e as Error).message}`);
    defer();
    return;
  }

  const toolName = stdin.tool_name || '';
  const isAskUserQuestion =
    toolName === 'AskUserQuestion' || /^mcp__.+__AskUserQuestion$/.test(toolName);
  if (!isAskUserQuestion) {
    defer();
    return;
  }

  const questions = stdin.tool_input?.questions || [];
  if (questions.length === 0) {
    defer();
    return;
  }

  const registry = loadRegistry();
  const slug = slugFromCwd(stdin.cwd);
  const memoryNuggets = loadMemoryNuggets(stdin.session_id);

  // Layer 8 memory context: nuggets matching the signal_keys of the questions
  // in this call, de-duplicated. It is attached to every defer below; the
  // deny path carries only the auto-decision reason.
  const contextNuggets: string[] = [];
  for (const q of questions) {
    const marker = (q.question || '').match(MARKER_RE);
    if (!marker) continue;
    const signalKey = registry[marker[1]]?.signal_key;
    if (!signalKey) continue;
    for (const hit of nuggetsForSignal(memoryNuggets, signalKey)) {
      if (!contextNuggets.includes(hit)) contextNuggets.push(hit);
    }
  }
  const memoryContext = contextNuggets.length
    ? '[plan-tune memory] Past answers suggest: ' + contextNuggets.join(' | ')
    : undefined;

  // Each decision keeps its own summary and option count, so the logging
  // pass below does not rely on index alignment with `questions`.
  const autoDecisions: Array<{
    id: string;
    recommended: string;
    summary: string;
    optionsCount: number;
  }> = [];
  for (const q of questions) {
    const qText = q.question || '';
    const marker = qText.match(MARKER_RE);
    if (!marker) {
      defer(memoryContext);
      return;
    }
    const questionId = marker[1];
    const pref = lookupPreference(slug, questionId);
    if (!pref.preference || !enforcingPreferences.has(pref.preference)) {
      defer(memoryContext);
      return;
    }

    const doorType = registry[questionId]?.door_type || 'two-way';
    if (doorType === 'one-way') {
      // Safety override — even never-ask doesn't bypass one-way doors.
      defer(memoryContext);
      return;
    }

    const opts = optionLabels(q.options || []);
    const { recommended, ambiguous } = extractRecommended(qText, opts);
    if (!recommended || ambiguous) {
      // Refuse-on-ambiguous per D2 — fail safe, ask normally.
      defer(memoryContext);
      return;
    }
    autoDecisions.push({
      id: questionId,
      recommended,
      summary: qText.replace(MARKER_RE, '').trim(),
      optionsCount: opts.length,
    });
  }

  // All questions were eligible for enforcement.
  markAutoDecided(stdin.session_id, stdin.tool_use_id);

  // Log each auto-decided question now, since deny prevents PostToolUse from
  // firing. /plan-tune Recent auto-decisions reads source=auto-decided events.
  for (const d of autoDecisions) {
    logAutoDecided(
      d.id,
      d.summary,
      d.recommended,
      d.optionsCount,
      stdin.session_id,
      stdin.tool_use_id,
      stdin.cwd,
    );
  }

  const reasonLines = autoDecisions.map(
    (d) =>
      `[plan-tune auto-decide] ${d.id} → ${d.recommended} (your never-ask preference). Proceed with that option without re-prompting. Change with /plan-tune.`,
  );
  deny(reasonLines.join('\n'));
}
|
|
|
|
// Last-resort guard: if main() rejects for any reason, record the failure and
// fall back to "no opinion" so the question is still asked normally.
main().catch((err) => {
  const detail = (err as Error).message;
  logHookError(`main crash: ${detail}`);
  defer();
});
|