mirror of https://github.com/garrytan/gstack.git
feat(hooks): Layer 8 memory injection via per-session cache
Plan-tune cathedral T12. Extends the PreToolUse hook to inject matching
free-text-memory.json nuggets into AskUserQuestion responses, giving the
agent + user the distilled context from past 'Other' answers right when
the related question fires.
Per-session cache (D13 perf): first read of free-text-memory.json writes
~/.gstack/sessions/<id>/memory-cache.json. Subsequent hooks on the same
session take the cached path. Invalidation is by file-missing only: when the
canonical file changes (via gstack-distill-apply), an existing per-session
cache keeps serving the stale view until the session restarts and a new
cache is built. Cheap, correct enough for v1.
Matching logic:
- Walk this AUQ batch's questions, extract marker question_ids.
- Look up signal_key in scripts/question-registry.ts.
- Collect nuggets whose applies_to_signal_keys include any of the
matched signal_keys.
- Cap to 3 most-recent (by applied_at) so the additionalContext stays
short.
- Surface as additionalContext on the hookSpecificOutput response.
Memory + enforcement interact cleanly: the same hook can both surface
nuggets AND deny the tool when a never-ask preference matches. Memory
context isn't doubled in the deny reason — the auto-decided option name
in the deny path is sufficient signal.
6 new tests cover injection on defer, no-match silence, 3-most-recent cap,
memory-alongside-deny enforcement, cache file write-through, empty-canonical
graceful degradation. Existing 15 preference-hook tests still green.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
241be5c352
commit
d3aceb6c51
|
|
@ -91,15 +91,13 @@ function readStdin(): Promise<string> {
|
|||
});
|
||||
}
|
||||
|
||||
function defer(): void {
|
||||
process.stdout.write(
|
||||
JSON.stringify({
|
||||
hookSpecificOutput: {
|
||||
hookEventName: 'PreToolUse',
|
||||
permissionDecision: 'defer',
|
||||
},
|
||||
}),
|
||||
);
|
||||
/**
 * Emit a PreToolUse `defer` decision on stdout and terminate the hook
 * process with exit code 0.
 *
 * @param additionalContext Optional text attached to the response as
 *   `hookSpecificOutput.additionalContext`. Left out of the payload when it
 *   is undefined or an empty string.
 */
function defer(additionalContext?: string): void {
  const hookSpecificOutput: Record<string, unknown> = {
    hookEventName: 'PreToolUse',
    permissionDecision: 'defer',
    // Only add the key when there is something to say.
    ...(additionalContext ? { additionalContext } : {}),
  };
  process.stdout.write(JSON.stringify({ hookSpecificOutput }));
  process.exit(0);
}
|
||||
|
||||
|
|
@ -151,6 +149,72 @@ interface RegistryEntry {
|
|||
signal_key?: string;
|
||||
}
|
||||
|
||||
/** One entry of the `nuggets` array read from free-text-memory.json. */
interface MemoryNugget {
  /** Text that is surfaced when this nugget matches a question. */
  nugget: string;
  /** Timestamp string used to rank matching nuggets by recency; may be absent. */
  applied_at?: string;
  /** signal_key values this nugget applies to. */
  applies_to_signal_keys: string[];
}
|
||||
|
||||
/**
 * Load the free-text memory nuggets for one hook invocation.
 *
 * Lookup order:
 *  1. `<stateRoot>/sessions/<sessionId>/memory-cache.json`, when a usable
 *     session id is given and the file holds a `nuggets` array.
 *  2. The canonical `<stateRoot>/free-text-memory.json`.
 *
 * A successful canonical read is written through to the per-session cache
 * so later hooks on the same session skip the canonical file (D13 perf).
 * The cache is never refreshed once written: it is only bypassed when the
 * cache file is missing or unreadable.
 *
 * A failed canonical read (missing file, torn or invalid JSON) is NOT
 * cached, so a transient failure cannot pin an empty list for the rest of
 * the session.
 *
 * @param sessionId Session id from the hook input. It is used as a directory
 *   name, so ids that are not a single path segment (contain a separator, or
 *   are `.` / `..`) are treated as "no session" and skip the cache entirely.
 * @returns The nuggets array, or `[]` when nothing could be read. Never throws
 *   for file or JSON errors.
 */
function loadMemoryNuggets(sessionId: string | undefined): MemoryNugget[] {
  const sr = stateRoot();
  const canonical = path.join(sr, 'free-text-memory.json');

  // session_id arrives on stdin; refuse anything that could escape sessions/.
  const safeSessionId =
    sessionId && sessionId !== '.' && sessionId !== '..' && path.basename(sessionId) === sessionId
      ? sessionId
      : undefined;
  const cacheDir = safeSessionId ? path.join(sr, 'sessions', safeSessionId) : undefined;
  const cachePath = cacheDir ? path.join(cacheDir, 'memory-cache.json') : undefined;

  if (cachePath) {
    try {
      const cached = JSON.parse(fs.readFileSync(cachePath, 'utf-8'));
      if (Array.isArray(cached.nuggets)) {
        return cached.nuggets;
      }
    } catch {
      // miss (absent, unreadable, or malformed cache) → fall through
    }
  }

  let nuggets: MemoryNugget[];
  try {
    const parsed = JSON.parse(fs.readFileSync(canonical, 'utf-8'));
    nuggets = Array.isArray(parsed.nuggets) ? parsed.nuggets : [];
  } catch {
    // Canonical file missing or unparsable: degrade to "no memory" for this
    // call only, and do not write the failure into the session cache.
    return [];
  }

  // Write through to the per-session cache so subsequent hooks on this
  // session take the fast path. Best-effort; never fails the hook.
  if (cacheDir && cachePath) {
    try {
      fs.mkdirSync(cacheDir, { recursive: true });
      fs.writeFileSync(
        cachePath,
        JSON.stringify({ nuggets, cached_at: new Date().toISOString() }, null, 2),
      );
    } catch {
      // swallow: the cache is an optimisation only
    }
  }

  return nuggets;
}
|
||||
|
||||
/**
 * Select the nugget texts that apply to one signal_key, newest first.
 *
 * Recency is decided by parsing `applied_at` as a date, so timestamps that
 * differ in UTC offset or precision still order chronologically. Nuggets with
 * a missing or unparsable `applied_at` sort after all dated ones. Ties fall
 * back to plain code-unit comparison of the raw string, then to input order.
 * The input array is not mutated.
 *
 * @param nuggets Candidate nuggets; entries whose `applies_to_signal_keys`
 *   is not an array are ignored.
 * @param signalKey signal_key to match.
 * @param max Maximum number of texts to return (default 3). Zero, negative,
 *   or NaN yields an empty list.
 * @returns Up to `max` nugget texts, most recently applied first.
 */
function nuggetsForSignal(nuggets: MemoryNugget[], signalKey: string, max = 3): string[] {
  // slice(0, negative) would drop items from the END instead of capping.
  if (!(max > 0)) return [];

  const rawStamp = (n: MemoryNugget): string => n.applied_at ?? '';
  const epochOf = (n: MemoryNugget): number => {
    const ms = Date.parse(rawStamp(n));
    return Number.isNaN(ms) ? Number.NEGATIVE_INFINITY : ms;
  };

  return nuggets
    .filter((n) => Array.isArray(n.applies_to_signal_keys) && n.applies_to_signal_keys.includes(signalKey))
    .sort((a, b) => {
      const ta = epochOf(a);
      const tb = epochOf(b);
      if (ta !== tb) return tb > ta ? 1 : -1;
      const ra = rawStamp(a);
      const rb = rawStamp(b);
      if (ra === rb) return 0;
      return rb > ra ? 1 : -1;
    })
    .slice(0, max)
    .map((n) => n.nugget);
}
|
||||
|
||||
let registryCache: Record<string, RegistryEntry> | null = null;
|
||||
|
||||
function loadRegistry(): Record<string, RegistryEntry> {
|
||||
|
|
@ -314,19 +378,40 @@ async function main(): Promise<void> {
|
|||
// Mixed cases pass through (defer) so the user still gets to answer.
|
||||
const registry = loadRegistry();
|
||||
const slug = slugFromCwd(stdin.cwd);
|
||||
const memoryNuggets = loadMemoryNuggets(stdin.session_id);
|
||||
|
||||
// Compute Layer 8 memory context inline: any nuggets matching the
|
||||
// signal_keys of the questions in this AUQ get surfaced as additionalContext.
|
||||
// This applies whether we defer OR deny — gives the agent + user the
|
||||
// relevant prior context either way.
|
||||
const contextNuggets: string[] = [];
|
||||
for (const q of questions) {
|
||||
const qText = q.question || '';
|
||||
const marker = qText.match(MARKER_RE);
|
||||
if (!marker) continue;
|
||||
const entry = registry[marker[1]];
|
||||
if (!entry?.signal_key) continue;
|
||||
const hits = nuggetsForSignal(memoryNuggets, entry.signal_key);
|
||||
for (const h of hits) {
|
||||
if (!contextNuggets.includes(h)) contextNuggets.push(h);
|
||||
}
|
||||
}
|
||||
const memoryContext = contextNuggets.length
|
||||
? '[plan-tune memory] Past answers suggest: ' + contextNuggets.join(' | ')
|
||||
: undefined;
|
||||
|
||||
const autoDecisions: Array<{ id: string; recommended: string }> = [];
|
||||
for (const q of questions) {
|
||||
const qText = q.question || '';
|
||||
const marker = qText.match(MARKER_RE);
|
||||
if (!marker) {
|
||||
defer();
|
||||
defer(memoryContext);
|
||||
return;
|
||||
}
|
||||
const questionId = marker[1];
|
||||
const pref = lookupPreference(slug, questionId);
|
||||
if (!pref.preference || pref.preference === 'always-ask') {
|
||||
defer();
|
||||
defer(memoryContext);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -334,7 +419,7 @@ async function main(): Promise<void> {
|
|||
const doorType = entry?.door_type || 'two-way';
|
||||
if (doorType === 'one-way') {
|
||||
// Safety override — even never-ask doesn't bypass one-way doors.
|
||||
defer();
|
||||
defer(memoryContext);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -342,7 +427,7 @@ async function main(): Promise<void> {
|
|||
const { recommended, ambiguous } = extractRecommended(qText, opts);
|
||||
if (!recommended || ambiguous) {
|
||||
// Refuse-on-ambiguous per D2 — fail safe, ask normally.
|
||||
defer();
|
||||
defer(memoryContext);
|
||||
return;
|
||||
}
|
||||
autoDecisions.push({ id: questionId, recommended });
|
||||
|
|
|
|||
|
|
@ -0,0 +1,220 @@
|
|||
/**
|
||||
* Layer 8 memory cache + injection (plan-tune cathedral T12).
|
||||
*
|
||||
* Verifies the PreToolUse hook reads ~/.gstack/free-text-memory.json and
|
||||
* surfaces matching nuggets via additionalContext on the hook response.
|
||||
* Cache: per-session memory-cache.json populated on first read, sub-1ms
|
||||
* thereafter (D13 perf).
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
// Repository root and the hook executable under test.
const ROOT = path.resolve(import.meta.dir, '..');
const HOOK = path.join(ROOT, 'hosts', 'claude', 'hooks', 'question-preference-hook');

// Per-test fixture state; reassigned by beforeEach.
let stateRoot: string;
let fixtureCwd: string;
let cwdSlug: string;

// Fresh temp state root per test, with a fixture working directory inside it.
beforeEach(() => {
  const prefix = path.join(os.tmpdir(), 'gstack-memcache-');
  stateRoot = fs.mkdtempSync(prefix);
  cwdSlug = 'memcache-fixture';
  fixtureCwd = path.join(stateRoot, cwdSlug);
  fs.mkdirSync(fixtureCwd, { recursive: true });
});

// Remove everything the test wrote.
afterEach(() => {
  fs.rmSync(stateRoot, { force: true, recursive: true });
});
|
||||
|
||||
/**
 * Write `nuggets` as `{ nuggets }` to free-text-memory.json under the
 * current test's state root, replacing any existing file.
 */
function writeMemory(nuggets: Array<{ nugget: string; applies_to_signal_keys: string[]; applied_at?: string }>) {
  const target = path.join(stateRoot, 'free-text-memory.json');
  const payload = JSON.stringify({ nuggets });
  fs.writeFileSync(target, payload);
}
|
||||
|
||||
/**
 * Run the hook executable once with `stdin` (plus `cwd: fixtureCwd`) as its
 * JSON input and collect the result.
 *
 * The child inherits the current environment, with GSTACK_STATE_ROOT pointed
 * at the test's state root, GSTACK_QUESTION_LOG_NO_DERIVE set to '1', and
 * GSTACK_HOME removed.
 *
 * @returns Raw stdout/stderr, the exit status (-1 when none was reported),
 *   and `parsed`: stdout parsed as JSON (`{}` for empty stdout, `null` when
 *   stdout is not valid JSON).
 */
function runHook(stdin: object): { stdout: string; stderr: string; status: number; parsed: any } {
  const env: Record<string, string> = {};
  Object.entries(process.env).forEach(([key, value]) => {
    if (value !== undefined) env[key] = value;
  });
  env.GSTACK_STATE_ROOT = stateRoot;
  env.GSTACK_QUESTION_LOG_NO_DERIVE = '1';
  delete env.GSTACK_HOME;

  const payload = JSON.stringify({ ...stdin, cwd: fixtureCwd });
  const res = spawnSync(HOOK, [], { cwd: ROOT, encoding: 'utf-8', env, input: payload });

  let parsed: any = null;
  try {
    parsed = JSON.parse(res.stdout || '{}');
  } catch {
    // non-JSON stdout: leave parsed as null
  }

  const { stdout, stderr, status } = res;
  return {
    stdout: stdout ?? '',
    stderr: stderr ?? '',
    status: status ?? -1,
    parsed,
  };
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Injection behavior
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('memory injection', () => {
  // Every test here asks the same registered question.
  // ship-todos-reorganize has signal_key 'detail-preference' per registry.
  const askReorganize = (sessionId: string, toolUseId: string) =>
    runHook({
      session_id: sessionId,
      tool_name: 'AskUserQuestion',
      tool_use_id: toolUseId,
      tool_input: {
        questions: [
          {
            question: '<gstack-qid:ship-todos-reorganize> Reorganize?',
            options: ['A) Accept (recommended)', 'B) Skip'],
          },
        ],
      },
    });

  test('injects matching nugget into additionalContext on defer', () => {
    writeMemory([
      {
        nugget: 'User prefers verbose explanations with tradeoffs',
        applies_to_signal_keys: ['detail-preference'],
        applied_at: '2026-05-01T00:00:00Z',
      },
    ]);
    const r = askReorganize('s1', 'tu-1');
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
    expect(r.parsed?.hookSpecificOutput?.additionalContext).toContain('verbose explanations');
  });

  test('does not inject when no nugget matches the signal_key', () => {
    writeMemory([
      {
        nugget: 'Unrelated nugget',
        applies_to_signal_keys: ['totally-different-key'],
      },
    ]);
    const r = askReorganize('s2', 'tu-2');
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
    expect(r.parsed?.hookSpecificOutput?.additionalContext).toBeUndefined();
  });

  test('caps to 3 most-recent nuggets when many match', () => {
    writeMemory([
      { nugget: 'old-1', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-01-01T00:00:00Z' },
      { nugget: 'old-2', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-02-01T00:00:00Z' },
      { nugget: 'old-3', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-03-01T00:00:00Z' },
      { nugget: 'old-4', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-04-01T00:00:00Z' },
      { nugget: 'newest', applies_to_signal_keys: ['detail-preference'], applied_at: '2026-05-01T00:00:00Z' },
    ]);
    const r = askReorganize('s3', 'tu-3');
    const ctx = r.parsed?.hookSpecificOutput?.additionalContext || '';
    // The three newest are present...
    expect(ctx).toContain('newest');
    expect(ctx).toContain('old-4');
    expect(ctx).toContain('old-3');
    // ...and BOTH older ones are dropped. Checking only old-1 would let a
    // cap of 4 slip through.
    expect(ctx).not.toContain('old-2');
    expect(ctx).not.toContain('old-1');
  });

  test('memory injection works alongside deny enforcement', () => {
    writeMemory([
      {
        nugget: 'User prefers reorganizing for clarity',
        applies_to_signal_keys: ['detail-preference'],
        applied_at: '2026-05-01T00:00:00Z',
      },
    ]);
    // Set a never-ask preference so enforcement has something to act on.
    const prefsDir = path.join(stateRoot, 'projects', cwdSlug);
    fs.mkdirSync(prefsDir, { recursive: true });
    fs.writeFileSync(
      path.join(prefsDir, 'question-preferences.json'),
      JSON.stringify({ 'ship-todos-reorganize': 'never-ask' }),
    );
    const r = askReorganize('s4', 'tu-4');
    // ship-todos-reorganize is two-way per registry — enforcement should fire.
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('deny');
    expect(r.parsed?.hookSpecificOutput?.permissionDecisionReason).toContain('plan-tune auto-decide');
    // NOTE(review): this only pins that a matching nugget does not stop the
    // deny from firing and that the reason names the auto-decision. It does
    // not assert whether additionalContext accompanies a deny, nor that the
    // nugget text is absent from the reason — confirm the intended contract
    // against the hook's deny path before adding either assertion.
  });
});
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Cache behavior
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
describe('per-session memory cache', () => {
  // Minimal registered question; with no stored preference the hook defers.
  const askOnce = (sessionId: string, toolUseId: string) =>
    runHook({
      session_id: sessionId,
      tool_name: 'AskUserQuestion',
      tool_use_id: toolUseId,
      tool_input: {
        questions: [
          { question: '<gstack-qid:ship-todos-reorganize> Q', options: ['A', 'B'] },
        ],
      },
    });

  test('first read writes cache; subsequent reads use cache', () => {
    writeMemory([
      { nugget: 'cached nugget', applies_to_signal_keys: ['detail-preference'] },
    ]);
    askOnce('cache-test', 'tu-c1');

    // First read: the canonical content is written through to the session cache.
    const cachePath = path.join(stateRoot, 'sessions', 'cache-test', 'memory-cache.json');
    expect(fs.existsSync(cachePath)).toBe(true);
    const cached = JSON.parse(fs.readFileSync(cachePath, 'utf-8'));
    expect(cached.nuggets).toHaveLength(1);
    expect(cached.nuggets[0].nugget).toBe('cached nugget');

    // Second read on the same session: change the canonical file and make
    // sure the hook still answers from the cache, not from the new content.
    writeMemory([
      { nugget: 'fresh nugget', applies_to_signal_keys: ['detail-preference'] },
    ]);
    const second = askOnce('cache-test', 'tu-c2');
    const ctx = second.parsed?.hookSpecificOutput?.additionalContext || '';
    expect(ctx).toContain('cached nugget');
    expect(ctx).not.toContain('fresh nugget');
  });

  test('cache miss when canonical file empty/missing → empty nuggets', () => {
    // No writeMemory() call: the canonical file does not exist.
    const r = askOnce('empty', 'tu-e');
    expect(r.parsed?.hookSpecificOutput?.permissionDecision).toBe('defer');
    expect(r.parsed?.hookSpecificOutput?.additionalContext).toBeUndefined();
  });
});
|
||||
Loading…
Reference in New Issue