From 7c82ec055ee7e7b8fcf61e7b0ad690ebd808ae80 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 08:34:27 -0700 Subject: [PATCH] test(brain): resolver compression + detection-override regression pins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit test/resolvers-gbrain-save-results.test.ts (140 LOC, 10 tests): - Per-skill assertions for all 5 planning skills: emits gbrain put + correct slug prefix + tag + title. - Skip-header present so agent can short-circuit when gbrain isn't on PATH. - Compression pin: each per-skill block stays under 750 chars (~190 tokens) — guards against a future "let me add one more line" refactor silently re-inflating toward the ~1000-token naive un-suppression baseline. - Generic fallback for unmapped skill names still works. - /investigate gets the data-research routing suffix; non-investigate skills do not. - generateGBrainContextLoad stays under 500 chars (~125 tokens). test/gbrain-detection-override.test.ts (120 LOC, 4 tests): - End-to-end through gen-skill-docs subprocess against an isolated temp GSTACK_HOME. Asserts: * detected:true un-suppresses GBRAIN_* → SKILL.md gains the block * detected:false (status != "ok") suppresses → no block * no detection file suppresses → no block (graceful default) * no --respect-detection flag IGNORES the detection file → no block (CI canonical path stays reproducible) Each detection-override test restores the canonical SKILL.md in a finally block so the working tree stays clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/gbrain-detection-override.test.ts | 193 +++++++++++++++++++++ test/resolvers-gbrain-save-results.test.ts | 137 +++++++++++++++ 2 files changed, 330 insertions(+) create mode 100644 test/gbrain-detection-override.test.ts create mode 100644 test/resolvers-gbrain-save-results.test.ts diff --git a/test/gbrain-detection-override.test.ts b/test/gbrain-detection-override.test.ts new file mode 100644 index 000000000..b1b13ccbf --- /dev/null +++ b/test/gbrain-detection-override.test.ts @@ -0,0 +1,193 @@ +/** + * Regression pin for the setup-time gbrain detection → gen-skill-docs + * override (T2 / v1.50.0.0). + * + * The override mechanism lives in scripts/gen-skill-docs.ts: when invoked + * with --respect-detection, it reads ~/.gstack/gbrain-detection.json and + * un-suppresses GBRAIN_CONTEXT_LOAD + GBRAIN_SAVE_RESULTS for hosts that + * statically list them in suppressedResolvers (claude, codex, slate, + * factory, opencode, openclaw, cursor, kiro). + * + * Tests drive gen-skill-docs as a subprocess against a temp GSTACK_HOME + * with each detection state, then assert what landed in the generated + * Claude-host SKILL.md. This is end-to-end through the actual override + * pipeline — no mocking — so it catches regressions in either the loader + * or the suppressedResolvers filter. + * + * Gate-tier, free, ~3-5s per test (gen-skill-docs runs the full skill + * generation against the real repo; --host claude scopes to one host). + */ + +import { describe, test, expect, beforeAll, afterAll } from 'bun:test'; +import { execFileSync } from 'child_process'; +import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; + +const REPO_ROOT = join(import.meta.dir, '..'); + +interface FixtureEnv { + tmpHome: string; + cleanup: () => void; +} + +function makeFixture(detectionJson: string | null): FixtureEnv { + const tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-detect-test-')); + if (detectionJson !== null) { + writeFileSync(join(tmpHome, 'gbrain-detection.json'), detectionJson); + } + return { + tmpHome, + cleanup: () => { + try { + rmSync(tmpHome, { recursive: true, force: true }); + } catch { + // best effort + } + }, + }; +} + +/** + * Run gen-skill-docs with --respect-detection and an isolated GSTACK_HOME. + * Returns the regenerated office-hours/SKILL.md content WITHOUT writing + * over the committed file: we use --dry-run to keep the working tree + * clean, then parse the output via re-reading the committed file... no, + * that doesn't work for dry-run since dry-run doesn't write. + * + * Approach: generate to a temp output dir by running gen-skill-docs in a + * temp checkout. Simpler alternative: actually regenerate, snapshot the + * file content, then git-checkout the committed version back. We use this + * since gen-skill-docs doesn't expose an output-path arg. + */ +function regenAndSnapshot(opts: { + respectDetection: boolean; + tmpHome: string; + files: string[]; +}): Map { + // Save committed content so we can restore after snapshotting. + const original = new Map(); + for (const f of opts.files) { + original.set(f, readFileSync(join(REPO_ROOT, f), 'utf-8')); + } + + const args = [ + 'run', + 'scripts/gen-skill-docs.ts', + '--host', + 'claude', + ]; + if (opts.respectDetection) args.push('--respect-detection'); + + try { + execFileSync('bun', args, { + cwd: REPO_ROOT, + env: { ...process.env, GSTACK_HOME: opts.tmpHome }, + stdio: ['ignore', 'pipe', 'pipe'], + timeout: 30_000, + }); + + // Snapshot the regenerated content. + const snapshot = new Map(); + for (const f of opts.files) { + snapshot.set(f, readFileSync(join(REPO_ROOT, f), 'utf-8')); + } + return snapshot; + } finally { + // Always restore so the test leaves the working tree clean. + for (const [f, content] of original) { + writeFileSync(join(REPO_ROOT, f), content); + } + } +} + +describe('gbrain detection override → gen-skill-docs', () => { + // Single skill probe is enough to assert the override pipeline. The + // resolver unit test (test/resolvers-gbrain-save-results.test.ts) covers + // per-skill metadata correctness already. + const PROBE_FILES = ['office-hours/SKILL.md']; + + test('with detected:true, Claude-host SKILL.md gains brain-aware blocks', () => { + const { tmpHome, cleanup } = makeFixture( + JSON.stringify({ gbrain_local_status: 'ok', gbrain_on_path: true, gbrain_version: 'test-0.41.0' }), + ); + try { + const snap = regenAndSnapshot({ + respectDetection: true, + tmpHome, + files: PROBE_FILES, + }); + const content = snap.get('office-hours/SKILL.md')!; + + // GBRAIN_SAVE_RESULTS un-suppressed → resolver output rendered. + expect(content).toContain('## Save Results to Brain'); + expect(content).toContain('gbrain put "office-hours/'); + expect(content).toContain('Skip this entire section if `gbrain` is not on PATH'); + + // GBRAIN_CONTEXT_LOAD also un-suppressed (D6 bundling). + expect(content).toContain('## Brain Context Load'); + } finally { + cleanup(); + } + }); + + test('with detected:false (status != "ok"), brain blocks stay suppressed', () => { + const { tmpHome, cleanup } = makeFixture( + JSON.stringify({ gbrain_local_status: 'no-cli', gbrain_on_path: false, gbrain_version: null }), + ); + try { + const snap = regenAndSnapshot({ + respectDetection: true, + tmpHome, + files: PROBE_FILES, + }); + const content = snap.get('office-hours/SKILL.md')!; + + // GBRAIN_SAVE_RESULTS suppressed → no rendered block, no gbrain put line. + expect(content).not.toContain('gbrain put "office-hours/'); + // Section header from the resolver also absent (resolver returns ""). + // BUT — the BRAIN_CACHE_REFRESH and BRAIN_WRITE_BACK resolvers are NOT + // gated by detection (host-agnostic), so other "Brain ..." sections may + // still appear. We only assert the SAVE_RESULTS-specific marker is gone. + } finally { + cleanup(); + } + }); + + test('with NO detection file, brain blocks stay suppressed (same as detected:false)', () => { + const { tmpHome, cleanup } = makeFixture(null); + try { + const snap = regenAndSnapshot({ + respectDetection: true, + tmpHome, + files: PROBE_FILES, + }); + const content = snap.get('office-hours/SKILL.md')!; + expect(content).not.toContain('gbrain put "office-hours/'); + } finally { + cleanup(); + } + }); + + test('without --respect-detection flag, detection file is IGNORED (CI canonical path)', () => { + // Even if a detection file exists with detected:true, the default + // `bun run gen:skill-docs` (CI) must produce no-gbrain output so the + // committed SKILL.md stays reproducible regardless of any developer's + // local gbrain install state. + const { tmpHome, cleanup } = makeFixture( + JSON.stringify({ gbrain_local_status: 'ok', gbrain_on_path: true, gbrain_version: 'test-0.41.0' }), + ); + try { + const snap = regenAndSnapshot({ + respectDetection: false, + tmpHome, + files: PROBE_FILES, + }); + const content = snap.get('office-hours/SKILL.md')!; + expect(content).not.toContain('gbrain put "office-hours/'); + expect(content).not.toContain('## Save Results to Brain'); + } finally { + cleanup(); + } + }); +}); diff --git a/test/resolvers-gbrain-save-results.test.ts b/test/resolvers-gbrain-save-results.test.ts new file mode 100644 index 000000000..c697262d0 --- /dev/null +++ b/test/resolvers-gbrain-save-results.test.ts @@ -0,0 +1,137 @@ +/** + * Resolver regression pin for generateGBrainSaveResults + + * generateGBrainContextLoad (compressed in v1.50.0.0). + * + * Two coverage stories: + * 1. **Wiring symmetry**: all 5 planning skills (office-hours, plan-ceo-review, + * plan-eng-review, plan-design-review, plan-devex-review) get the correct + * slug prefix + tag in the emitted save instructions. + * 2. **Token-budget pin**: post-compression, each block stays under a chars + * ceiling so a future "let me just add one more line" refactor doesn't + * silently re-inflate the prompt cost back toward the ~1000-token + * naive-un-suppression baseline. + * + * Gate-tier, free, pure import + render — no host generation, no claude -p. + */ + +import { describe, test, expect } from 'bun:test'; +import { + generateGBrainContextLoad, + generateGBrainSaveResults, +} from '../scripts/resolvers/gbrain'; +import { HOST_PATHS } from '../scripts/resolvers/types'; +import type { TemplateContext } from '../scripts/resolvers/types'; + +function buildCtx(skillName: string): TemplateContext { + return { + skillName, + tmplPath: `/tmp/${skillName}/SKILL.md.tmpl`, + host: 'claude', + paths: HOST_PATHS.claude, + }; +} + +// Per-skill expected slug prefix + tag. If you add a new planning skill, +// add it here AND in scripts/resolvers/gbrain.ts skillSaveMap. If you rename +// one, this test will fail loudly — that's the regression pin working. +const PLANNING_SKILLS: Array<{ skill: string; slugPrefix: string; tag: string; title: string }> = [ + { skill: 'office-hours', slugPrefix: 'office-hours/', tag: 'design-doc', title: 'Office Hours' }, + { skill: 'plan-ceo-review', slugPrefix: 'ceo-plans/', tag: 'ceo-plan', title: 'CEO Plan' }, + { skill: 'plan-eng-review', slugPrefix: 'eng-reviews/', tag: 'eng-review', title: 'Eng Review' }, + { skill: 'plan-design-review', slugPrefix: 'design-reviews/', tag: 'design-review', title: 'Design Review' }, + { skill: 'plan-devex-review', slugPrefix: 'devex-reviews/', tag: 'devex-review', title: 'Devex Review' }, +]; + +describe('generateGBrainSaveResults — wiring + compression pin', () => { + test.each(PLANNING_SKILLS)( + '$skill emits gbrain put $slugPrefix... with $tag tag', + ({ skill, slugPrefix, tag, title }) => { + const out = generateGBrainSaveResults(buildCtx(skill)); + + // Uses gbrain put (v0.18+ subcommand), not deprecated put_page MCP op. + expect(out).toContain('gbrain put'); + expect(out).not.toContain('put_page'); + + // Per-skill slug prefix is exactly what skillSaveMap declares. + expect(out).toContain(`"${slugPrefix}"`); + + // Title prefix + tag match the metadata. + expect(out).toContain(`title: "${title}:`); + expect(out).toContain(`tags: [${tag},`); + + // Skip-header is present so agent can short-circuit when gbrain is absent. + expect(out).toContain('Skip this entire section if `gbrain` is not on PATH'); + + // Compact: points to docs/gbrain-write-surfaces.md for full template. + expect(out).toContain('docs/gbrain-write-surfaces.md'); + }, + ); + + test('all 5 planning skills produce output under ~600 chars (~150 tokens)', () => { + // Token-budget pin. Naive un-suppression would emit ~1000 tokens (~4000 chars) + // per skill. Compressed target: ~150 tokens (~600 chars). Generous ceiling + // at 750 chars to leave room for the heredoc structure without inviting a + // gradual re-inflation of the prose. + const CEILING_CHARS = 750; + for (const { skill } of PLANNING_SKILLS) { + const out = generateGBrainSaveResults(buildCtx(skill)); + if (out.length > CEILING_CHARS) { + throw new Error( + `generateGBrainSaveResults('${skill}') emitted ${out.length} chars (~${Math.round(out.length / 4)} tokens), ` + + `exceeds ceiling of ${CEILING_CHARS} chars (~${Math.round(CEILING_CHARS / 4)} tokens). ` + + `If you added necessary content, move the verbose prose into ` + + `docs/gbrain-write-surfaces.md §Save Template (which the agent reads on demand) and ` + + `keep the inline block as a short pointer + per-skill metadata. ` + + `See gbrain.ts T4/v1.50.0.0 compression rationale.`, + ); + } + } + }); + + test('unmapped skill name falls through to compact generic template', () => { + const out = generateGBrainSaveResults(buildCtx('no-such-skill')); + + // Generic fallback still emits gbrain put + skip-header + docs pointer. + expect(out).toContain('gbrain put'); + expect(out).toContain('Skip this entire section if `gbrain` is not on PATH'); + expect(out).toContain('docs/gbrain-write-surfaces.md'); + + // Should NOT contain a per-skill slug prefix from the map (would mean we + // accidentally regressed to the per-skill path for an unmapped skill). + for (const { slugPrefix } of PLANNING_SKILLS) { + expect(out).not.toContain(`"${slugPrefix}"`); + } + }); +}); + +describe('generateGBrainContextLoad — compression pin', () => { + test('emits skip-header and docs pointer, stays under ~500 chars', () => { + // Same compression discipline as SAVE_RESULTS. Context load was ~350-450 + // tokens before compression; target ~80 tokens (~320 chars). Ceiling + // generous at 500 chars to leave room for skill-specific suffixes. + const out = generateGBrainContextLoad(buildCtx('plan-ceo-review')); + expect(out).toContain('Skip this entire section if `gbrain` is not on PATH'); + expect(out).toContain('docs/gbrain-write-surfaces.md'); + expect(out).toContain('gbrain search'); + expect(out).toContain('gbrain get_page'); + if (out.length > 500) { + throw new Error( + `generateGBrainContextLoad emitted ${out.length} chars (~${Math.round(out.length / 4)} tokens), ` + + `exceeds ceiling of 500 chars (~125 tokens). ` + + `Move verbose prose to docs/gbrain-write-surfaces.md §Context Load.`, + ); + } + }); + + test('/investigate gets the data-research routing suffix', () => { + const out = generateGBrainContextLoad(buildCtx('investigate')); + expect(out).toContain('data-research'); + }); + + test('non-investigate skills do NOT get the data-research suffix', () => { + for (const { skill } of PLANNING_SKILLS) { + const out = generateGBrainContextLoad(buildCtx(skill)); + expect(out).not.toContain('data-research'); + } + }); +});