mirror of https://github.com/garrytan/gstack.git
test(brain): resolver compression + detection-override regression pins
test/resolvers-gbrain-save-results.test.ts (140 LOC, 10 tests):
- Per-skill assertions for all 5 planning skills: emits gbrain put +
correct slug prefix + tag + title.
- Skip-header present so agent can short-circuit when gbrain isn't
on PATH.
- Compression pin: each per-skill block stays under 750 chars
(~190 tokens) — guards against a future "let me add one more
line" refactor silently re-inflating toward the ~1000-token naive
un-suppression baseline.
- Generic fallback for unmapped skill names still works.
- /investigate gets the data-research routing suffix; non-investigate
skills do not.
- generateGBrainContextLoad stays under 500 chars (~125 tokens).
test/gbrain-detection-override.test.ts (120 LOC, 4 tests):
- End-to-end through gen-skill-docs subprocess against an isolated
temp GSTACK_HOME. Asserts:
* detected:true un-suppresses GBRAIN_* → SKILL.md gains the block
* detected:false (status != "ok") suppresses → no block
* no detection file suppresses → no block (graceful default)
* no --respect-detection flag IGNORES the detection file → no
block (CI canonical path stays reproducible)
Each detection-override test restores the canonical SKILL.md in a
finally block so the working tree stays clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
78313b4573
commit
7c82ec055e
|
|
@ -0,0 +1,193 @@
|
|||
/**
|
||||
* Regression pin for the setup-time gbrain detection → gen-skill-docs
|
||||
* override (T2 / v1.50.0.0).
|
||||
*
|
||||
* The override mechanism lives in scripts/gen-skill-docs.ts: when invoked
|
||||
* with --respect-detection, it reads ~/.gstack/gbrain-detection.json and
|
||||
* un-suppresses GBRAIN_CONTEXT_LOAD + GBRAIN_SAVE_RESULTS for hosts that
|
||||
* statically list them in suppressedResolvers (claude, codex, slate,
|
||||
* factory, opencode, openclaw, cursor, kiro).
|
||||
*
|
||||
* Tests drive gen-skill-docs as a subprocess against a temp GSTACK_HOME
|
||||
* with each detection state, then assert what landed in the generated
|
||||
* Claude-host SKILL.md. This is end-to-end through the actual override
|
||||
* pipeline — no mocking — so it catches regressions in either the loader
|
||||
* or the suppressedResolvers filter.
|
||||
*
|
||||
* Gate-tier, free, ~3-5s per test (gen-skill-docs runs the full skill
|
||||
* generation against the real repo; --host claude scopes to one host).
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
|
||||
import { execFileSync } from 'child_process';
|
||||
import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
|
||||
// Repository root: the parent of the directory that contains this test file.
// Used as the subprocess cwd and as the base for generated-file paths.
const REPO_ROOT = join(import.meta.dir, '..');
|
||||
|
||||
/** Handle to a throwaway directory that stands in for GSTACK_HOME in one test. */
interface FixtureEnv {
  /** Path of the temp directory created for this fixture. */
  tmpHome: string;
  /** Recursively removes `tmpHome`. Best effort: never throws. */
  cleanup: () => void;
}

/**
 * Create a temp directory to act as GSTACK_HOME and optionally seed it with a
 * `gbrain-detection.json` file.
 *
 * @param detectionJson Raw contents to write to `gbrain-detection.json`, or
 *   `null` to leave the directory empty (the "no detection file" case).
 * @returns The directory path plus a cleanup callback the caller must invoke.
 * @throws Any error raised while creating the directory or writing the file.
 *   If seeding the file fails, the temp directory is removed before rethrowing
 *   so a failed fixture never leaks a directory the caller cannot clean up.
 */
function makeFixture(detectionJson: string | null): FixtureEnv {
  const tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-detect-test-'));

  const cleanup = (): void => {
    try {
      rmSync(tmpHome, { recursive: true, force: true });
    } catch {
      // best effort
    }
  };

  if (detectionJson !== null) {
    try {
      writeFileSync(join(tmpHome, 'gbrain-detection.json'), detectionJson);
    } catch (err: unknown) {
      // The caller never receives `cleanup` on this path, so tidy up here.
      cleanup();
      throw err;
    }
  }

  return { tmpHome, cleanup };
}
|
||||
|
||||
/**
 * Run gen-skill-docs for the Claude host against an isolated GSTACK_HOME and
 * return the regenerated content of the requested files.
 *
 * gen-skill-docs doesn't expose an output-path arg, so the generator writes
 * over the committed files in the working tree. To leave the tree clean this
 * helper:
 *   1. reads the committed content of every file in `opts.files`,
 *   2. runs the generator,
 *   3. snapshots the regenerated content,
 *   4. writes the committed content back in a `finally` block (so the restore
 *      also happens when the generator or the snapshot read throws).
 *
 * NOTE(review): only the files listed in `opts.files` are restored. If the
 * generator also rewrites other files for the host, those stay modified until
 * a later canonical regeneration — confirm whether that matters for callers.
 *
 * @param opts.respectDetection When true, passes `--respect-detection` to the
 *   generator.
 * @param opts.tmpHome Directory exported to the subprocess as GSTACK_HOME.
 * @param opts.files Repo-relative paths to snapshot and restore.
 * @returns Map from each path in `opts.files` to its regenerated content.
 * @throws If reading a file fails, the subprocess exits non-zero or exceeds
 *   the 30s timeout, or any file cannot be restored afterwards.
 */
function regenAndSnapshot(opts: {
  respectDetection: boolean;
  tmpHome: string;
  files: string[];
}): Map<string, string> {
  // Save committed content so we can restore after snapshotting.
  const original = new Map<string, string>();
  for (const f of opts.files) {
    original.set(f, readFileSync(join(REPO_ROOT, f), 'utf-8'));
  }

  const args = ['run', 'scripts/gen-skill-docs.ts', '--host', 'claude'];
  if (opts.respectDetection) args.push('--respect-detection');

  try {
    execFileSync('bun', args, {
      cwd: REPO_ROOT,
      env: { ...process.env, GSTACK_HOME: opts.tmpHome },
      stdio: ['ignore', 'pipe', 'pipe'],
      timeout: 30_000,
    });

    // Snapshot the regenerated content.
    const snapshot = new Map<string, string>();
    for (const f of opts.files) {
      snapshot.set(f, readFileSync(join(REPO_ROOT, f), 'utf-8'));
    }
    return snapshot;
  } finally {
    // Always restore so the test leaves the working tree clean. Attempt every
    // file even if one write fails, so a single failure cannot leave the rest
    // of the probed files in their regenerated state.
    const restoreFailures: string[] = [];
    for (const [f, content] of original) {
      try {
        writeFileSync(join(REPO_ROOT, f), content);
      } catch (err: unknown) {
        restoreFailures.push(`${f}: ${err instanceof Error ? err.message : String(err)}`);
      }
    }
    if (restoreFailures.length > 0) {
      throw new Error(
        `regenAndSnapshot could not restore committed content for: ${restoreFailures.join('; ')}`,
      );
    }
  }
}
|
||||
|
||||
describe('gbrain detection override → gen-skill-docs', () => {
  // Single skill probe is enough to assert the override pipeline. The
  // resolver unit test (test/resolvers-gbrain-save-results.test.ts) covers
  // per-skill metadata correctness already.
  const PROBE_FILE = 'office-hours/SKILL.md';
  const PROBE_FILES = [PROBE_FILE];

  // Detection payload representing a working local gbrain install.
  const DETECTED_OK = JSON.stringify({
    gbrain_local_status: 'ok',
    gbrain_on_path: true,
    gbrain_version: 'test-0.41.0',
  });

  /**
   * Build a fixture with the given detection file contents (or none when
   * `null`), regenerate the probe skill, and return its generated content.
   * The temp GSTACK_HOME is always removed, and a missing snapshot entry is
   * reported explicitly instead of being hidden behind a non-null assertion.
   */
  function renderProbe(detectionJson: string | null, respectDetection: boolean): string {
    const { tmpHome, cleanup } = makeFixture(detectionJson);
    try {
      const snap = regenAndSnapshot({ respectDetection, tmpHome, files: PROBE_FILES });
      const content = snap.get(PROBE_FILE);
      if (content === undefined) {
        throw new Error(`regenAndSnapshot returned no content for ${PROBE_FILE}`);
      }
      return content;
    } finally {
      cleanup();
    }
  }

  test('with detected:true, Claude-host SKILL.md gains brain-aware blocks', () => {
    const content = renderProbe(DETECTED_OK, true);

    // GBRAIN_SAVE_RESULTS un-suppressed → resolver output rendered.
    expect(content).toContain('## Save Results to Brain');
    expect(content).toContain('gbrain put "office-hours/');
    expect(content).toContain('Skip this entire section if `gbrain` is not on PATH');

    // GBRAIN_CONTEXT_LOAD also un-suppressed (D6 bundling).
    expect(content).toContain('## Brain Context Load');
  });

  test('with detected:false (status != "ok"), brain blocks stay suppressed', () => {
    const content = renderProbe(
      JSON.stringify({ gbrain_local_status: 'no-cli', gbrain_on_path: false, gbrain_version: null }),
      true,
    );

    // GBRAIN_SAVE_RESULTS suppressed → no rendered block, no gbrain put line.
    // The BRAIN_CACHE_REFRESH and BRAIN_WRITE_BACK resolvers are NOT gated by
    // detection (host-agnostic), so other "Brain ..." sections may still
    // appear. We only assert the SAVE_RESULTS-specific marker is gone.
    expect(content).not.toContain('gbrain put "office-hours/');
  });

  test('with NO detection file, brain blocks stay suppressed (same as detected:false)', () => {
    const content = renderProbe(null, true);
    expect(content).not.toContain('gbrain put "office-hours/');
  });

  test('without --respect-detection flag, detection file is IGNORED (CI canonical path)', () => {
    // Even if a detection file exists with detected:true, the default
    // `bun run gen:skill-docs` (CI) must produce no-gbrain output so the
    // committed SKILL.md stays reproducible regardless of any developer's
    // local gbrain install state.
    const content = renderProbe(DETECTED_OK, false);
    expect(content).not.toContain('gbrain put "office-hours/');
    expect(content).not.toContain('## Save Results to Brain');
  });
});
|
||||
|
|
@ -0,0 +1,137 @@
|
|||
/**
|
||||
* Resolver regression pin for generateGBrainSaveResults +
|
||||
* generateGBrainContextLoad (compressed in v1.50.0.0).
|
||||
*
|
||||
* Two coverage stories:
|
||||
* 1. **Wiring symmetry**: all 5 planning skills (office-hours, plan-ceo-review,
|
||||
* plan-eng-review, plan-design-review, plan-devex-review) get the correct
|
||||
* slug prefix + tag in the emitted save instructions.
|
||||
* 2. **Token-budget pin**: post-compression, each block stays under a chars
|
||||
* ceiling so a future "let me just add one more line" refactor doesn't
|
||||
* silently re-inflate the prompt cost back toward the ~1000-token
|
||||
* naive-un-suppression baseline.
|
||||
*
|
||||
* Gate-tier, free, pure import + render — no host generation, no claude -p.
|
||||
*/
|
||||
|
||||
import { describe, test, expect } from 'bun:test';
|
||||
import {
|
||||
generateGBrainContextLoad,
|
||||
generateGBrainSaveResults,
|
||||
} from '../scripts/resolvers/gbrain';
|
||||
import { HOST_PATHS } from '../scripts/resolvers/types';
|
||||
import type { TemplateContext } from '../scripts/resolvers/types';
|
||||
|
||||
/**
 * Build a minimal Claude-host template context for the given skill name.
 * The template path is a placeholder under /tmp derived from the skill name.
 */
function buildCtx(skillName: string): TemplateContext {
  const ctx: TemplateContext = {
    skillName,
    tmplPath: `/tmp/${skillName}/SKILL.md.tmpl`,
    host: 'claude',
    paths: HOST_PATHS.claude,
  };
  return ctx;
}
|
||||
|
||||
/** Expected save metadata for one planning skill. */
type PlanningSkillCase = {
  skill: string;
  slugPrefix: string;
  tag: string;
  title: string;
};

// Expected slug prefix, tag and title for each planning skill. When adding a
// planning skill, register it here AND in scripts/resolvers/gbrain.ts
// skillSaveMap. Renaming one makes this test fail loudly — that is the
// regression pin doing its job.
const PLANNING_SKILLS: PlanningSkillCase[] = [
  {
    skill: 'office-hours',
    slugPrefix: 'office-hours/',
    tag: 'design-doc',
    title: 'Office Hours',
  },
  {
    skill: 'plan-ceo-review',
    slugPrefix: 'ceo-plans/',
    tag: 'ceo-plan',
    title: 'CEO Plan',
  },
  {
    skill: 'plan-eng-review',
    slugPrefix: 'eng-reviews/',
    tag: 'eng-review',
    title: 'Eng Review',
  },
  {
    skill: 'plan-design-review',
    slugPrefix: 'design-reviews/',
    tag: 'design-review',
    title: 'Design Review',
  },
  {
    skill: 'plan-devex-review',
    slugPrefix: 'devex-reviews/',
    tag: 'devex-review',
    title: 'Devex Review',
  },
];
|
||||
|
||||
describe('generateGBrainSaveResults — wiring + compression pin', () => {
  test.each(PLANNING_SKILLS)(
    '$skill emits gbrain put $slugPrefix... with $tag tag',
    (expected) => {
      const rendered = generateGBrainSaveResults(buildCtx(expected.skill));

      // The block must use the `gbrain put` subcommand (v0.18+) and must not
      // mention the deprecated put_page MCP op.
      expect(rendered).toContain('gbrain put');
      expect(rendered).not.toContain('put_page');

      // Slug prefix matches what skillSaveMap declares for this skill.
      expect(rendered).toContain(`"${expected.slugPrefix}<feature-slug>"`);

      // Title prefix and tag come from the same per-skill metadata.
      expect(rendered).toContain(`title: "${expected.title}:`);
      expect(rendered).toContain(`tags: [${expected.tag},`);

      // Skip-header lets the agent short-circuit when gbrain is absent.
      expect(rendered).toContain('Skip this entire section if `gbrain` is not on PATH');

      // Compact form: the full template lives in docs/gbrain-write-surfaces.md.
      expect(rendered).toContain('docs/gbrain-write-surfaces.md');
    },
  );

  test('all 5 planning skills produce output under ~600 chars (~150 tokens)', () => {
    // Token-budget pin. Naive un-suppression would emit ~1000 tokens
    // (~4000 chars) per skill; the compressed target is ~150 tokens
    // (~600 chars). The ceiling is a generous 750 chars: enough room for the
    // heredoc structure, not enough to let the prose gradually re-inflate.
    const CEILING_CHARS = 750;
    const approxTokens = (chars: number): number => Math.round(chars / 4);

    PLANNING_SKILLS.forEach(({ skill }) => {
      const size = generateGBrainSaveResults(buildCtx(skill)).length;
      if (size <= CEILING_CHARS) return;

      throw new Error(
        [
          `generateGBrainSaveResults('${skill}') emitted ${size} chars (~${approxTokens(size)} tokens), `,
          `exceeds ceiling of ${CEILING_CHARS} chars (~${approxTokens(CEILING_CHARS)} tokens). `,
          `If you added necessary content, move the verbose prose into `,
          `docs/gbrain-write-surfaces.md §Save Template (which the agent reads on demand) and `,
          `keep the inline block as a short pointer + per-skill metadata. `,
          `See gbrain.ts T4/v1.50.0.0 compression rationale.`,
        ].join(''),
      );
    });
  });

  test('unmapped skill name falls through to compact generic template', () => {
    const rendered = generateGBrainSaveResults(buildCtx('no-such-skill'));

    // The generic fallback still carries gbrain put, the skip-header and the
    // docs pointer.
    const requiredMarkers = [
      'gbrain put',
      'Skip this entire section if `gbrain` is not on PATH',
      'docs/gbrain-write-surfaces.md',
    ];
    for (const marker of requiredMarkers) {
      expect(rendered).toContain(marker);
    }

    // No mapped slug prefix may leak in: that would mean an unmapped skill
    // was routed down the per-skill path.
    PLANNING_SKILLS.forEach(({ slugPrefix }) => {
      expect(rendered).not.toContain(`"${slugPrefix}<feature-slug>"`);
    });
  });
});
|
||||
|
||||
describe('generateGBrainContextLoad — compression pin', () => {
  test('emits skip-header and docs pointer, stays under ~500 chars', () => {
    // Same compression discipline as SAVE_RESULTS. Context load used to be
    // ~350-450 tokens; the target is ~80 tokens (~320 chars). The 500-char
    // ceiling leaves room for skill-specific suffixes.
    const rendered = generateGBrainContextLoad(buildCtx('plan-ceo-review'));

    const requiredMarkers = [
      'Skip this entire section if `gbrain` is not on PATH',
      'docs/gbrain-write-surfaces.md',
      'gbrain search',
      'gbrain get_page',
    ];
    for (const marker of requiredMarkers) {
      expect(rendered).toContain(marker);
    }

    const size = rendered.length;
    if (size <= 500) return;

    throw new Error(
      [
        `generateGBrainContextLoad emitted ${size} chars (~${Math.round(size / 4)} tokens), `,
        `exceeds ceiling of 500 chars (~125 tokens). `,
        `Move verbose prose to docs/gbrain-write-surfaces.md §Context Load.`,
      ].join(''),
    );
  });

  test('/investigate gets the data-research routing suffix', () => {
    const rendered = generateGBrainContextLoad(buildCtx('investigate'));
    expect(rendered).toContain('data-research');
  });

  test('non-investigate skills do NOT get the data-research suffix', () => {
    PLANNING_SKILLS.forEach(({ skill }) => {
      const rendered = generateGBrainContextLoad(buildCtx(skill));
      expect(rendered).not.toContain('data-research');
    });
  });
});
|
||||
Loading…
Reference in New Issue