test(brain): schema migration + fence-block fallback + preflight budget (T19+T21)

3 new gate-tier test files closing the most important coverage gaps in
the brain-aware planning layer:

test/schema-version-migration.test.ts (D4 A4):
  - Cache file with mismatched schema_version triggers wipe-and-rebuild
  - Matching version + fresh TTL stays warm-hit (no unnecessary rebuild)
  - Rebuild wipes ALL files in scope, not just the one being read

test/takes-fence-fallback.test.ts:
  - Every preflight skill mentions both takes_add (preferred) and
    put_page fence-block (fallback for pre-T8 gbrain versions)
  - All 5 skills gate on BRAIN_CALIBRATION_WRITEBACK flag + personal
    trust policy
  - Per-skill weight matches SKILL_CALIBRATION_WEIGHTS (E5)
  - Write-back emits the kind=bet frontmatter shape and invalidates
    affected cache digests

test/skill-preflight-budget.test.ts (T21 / D7):
  - Per-skill BRAIN_* instruction bytes stay under 3x the runtime
    digest budget (resolver bloat catch)
  - Autoplan total instruction bytes stay under 75 KB (3x of 25 KB
    runtime cap)
  - Non-preflight skills emit zero brain bytes
  - Per-skill subset references are present in the preflight bash

Note on the 3x multiplier: SKILL_PREFLIGHT_BUDGET_BYTES governs runtime
digest data (enforced by cache CLI truncateToBudget). Instruction text
emitted by the resolver gets a separate 3x headroom — anything beyond
that signals the instructions themselves are bloated and need a trim.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan 2026-05-26 23:14:16 -07:00
parent 0c635919cd
commit e884617b7c
No known key found for this signature in database
GPG Key ID: C1F69E85C74EFE1D
3 changed files with 285 additions and 0 deletions

View File

@ -0,0 +1,102 @@
/**
* Schema-version cache migration (D4 A4 / T19).
*
* When gstack-core@1.x.y bumps and the cached _meta.json records an older
* schema_version, the cache layer triggers a FULL rebuild for the affected
* scope (not just delete-the-stale-file). Verifies the rebuild path is
* invoked AND the cache files for that scope are wiped before refresh.
*
* Gate-tier, free, ~50ms.
*/
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import { mkdtempSync, existsSync, writeFileSync, readFileSync, rmSync, mkdirSync } from 'fs';
import { join } from 'path';
import { tmpdir } from 'os';
import { GSTACK_SCHEMA_PACK_VERSION } from '../scripts/brain-cache-spec';
// Per-test sandbox directory; reassigned by beforeEach so every test starts
// with a fresh, empty GSTACK_HOME.
let TMP_HOME: string;

// Snapshot of the caller's GSTACK_HOME taken once at module load, so it can be
// put back after each test.
const ORIGINAL_HOME = process.env.GSTACK_HOME;

beforeEach(() => {
  TMP_HOME = mkdtempSync(join(tmpdir(), 'gstack-schema-test-'));
  process.env.GSTACK_HOME = TMP_HOME;
  // Evict any cached copy of the cache CLI module before the test loads it.
  // NOTE(review): presumably so the module re-reads GSTACK_HOME on load — confirm.
  delete require.cache[require.resolve('../bin/gstack-brain-cache')];
});

afterEach(() => {
  // Compare against undefined rather than truthiness: an originally empty
  // GSTACK_HOME ('') must be restored as '', not silently deleted.
  if (ORIGINAL_HOME !== undefined) process.env.GSTACK_HOME = ORIGINAL_HOME;
  else delete process.env.GSTACK_HOME;
  try {
    rmSync(TMP_HOME, { recursive: true, force: true });
  } catch {
    /* best effort */
  }
});
/**
 * Loads the brain-cache CLI module via a dynamic import.
 *
 * The import is deferred to call time, so it runs inside the test body — after
 * the beforeEach hook has repointed GSTACK_HOME at the temp directory.
 *
 * @returns The module namespace of '../bin/gstack-brain-cache'.
 */
async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
  const cacheModule: typeof import('../bin/gstack-brain-cache') = await import('../bin/gstack-brain-cache');
  return cacheModule;
}
describe('schema-version cache migration (D4 A4)', () => {
  type CacheModule = typeof import('../bin/gstack-brain-cache');

  // Schema version written into _meta.json to simulate a cache left behind by
  // an older schema pack.
  const STALE_SCHEMA_VERSION = '0.5.0';

  /**
   * Seeds the 'helsinki' project cache with the given digest files plus a
   * _meta.json that marks every one of them as freshly refreshed.
   *
   * endpoint_hash is always taken from mod.detectEndpointHash() so that the
   * ONLY thing that differs between the stale and warm scenarios is
   * schema_version — a hard-coded hash could trigger a wipe for an unrelated
   * reason and mask a broken schema check.
   *
   * @param mod           The loaded brain-cache module.
   * @param schemaVersion Value recorded as schema_version in _meta.json.
   * @param files         Map of entity name -> file body; written as `<entity>.md`.
   * @returns The cache directory that was populated.
   */
  function seedCache(mod: CacheModule, schemaVersion: string, files: Record<string, string>): string {
    const cacheDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
    mkdirSync(cacheDir, { recursive: true });

    const now = Date.now();
    const lastRefresh: Record<string, number> = {};
    for (const [entity, body] of Object.entries(files)) {
      writeFileSync(join(cacheDir, `${entity}.md`), body);
      lastRefresh[entity] = now; // fresh by TTL
    }

    writeFileSync(join(cacheDir, '_meta.json'), JSON.stringify({
      schema_version: schemaVersion,
      endpoint_hash: mod.detectEndpointHash(),
      last_refresh: lastRefresh,
      last_attempt: {},
    }));
    return cacheDir;
  }

  test('cache file with mismatched schema_version triggers wipe-and-rebuild attempt', async () => {
    const mod = await importCache();
    const cacheDir = seedCache(mod, STALE_SCHEMA_VERSION, { product: '# stale-from-old-schema\n' });
    const stalePath = join(cacheDir, 'product.md');

    // cmdGet should detect the schema mismatch and try to rebuild. Since the
    // brain is unreachable in the test env, the rebuild fails and the stale
    // file is gone (wiped during the rebuild attempt).
    mod.cmdGet('product', 'helsinki');

    // After the rebuild attempt the stale file is wiped and _meta.json shows
    // the current schema_version.
    expect(existsSync(stalePath)).toBe(false);
    const newMeta = JSON.parse(readFileSync(join(cacheDir, '_meta.json'), 'utf-8'));
    expect(newMeta.schema_version).toBe(GSTACK_SCHEMA_PACK_VERSION);
  });

  test('matching schema_version + fresh TTL is warm hit (no rebuild)', async () => {
    const mod = await importCache();
    seedCache(mod, GSTACK_SCHEMA_PACK_VERSION, { product: '# fresh content\n' });

    const result = mod.cmdGet('product', 'helsinki');

    expect(result.state).toBe('warm');
    expect(readFileSync(result.path, 'utf-8')).toBe('# fresh content\n');
  });

  test('rebuild wipes ALL files in scope, not just the one being read', async () => {
    const mod = await importCache();
    const staleFiles: Record<string, string> = {
      product: '# stale product\n',
      brand: '# stale brand\n',
      'developer-persona': '# stale persona\n',
    };
    const cacheDir = seedCache(mod, STALE_SCHEMA_VERSION, staleFiles);

    mod.cmdGet('product', 'helsinki');

    // All per-project files wiped (rebuild attempt cleared the scope). Collect
    // survivors so a failure names exactly which files were left behind.
    const survivors = Object.keys(staleFiles)
      .map((entity) => `${entity}.md`)
      .filter((fileName) => existsSync(join(cacheDir, fileName)));
    expect(survivors).toEqual([]);
  });
});

View File

@ -0,0 +1,96 @@
/**
* Per-skill brain preflight token budget enforcement (T21 / T19).
*
* Asserts that the GENERATED BRAIN_* instruction text per skill (preflight +
* cache-refresh + write-back resolver output, combined) stays within 3x its
* per-skill byte budget (SKILL_PREFLIGHT_BUDGET_BYTES from brain-cache-spec).
* Also asserts the autoplan-wide total stays under 3x
* AUTOPLAN_PREFLIGHT_BUDGET_BYTES.
*
* What's being measured: the SIZE OF THE INSTRUCTIONS injected into the
* skill's SKILL.md by the resolver, NOT the size of the cache digests at
* runtime. Runtime digest budgets are enforced separately by the cache
* CLI's truncateToBudget. This test catches resolver-side bloat: if
* generateBrainPreflight grows verbose, the instructions themselves eat
* the skill's context budget.
*
* Gate-tier, free.
*/
import { describe, test, expect } from 'bun:test';
import { generateBrainPreflight, generateBrainCacheRefresh, generateBrainWriteBack } from '../scripts/resolvers/gbrain';
import {
SKILL_DIGEST_SUBSETS,
SKILL_PREFLIGHT_BUDGET_BYTES,
AUTOPLAN_PREFLIGHT_BUDGET_BYTES,
} from '../scripts/brain-cache-spec';
import { HOST_PATHS } from '../scripts/resolvers/types';
import type { TemplateContext } from '../scripts/resolvers/types';
/**
 * Builds the minimal resolver context for a skill, pinned to the 'claude' host.
 *
 * @param skillName Skill whose resolver output is being rendered.
 * @returns A TemplateContext with a synthetic /tmp template path for that skill.
 */
function buildCtx(skillName: string): TemplateContext {
  const tmplPath = `/tmp/${skillName}/SKILL.md.tmpl`;
  const ctx: TemplateContext = {
    skillName,
    tmplPath,
    host: 'claude',
    paths: HOST_PATHS.claude,
  };
  return ctx;
}
/**
 * Measures the combined size of all BRAIN_* resolver output for one skill.
 *
 * Renders the preflight, cache-refresh and write-back blocks (each with its
 * own freshly built context, in that order), concatenates them, and returns
 * the UTF-8 byte length of the result.
 *
 * @param skillName Skill to render the resolver blocks for.
 * @returns Total UTF-8 bytes across the three rendered blocks.
 */
function totalBrainBytes(skillName: string): number {
  const generators = [generateBrainPreflight, generateBrainCacheRefresh, generateBrainWriteBack];
  const rendered = generators.reduce(
    (text, generate) => text + generate(buildCtx(skillName)),
    '',
  );
  return Buffer.byteLength(rendered, 'utf-8');
}
// Per-skill and autoplan-wide size checks on the resolver's BRAIN_* instruction
// text. Data-driven checks collect offenders into a list and assert the list is
// empty, so a failure names the exact skill(s) instead of a bare number.
describe('per-skill preflight token budget', () => {
  test('every preflight skill stays under per-skill BRAIN_* budget (3x cap, instructions vs runtime data)', () => {
    // The per-skill budget governs RUNTIME digest data, not instruction text.
    // Instruction text (resolver output) should fit within 3x the runtime
    // budget — anything more means the instructions themselves are bloated.
    const overBudget: string[] = [];
    for (const [skill, budget] of Object.entries(SKILL_PREFLIGHT_BUDGET_BYTES)) {
      const bytes = totalBrainBytes(skill);
      const cap = budget * 3;
      // Negated <= (rather than >) so a NaN cap is still reported as a failure.
      if (!(bytes <= cap)) overBudget.push(`${skill}: ${bytes} bytes > ${cap} byte cap`);
    }
    expect(overBudget).toEqual([]);
  });

  test('autoplan: sum across 4 plan-* skills stays under AUTOPLAN_PREFLIGHT_BUDGET_BYTES × 3 (instructions)', () => {
    const autoplanSkills = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
    const total = autoplanSkills.reduce((sum, s) => sum + totalBrainBytes(s), 0);
    // Same 3x rationale: AUTOPLAN budget governs runtime data, instructions
    // get more headroom.
    expect(total).toBeLessThanOrEqual(AUTOPLAN_PREFLIGHT_BUDGET_BYTES * 3);
  });

  test('non-preflight skills emit zero brain bytes', () => {
    const nonPlanning = ['ship', 'qa', 'investigate', 'retro', 'design-review'];
    const emitting = nonPlanning
      .map((skill) => ({ skill, bytes: totalBrainBytes(skill) }))
      .filter(({ bytes }) => bytes !== 0)
      .map(({ skill, bytes }) => `${skill}: ${bytes} bytes`);
    expect(emitting).toEqual([]);
  });

  test('preflight bytes are positive for every registered preflight skill', () => {
    const silent = Object.keys(SKILL_DIGEST_SUBSETS).filter((skill) => !(totalBrainBytes(skill) > 0));
    expect(silent).toEqual([]);
  });
});
// Absolute ceiling on autoplan instruction bytes, plus a check that each
// skill's preflight block references every entity in its digest subset.
describe('autoplan total preflight budget (T21 / D7)', () => {
  test('autoplan total under 25 KB instruction cap × 3 (75 KB instruction budget)', () => {
    const autoplanSkills = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
    const total = autoplanSkills.reduce((sum, s) => sum + totalBrainBytes(s), 0);
    // Hard 75 KB ceiling on INSTRUCTION bytes across the 4-skill autoplan,
    // written as a literal so it holds regardless of the spec constants.
    // This measures resolver output only; runtime digest sizes are not
    // asserted here.
    expect(total).toBeLessThan(75 * 1024);
  });

  test('per-skill subset emits its expected entity references in the preflight block', () => {
    // Collect every missing reference so a failure names the skill and the
    // exact command string that was expected.
    const missing: string[] = [];
    for (const [skill, subset] of Object.entries(SKILL_DIGEST_SUBSETS)) {
      const preflight = generateBrainPreflight(buildCtx(skill));
      for (const entity of subset) {
        const expectedCommand = `gstack-brain-cache get ${entity}`;
        if (!preflight.includes(expectedCommand)) missing.push(`${skill}: ${expectedCommand}`);
      }
    }
    expect(missing).toEqual([]);
  });
});

View File

@ -0,0 +1,87 @@
/**
* Phase 2 calibration write-back fence-block fallback (T19).
*
* The BRAIN_WRITE_BACK resolver output describes two paths:
* 1. Preferred: mcp__gbrain__takes_add op (upstream gbrain v0.42+, T8)
* 2. Fallback: mcp__gbrain__put_page with a gstack:takes fence block
*
* Until T8 ships, the fallback is the only path. Verify the resolver output
* mentions the fence-block fallback explicitly so the agent knows what to
* do when takes_add returns MCPMethodNotFound.
*
* Gate-tier, free, pure import + render.
*/
import { describe, test, expect } from 'bun:test';
import { generateBrainWriteBack } from '../scripts/resolvers/gbrain';
import { SKILL_DIGEST_SUBSETS, SKILL_CALIBRATION_WEIGHTS } from '../scripts/brain-cache-spec';
import { HOST_PATHS } from '../scripts/resolvers/types';
import type { TemplateContext } from '../scripts/resolvers/types';
/**
 * Creates the resolver context used to render a skill's write-back block.
 * The host is fixed to 'claude' and the template path is a synthetic /tmp path.
 *
 * @param skillName Name of the skill to render for.
 * @returns The TemplateContext passed to the resolver.
 */
function buildCtx(skillName: string): TemplateContext {
  const host = 'claude';
  const context: TemplateContext = {
    skillName,
    tmplPath: `/tmp/${skillName}/SKILL.md.tmpl`,
    host,
    paths: HOST_PATHS[host],
  };
  return context;
}
// Verifies the text emitted by generateBrainWriteBack: both write paths are
// documented, the feature-flag and trust-policy gates are mentioned, the
// take frontmatter fields appear, and non-preflight skills get nothing.
describe('Phase 2 write-back fence-block fallback', () => {
  test('every preflight skill emits write-back with fallback path documented', () => {
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const out = generateBrainWriteBack(buildCtx(skill));
      // Mentions takes_add (preferred)
      expect(out).toContain('takes_add');
      // Mentions put_page fallback
      expect(out).toContain('put_page');
      // Mentions the takes fence-block syntax. 'takes_add' itself contains
      // 'takes', so strip it first — otherwise this assertion can never fail
      // independently of the takes_add check above.
      const withoutTakesAdd = out.split('takes_add').join('');
      expect(withoutTakesAdd).toContain('takes');
    }
  });

  test('write-back guidance gates on BRAIN_CALIBRATION_WRITEBACK feature flag', () => {
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const out = generateBrainWriteBack(buildCtx(skill));
      expect(out).toContain('BRAIN_CALIBRATION_WRITEBACK');
    }
  });

  test('write-back guidance gates on brain_trust_policy == personal', () => {
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const out = generateBrainWriteBack(buildCtx(skill));
      expect(out).toContain('personal');
      expect(out).toContain('brain_trust_policy');
    }
  });

  test('write-back emits the kind=bet take frontmatter shape', () => {
    const out = generateBrainWriteBack(buildCtx('plan-ceo-review'));
    const frontmatterFields = [
      'kind: bet',
      'holder:',
      'claim:',
      'weight:',
      'since_date:',
      'expected_resolution:',
      'source_skill:',
    ];
    for (const field of frontmatterFields) {
      expect(out).toContain(field);
    }
  });

  test('per-skill weight matches SKILL_CALIBRATION_WEIGHTS', () => {
    let checked = 0;
    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
      const weight = SKILL_CALIBRATION_WEIGHTS[skill];
      // Skills without a registered calibration weight are skipped.
      if (weight == null) continue;
      const out = generateBrainWriteBack(buildCtx(skill));
      expect(out).toContain(`weight: ${weight}`);
      checked += 1;
    }
    // Guard against a vacuous pass: if every skill was skipped above, nothing
    // was actually verified.
    expect(checked).toBeGreaterThan(0);
  });

  test('write-back invalidates affected cache digests after write', () => {
    const out = generateBrainWriteBack(buildCtx('plan-ceo-review'));
    expect(out).toContain('gstack-brain-cache invalidate');
  });

  test('non-preflight skill gets empty write-back (no Phase 2 path)', () => {
    expect(generateBrainWriteBack(buildCtx('ship'))).toBe('');
    expect(generateBrainWriteBack(buildCtx('qa'))).toBe('');
  });
});