test(brain): schema migration + fence-block fallback + preflight budget (T19+T21)

3 new gate-tier test files closing the most important coverage gaps in the brain-aware planning layer: test/schema-version-migration.test.ts (D4 A4): - Cache file with mismatched schema_version triggers wipe-and-rebuild - Matching version + fresh TTL stays warm-hit (no unnecessary rebuild) - Rebuild wipes ALL files in scope, not just the one being read test/takes-fence-fallback.test.ts: - Every preflight skill mentions both takes_add (preferred) and put_page fence-block (fallback for pre-T8 gbrain versions) - All 5 skills gate on BRAIN_CALIBRATION_WRITEBACK flag + personal trust policy - Per-skill weight matches SKILL_CALIBRATION_WEIGHTS (E5) - Write-back emits the kind=bet frontmatter shape and invalidates affected cache digests test/skill-preflight-budget.test.ts (T21 / D7): - Per-skill BRAIN_* instruction bytes stay under 3x the runtime digest budget (resolver bloat catch) - Autoplan total instruction bytes stay under 75 KB (3x of 25 KB runtime cap) - Non-preflight skills emit zero brain bytes - Per-skill subset references are present in the preflight bash Note on the 3x multiplier: SKILL_PREFLIGHT_BUDGET_BYTES governs runtime digest data (enforced by cache CLI truncateToBudget). Instruction text emitted by the resolver gets a separate 3x headroom — anything beyond that signals the instructions themselves are bloated and need a trim. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-26 23:14:16 -07:00 · 2026-05-26 23:14:16 -07:00 · e884617b7c
parent 0c635919cd
commit e884617b7c
3 changed files with 285 additions and 0 deletions
--- a/test/schema-version-migration.test.ts
+++ b/test/schema-version-migration.test.ts
@ -0,0 +1,102 @@
+/**
+ * Schema-version cache migration (D4 A4 / T19).
+ *
+ * When gstack-core@1.x.y bumps and the cached _meta.json records an older
+ * schema_version, the cache layer triggers a FULL rebuild for the affected
+ * scope (not just delete-the-stale-file). Verifies the rebuild path is
+ * invoked AND the cache files for that scope are wiped before refresh.
+ *
+ * Gate-tier, free, ~50ms.
+ */
+
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import { mkdtempSync, existsSync, writeFileSync, readFileSync, rmSync, mkdirSync } from 'fs';
+import { join } from 'path';
+import { tmpdir } from 'os';
+import { GSTACK_SCHEMA_PACK_VERSION } from '../scripts/brain-cache-spec';
+
+let TMP_HOME: string; // scratch GSTACK_HOME for the current test (assigned in beforeEach)
+const ORIGINAL_HOME = process.env.GSTACK_HOME; // captured once at module load so afterEach can restore it
+
+beforeEach(() => {
+  TMP_HOME = mkdtempSync(join(tmpdir(), 'gstack-schema-test-'));
+  process.env.GSTACK_HOME = TMP_HOME;
+  delete require.cache[require.resolve('../bin/gstack-brain-cache')]; // presumably forces a fresh module load per test — confirm
+});
+
+afterEach(() => {
+  if (ORIGINAL_HOME !== undefined) process.env.GSTACK_HOME = ORIGINAL_HOME; // also restores '' (a truthiness check would drop it)
+  else delete process.env.GSTACK_HOME;
+  try { rmSync(TMP_HOME, { recursive: true, force: true }); } catch { /* best effort */ }
+});
+
+async function importCache(): Promise<typeof import('../bin/gstack-brain-cache')> {
+  return import('../bin/gstack-brain-cache'); // dynamic import performed at call time; the declared return type makes a cast unnecessary
+}
+
+describe('schema-version cache migration (D4 A4)', () => {
+  test('cache file with mismatched schema_version triggers wipe-and-rebuild attempt', async () => {
+    const mod = await importCache();
+    const cacheDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
+    mkdirSync(cacheDir, { recursive: true });
+    const stalePath = join(cacheDir, 'product.md');
+    writeFileSync(stalePath, '# stale-from-old-schema\n');
+    writeFileSync(join(cacheDir, '_meta.json'), JSON.stringify({
+      schema_version: '0.5.0', // old version: the only stale field in this fixture
+      endpoint_hash: mod.detectEndpointHash(), // current endpoint, so an endpoint mismatch cannot explain the wipe
+      last_refresh: { product: Date.now() }, // fresh by TTL
+      last_attempt: {},
+    }));
+
+    // cmdGet should detect schema mismatch and try to rebuild. Since brain is
+    // unreachable in the test env, the rebuild fails and the stale file is
+    // gone (wiped during the rebuild attempt).
+    mod.cmdGet('product', 'helsinki');
+
+    // After rebuild attempt with unreachable brain, the stale file is wiped
+    // and _meta.json shows the current schema_version.
+    expect(existsSync(stalePath)).toBe(false);
+    const newMeta = JSON.parse(readFileSync(join(cacheDir, '_meta.json'), 'utf-8'));
+    expect(newMeta.schema_version).toBe(GSTACK_SCHEMA_PACK_VERSION);
+  });
+
+  test('matching schema_version + fresh TTL is warm hit (no rebuild)', async () => {
+    const mod = await importCache();
+    const cacheDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
+    mkdirSync(cacheDir, { recursive: true });
+    const productPath = join(cacheDir, 'product.md');
+    writeFileSync(productPath, '# fresh content\n');
+    writeFileSync(join(cacheDir, '_meta.json'), JSON.stringify({
+      schema_version: GSTACK_SCHEMA_PACK_VERSION,
+      endpoint_hash: mod.detectEndpointHash(),
+      last_refresh: { product: Date.now() },
+      last_attempt: {},
+    }));
+
+    const result = mod.cmdGet('product', 'helsinki');
+    expect(result.state).toBe('warm');
+    expect(readFileSync(result.path, 'utf-8')).toBe('# fresh content\n');
+  });
+
+  test('rebuild wipes ALL files in scope, not just the one being read', async () => {
+    const mod = await importCache();
+    const cacheDir = join(TMP_HOME, 'projects', 'helsinki', 'brain-cache');
+    mkdirSync(cacheDir, { recursive: true });
+    writeFileSync(join(cacheDir, 'product.md'), '# stale product\n');
+    writeFileSync(join(cacheDir, 'brand.md'), '# stale brand\n');
+    writeFileSync(join(cacheDir, 'developer-persona.md'), '# stale persona\n');
+    writeFileSync(join(cacheDir, '_meta.json'), JSON.stringify({
+      schema_version: '0.5.0', // old version: the only stale field in this fixture
+      endpoint_hash: mod.detectEndpointHash(), // current endpoint, so an endpoint mismatch cannot explain the wipe
+      last_refresh: { product: Date.now(), brand: Date.now(), 'developer-persona': Date.now() },
+      last_attempt: {},
+    }));
+
+    mod.cmdGet('product', 'helsinki');
+
+    // All per-project files wiped (rebuild attempt cleared the scope)
+    expect(existsSync(join(cacheDir, 'product.md'))).toBe(false);
+    expect(existsSync(join(cacheDir, 'brand.md'))).toBe(false);
+    expect(existsSync(join(cacheDir, 'developer-persona.md'))).toBe(false);
+  });
+});
--- a/test/skill-preflight-budget.test.ts
+++ b/test/skill-preflight-budget.test.ts
@ -0,0 +1,96 @@
+/**
+ * Per-skill brain preflight token budget enforcement (T21 / T19).
+ *
+ * Asserts that the generated BRAIN_* instruction text per skill (preflight +
+ * cache refresh + write-back) stays within 3x its per-skill byte budget
+ * (SKILL_PREFLIGHT_BUDGET_BYTES from brain-cache-spec). Also asserts the
+ * autoplan-wide total stays under 3x AUTOPLAN_PREFLIGHT_BUDGET_BYTES.
+ *
+ * What's being measured: the SIZE OF THE INSTRUCTIONS injected into the
+ * skill's SKILL.md by the resolver, NOT the size of the cache digests at
+ * runtime. Runtime digest budgets are enforced separately by the cache
+ * CLI's truncateToBudget. This test catches resolver-side bloat: if
+ * generateBrainPreflight grows verbose, the instructions themselves eat
+ * the skill's context budget.
+ *
+ * Gate-tier, free.
+ */
+
+import { describe, test, expect } from 'bun:test';
+import { generateBrainPreflight, generateBrainCacheRefresh, generateBrainWriteBack } from '../scripts/resolvers/gbrain';
+import {
+  SKILL_DIGEST_SUBSETS,
+  SKILL_PREFLIGHT_BUDGET_BYTES,
+  AUTOPLAN_PREFLIGHT_BUDGET_BYTES,
+} from '../scripts/brain-cache-spec';
+import { HOST_PATHS } from '../scripts/resolvers/types';
+import type { TemplateContext } from '../scripts/resolvers/types';
+
+/**
+ * Minimal claude-host TemplateContext for `skillName`, as passed to the gbrain resolvers.
+ */
+function buildCtx(skillName: string): TemplateContext {
+  const tmplPath = `/tmp/${skillName}/SKILL.md.tmpl`;
+  const ctx: TemplateContext = { skillName, tmplPath, host: 'claude', paths: HOST_PATHS.claude };
+  return ctx;
+}
+
+/** Combined UTF-8 byte size of the preflight, cache-refresh and write-back resolver output for one skill. */
+function totalBrainBytes(skillName: string): number {
+  const render = (generate: (ctx: TemplateContext) => string): string => generate(buildCtx(skillName));
+  const combined = `${render(generateBrainPreflight)}${render(generateBrainCacheRefresh)}${render(generateBrainWriteBack)}`;
+  return Buffer.byteLength(combined, 'utf-8');
+}
+
+describe('per-skill preflight token budget', () => {
+  test('every preflight skill stays under per-skill BRAIN_* budget (3x cap, instructions vs runtime data)', () => {
+    // The per-skill budget governs RUNTIME digest data, not instruction text.
+    // Instruction text (resolver output) should fit within 3x the runtime
+    // budget — anything more means the instructions themselves are bloated.
+    // Offenders are collected so a failure names the skill, its bytes and cap.
+    const overBudget = Object.entries(SKILL_PREFLIGHT_BUDGET_BYTES)
+      .map(([skill, budget]) => ({ skill, bytes: totalBrainBytes(skill), cap: budget * 3 }))
+      .filter(({ bytes, cap }) => bytes > cap);
+    expect(overBudget).toEqual([]);
+  });
+
+  test('autoplan: sum across 4 plan-* skills stays under AUTOPLAN_PREFLIGHT_BUDGET_BYTES × 3 (instructions)', () => {
+    const autoplanSkills = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
+    const total = autoplanSkills.reduce((sum, s) => sum + totalBrainBytes(s), 0);
+    // Same 3x rationale: AUTOPLAN budget governs runtime data, instructions
+    // get more headroom.
+    expect(total).toBeLessThanOrEqual(AUTOPLAN_PREFLIGHT_BUDGET_BYTES * 3);
+  });
+
+  test('non-preflight skills emit zero brain bytes', () => {
+    const nonPlanning = ['ship', 'qa', 'investigate', 'retro', 'design-review'];
+    // Collect offenders so a failure lists every skill that emitted brain text.
+    const leaking = nonPlanning.filter((skill) => totalBrainBytes(skill) !== 0);
+    expect(leaking).toEqual([]);
+  });
+
+  test('preflight bytes are positive for every registered preflight skill', () => {
+    // Collect offenders so a failure lists every registered skill that rendered nothing.
+    const silent = Object.keys(SKILL_DIGEST_SUBSETS).filter((skill) => !(totalBrainBytes(skill) > 0));
+    expect(silent).toEqual([]);
+  });
+});
+
+describe('autoplan total preflight budget (T21 / D7)', () => {
+  test('autoplan total under 25 KB runtime cap × 3 (75 KB instruction budget)', () => {
+    const autoplanSkills = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
+    const total = autoplanSkills.reduce((sum, s) => sum + totalBrainBytes(s), 0);
+    // Hard 75 KB ceiling on instruction text across the 4-skill autoplan,
+    // i.e. 3x the 25 KB figure this file treats as the runtime digest cap.
+    expect(total).toBeLessThan(75 * 1024);
+  });
+
+  test('per-skill subset emits its expected entity references in the preflight block', () => {
+    const missing = Object.entries(SKILL_DIGEST_SUBSETS).flatMap(([skill, subset]) => {
+      const preflight = generateBrainPreflight(buildCtx(skill));
+      const absent = [...subset].filter((entity) => !preflight.includes(`gstack-brain-cache get ${entity}`));
+      return absent.map((entity) => `${skill}: ${entity}`);
+    });
+    expect(missing).toEqual([]); // on failure the diff lists each missing reference as "<skill>: <entity>"
+  });
+});
--- a/test/takes-fence-fallback.test.ts
+++ b/test/takes-fence-fallback.test.ts
@ -0,0 +1,87 @@
+/**
+ * Phase 2 calibration write-back fence-block fallback (T19).
+ *
+ * The BRAIN_WRITE_BACK resolver output describes two paths:
+ *   1. Preferred: mcp__gbrain__takes_add op (upstream gbrain v0.42+, T8)
+ *   2. Fallback: mcp__gbrain__put_page with a gstack:takes fence block
+ *
+ * Until T8 ships, the fallback is the only path. Verify the resolver output
+ * mentions the fence-block fallback explicitly so the agent knows what to
+ * do when takes_add returns MCPMethodNotFound.
+ *
+ * Gate-tier, free, pure import + render.
+ */
+
+import { describe, test, expect } from 'bun:test';
+import { generateBrainWriteBack } from '../scripts/resolvers/gbrain';
+import { SKILL_DIGEST_SUBSETS, SKILL_CALIBRATION_WEIGHTS } from '../scripts/brain-cache-spec';
+import { HOST_PATHS } from '../scripts/resolvers/types';
+import type { TemplateContext } from '../scripts/resolvers/types';
+
+/**
+ * Assembles the claude-host TemplateContext that generateBrainWriteBack is rendered with.
+ */
+function buildCtx(skillName: string): TemplateContext {
+  const tmplPath = `/tmp/${skillName}/SKILL.md.tmpl`;
+  const ctx: TemplateContext = { skillName, tmplPath, host: 'claude', paths: HOST_PATHS.claude };
+  return ctx;
+}
+
+describe('Phase 2 write-back fence-block fallback', () => { // substring checks on generateBrainWriteBack output only
+  test('every preflight skill emits write-back with fallback path documented', () => {
+    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
+      const out = generateBrainWriteBack(buildCtx(skill));
+      // Mentions takes_add (preferred)
+      expect(out).toContain('takes_add');
+      // Mentions put_page fallback
+      expect(out).toContain('put_page');
+      // NOTE(review): 'takes' is a substring of 'takes_add', so this check cannot fail once the first one passes; it does not prove the fence-block syntax is mentioned.
+      expect(out).toContain('takes');
+    }
+  });
+
+  test('write-back guidance gates on BRAIN_CALIBRATION_WRITEBACK feature flag', () => {
+    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
+      const out = generateBrainWriteBack(buildCtx(skill));
+      expect(out).toContain('BRAIN_CALIBRATION_WRITEBACK'); // checks the flag name appears, not how it is evaluated
+    }
+  });
+
+  test('write-back guidance gates on brain_trust_policy == personal', () => {
+    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
+      const out = generateBrainWriteBack(buildCtx(skill));
+      expect(out).toContain('personal'); // the two substrings are checked independently, not as one "policy == personal" expression
+      expect(out).toContain('brain_trust_policy');
+    }
+  });
+
+  test('write-back emits the kind=bet take frontmatter shape', () => {
+    const out = generateBrainWriteBack(buildCtx('plan-ceo-review')); // only plan-ceo-review is sampled here
+    expect(out).toContain('kind: bet');
+    expect(out).toContain('holder:');
+    expect(out).toContain('claim:');
+    expect(out).toContain('weight:');
+    expect(out).toContain('since_date:');
+    expect(out).toContain('expected_resolution:');
+    expect(out).toContain('source_skill:');
+  });
+
+  test('per-skill weight matches SKILL_CALIBRATION_WEIGHTS', () => {
+    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
+      const weight = SKILL_CALIBRATION_WEIGHTS[skill];
+      if (weight == null) continue; // NOTE(review): skills with no registered weight are skipped, not failed, so the test can pass without checking anything
+      const out = generateBrainWriteBack(buildCtx(skill));
+      expect(out).toContain(`weight: ${weight}`);
+    }
+  });
+
+  test('write-back invalidates affected cache digests after write', () => {
+    const out = generateBrainWriteBack(buildCtx('plan-ceo-review')); // only plan-ceo-review is sampled here
+    expect(out).toContain('gstack-brain-cache invalidate');
+  });
+
+  test('non-preflight skill gets empty write-back (no Phase 2 path)', () => {
+    expect(generateBrainWriteBack(buildCtx('ship'))).toBe('');
+    expect(generateBrainWriteBack(buildCtx('qa'))).toBe('');
+  });
+});