From 7c82ec055ee7e7b8fcf61e7b0ad690ebd808ae80 Mon Sep 17 00:00:00 2001
From: Garry Tan <garrytan@gmail.com>
Date: Wed, 27 May 2026 08:34:27 -0700
Subject: [PATCH] test(brain): resolver compression + detection-override
 regression pins
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

test/resolvers-gbrain-save-results.test.ts (137 LOC, 10 tests):
  - Per-skill assertions for all 5 planning skills: emits gbrain put +
    correct slug prefix + tag + title.
  - Skip-header present so agent can short-circuit when gbrain isn't
    on PATH.
  - Compression pin: each per-skill block stays under 750 chars
    (~190 tokens) — guards against a future "let me add one more
    line" refactor silently re-inflating toward the ~1000-token naive
    un-suppression baseline.
  - Generic fallback for unmapped skill names still works.
  - /investigate gets the data-research routing suffix; non-investigate
    skills do not.
  - generateGBrainContextLoad stays under 500 chars (~125 tokens).

test/gbrain-detection-override.test.ts (193 LOC, 4 tests):
  - End-to-end through gen-skill-docs subprocess against an isolated
    temp GSTACK_HOME. Asserts:
    * detected:true un-suppresses GBRAIN_* → SKILL.md gains the block
    * detected:false (status != "ok") suppresses → no block
    * no detection file suppresses → no block (graceful default)
    * no --respect-detection flag IGNORES the detection file → no
      block (CI canonical path stays reproducible)

Each detection-override test restores the canonical SKILL.md in a
finally block so the working tree stays clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 test/gbrain-detection-override.test.ts     | 193 +++++++++++++++++++++
 test/resolvers-gbrain-save-results.test.ts | 137 +++++++++++++++
 2 files changed, 330 insertions(+)
 create mode 100644 test/gbrain-detection-override.test.ts
 create mode 100644 test/resolvers-gbrain-save-results.test.ts

diff --git a/test/gbrain-detection-override.test.ts b/test/gbrain-detection-override.test.ts
new file mode 100644
index 000000000..b1b13ccbf
--- /dev/null
+++ b/test/gbrain-detection-override.test.ts
@@ -0,0 +1,193 @@
+/**
+ * Regression pin for the setup-time gbrain detection → gen-skill-docs
+ * override (T2 / v1.50.0.0).
+ *
+ * The override mechanism lives in scripts/gen-skill-docs.ts: when invoked
+ * with --respect-detection, it reads ~/.gstack/gbrain-detection.json and
+ * un-suppresses GBRAIN_CONTEXT_LOAD + GBRAIN_SAVE_RESULTS for hosts that
+ * statically list them in suppressedResolvers (claude, codex, slate,
+ * factory, opencode, openclaw, cursor, kiro).
+ *
+ * Tests drive gen-skill-docs as a subprocess against a temp GSTACK_HOME
+ * with each detection state, then assert what landed in the generated
+ * Claude-host SKILL.md. This is end-to-end through the actual override
+ * pipeline — no mocking — so it catches regressions in either the loader
+ * or the suppressedResolvers filter.
+ *
+ * Gate-tier, free, ~3-5s per test (gen-skill-docs runs the full skill
+ * generation against the real repo; --host claude scopes to one host).
+ */
+
+import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
+import { execFileSync } from 'child_process';
+import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'fs';
+import { tmpdir } from 'os';
+import { join } from 'path';
+
+const REPO_ROOT = join(import.meta.dir, '..');
+
+interface FixtureEnv {
+  tmpHome: string; // temp directory the tests pass on as GSTACK_HOME
+  cleanup: () => void; // removes tmpHome recursively; swallows any error
+}
+
+function makeFixture(detectionJson: string | null): FixtureEnv {
+  const tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-detect-test-')); // fresh dir per call
+  if (detectionJson !== null) { // null → leave the dir without a detection file
+    writeFileSync(join(tmpHome, 'gbrain-detection.json'), detectionJson);
+  }
+  return {
+    tmpHome,
+    cleanup: () => {
+      try {
+        rmSync(tmpHome, { recursive: true, force: true });
+      } catch {
+        // best effort: a leftover temp dir must not fail the test
+      }
+    },
+  };
+}
+
+/**
+ * Run gen-skill-docs for the Claude host against an isolated GSTACK_HOME
+ * (adding --respect-detection when requested) and return the regenerated
+ * content of each file in `files`, keyed by its repo-relative path.
+ *
+ * gen-skill-docs doesn't expose an output-path arg, so the generator
+ * writes into the real repo: we save the current content of `files`,
+ * regenerate, snapshot the result, then write the saved content back in
+ * a finally block (so it is restored even if generation throws).
+ * NOTE(review): only `files` are restored; confirm nothing else the
+ * generator rewrites needs restoring to keep the working tree clean.
+ */
+function regenAndSnapshot(opts: {
+  respectDetection: boolean;
+  tmpHome: string;
+  files: string[];
+}): Map<string, string> {
+  // Save the current on-disk content so it can be written back afterwards.
+  const original = new Map<string, string>();
+  for (const f of opts.files) {
+    original.set(f, readFileSync(join(REPO_ROOT, f), 'utf-8'));
+  }
+
+  const args = [
+    'run',
+    'scripts/gen-skill-docs.ts',
+    '--host',
+    'claude',
+  ];
+  if (opts.respectDetection) args.push('--respect-detection');
+
+  try {
+    execFileSync('bun', args, {
+      cwd: REPO_ROOT,
+      env: { ...process.env, GSTACK_HOME: opts.tmpHome },
+      stdio: ['ignore', 'pipe', 'pipe'],
+      timeout: 30_000,
+    });
+
+    // Snapshot the regenerated content before the finally block reverts it.
+    const snapshot = new Map<string, string>();
+    for (const f of opts.files) {
+      snapshot.set(f, readFileSync(join(REPO_ROOT, f), 'utf-8'));
+    }
+    return snapshot;
+  } finally {
+    // Always restore the probed files, even when generation or a read throws.
+    for (const [f, content] of original) {
+      writeFileSync(join(REPO_ROOT, f), content);
+    }
+  }
+}
+
+describe('gbrain detection override → gen-skill-docs', () => {
+  // One probe skill is enough to exercise the override pipeline; per-skill
+  // metadata correctness is already covered by the resolver unit test
+  // (test/resolvers-gbrain-save-results.test.ts).
+  const PROBE_FILES = ['office-hours/SKILL.md'];
+
+  test('with detected:true, Claude-host SKILL.md gains brain-aware blocks', () => {
+    const { tmpHome, cleanup } = makeFixture(
+      JSON.stringify({ gbrain_local_status: 'ok', gbrain_on_path: true, gbrain_version: 'test-0.41.0' }),
+    );
+    try {
+      const snap = regenAndSnapshot({
+        respectDetection: true,
+        tmpHome,
+        files: PROBE_FILES,
+      });
+      const content = snap.get('office-hours/SKILL.md')!;
+
+      // GBRAIN_SAVE_RESULTS un-suppressed → resolver output rendered.
+      expect(content).toContain('## Save Results to Brain');
+      expect(content).toContain('gbrain put "office-hours/');
+      expect(content).toContain('Skip this entire section if `gbrain` is not on PATH');
+
+      // GBRAIN_CONTEXT_LOAD also un-suppressed (D6 bundling).
+      expect(content).toContain('## Brain Context Load');
+    } finally {
+      cleanup();
+    }
+  });
+
+  test('with detected:false (status != "ok"), brain blocks stay suppressed', () => {
+    const { tmpHome, cleanup } = makeFixture(
+      JSON.stringify({ gbrain_local_status: 'no-cli', gbrain_on_path: false, gbrain_version: null }),
+    );
+    try {
+      const snap = regenAndSnapshot({
+        respectDetection: true,
+        tmpHome,
+        files: PROBE_FILES,
+      });
+      const content = snap.get('office-hours/SKILL.md')!;
+
+      // GBRAIN_SAVE_RESULTS suppressed → no rendered block, no gbrain put line.
+      expect(content).not.toContain('gbrain put "office-hours/');
+      // The "## Save Results to Brain" header is not asserted in this test.
+      // The BRAIN_CACHE_REFRESH and BRAIN_WRITE_BACK resolvers are NOT gated
+      // by detection (host-agnostic), so other "Brain ..." sections may still
+      // appear; only the SAVE_RESULTS-specific `gbrain put` line is checked.
+    } finally {
+      cleanup();
+    }
+  });
+
+  test('with NO detection file, brain blocks stay suppressed (same as detected:false)', () => {
+    const { tmpHome, cleanup } = makeFixture(null);
+    try {
+      const snap = regenAndSnapshot({
+        respectDetection: true,
+        tmpHome,
+        files: PROBE_FILES,
+      });
+      const content = snap.get('office-hours/SKILL.md')!;
+      expect(content).not.toContain('gbrain put "office-hours/');
+    } finally {
+      cleanup();
+    }
+  });
+
+  test('without --respect-detection flag, detection file is IGNORED (CI canonical path)', () => {
+    // Even when a detection file reports status "ok", the default
+    // `bun run gen:skill-docs` (CI) must produce no-gbrain output so the
+    // committed SKILL.md stays reproducible regardless of any developer's
+    // local gbrain install state.
+    const { tmpHome, cleanup } = makeFixture(
+      JSON.stringify({ gbrain_local_status: 'ok', gbrain_on_path: true, gbrain_version: 'test-0.41.0' }),
+    );
+    try {
+      const snap = regenAndSnapshot({
+        respectDetection: false,
+        tmpHome,
+        files: PROBE_FILES,
+      });
+      const content = snap.get('office-hours/SKILL.md')!;
+      expect(content).not.toContain('gbrain put "office-hours/');
+      expect(content).not.toContain('## Save Results to Brain');
+    } finally {
+      cleanup();
+    }
+  });
+});
diff --git a/test/resolvers-gbrain-save-results.test.ts b/test/resolvers-gbrain-save-results.test.ts
new file mode 100644
index 000000000..c697262d0
--- /dev/null
+++ b/test/resolvers-gbrain-save-results.test.ts
@@ -0,0 +1,137 @@
+/**
+ * Resolver regression pin for generateGBrainSaveResults +
+ * generateGBrainContextLoad (compressed in v1.50.0.0).
+ *
+ * Two coverage stories:
+ *   1. **Wiring symmetry**: all 5 planning skills (office-hours, plan-ceo-review,
+ *      plan-eng-review, plan-design-review, plan-devex-review) get the correct
+ *      slug prefix + tag in the emitted save instructions.
+ *   2. **Token-budget pin**: post-compression, each block stays under a chars
+ *      ceiling so a future "let me just add one more line" refactor doesn't
+ *      silently re-inflate the prompt cost back toward the ~1000-token
+ *      naive-un-suppression baseline.
+ *
+ * Gate-tier, free, pure import + render — no host generation, no claude -p.
+ */
+
+import { describe, test, expect } from 'bun:test';
+import {
+  generateGBrainContextLoad,
+  generateGBrainSaveResults,
+} from '../scripts/resolvers/gbrain';
+import { HOST_PATHS } from '../scripts/resolvers/types';
+import type { TemplateContext } from '../scripts/resolvers/types';
+
+function buildCtx(skillName: string): TemplateContext {
+  return {
+    skillName,
+    tmplPath: `/tmp/${skillName}/SKILL.md.tmpl`, // placeholder; nothing in this file creates it
+    host: 'claude', // every test in this file renders for the Claude host
+    paths: HOST_PATHS.claude,
+  };
+}
+
+// Expected slug prefix, tag, and title prefix for each planning skill. If you
+// add a planning skill, add it here AND in scripts/resolvers/gbrain.ts
+// skillSaveMap. Renaming one fails this test loudly — that's the pin working.
+const PLANNING_SKILLS: Array<{ skill: string; slugPrefix: string; tag: string; title: string }> = [
+  { skill: 'office-hours',       slugPrefix: 'office-hours/',    tag: 'design-doc',    title: 'Office Hours' },
+  { skill: 'plan-ceo-review',    slugPrefix: 'ceo-plans/',       tag: 'ceo-plan',      title: 'CEO Plan' },
+  { skill: 'plan-eng-review',    slugPrefix: 'eng-reviews/',     tag: 'eng-review',    title: 'Eng Review' },
+  { skill: 'plan-design-review', slugPrefix: 'design-reviews/',  tag: 'design-review', title: 'Design Review' },
+  { skill: 'plan-devex-review',  slugPrefix: 'devex-reviews/',   tag: 'devex-review',  title: 'Devex Review' },
+];
+
+describe('generateGBrainSaveResults — wiring + compression pin', () => {
+  test.each(PLANNING_SKILLS)(
+    '$skill emits gbrain put $slugPrefix... with $tag tag',
+    ({ skill, slugPrefix, tag, title }) => {
+      const out = generateGBrainSaveResults(buildCtx(skill));
+
+      // Uses gbrain put (v0.18+ subcommand), not deprecated put_page MCP op.
+      expect(out).toContain('gbrain put');
+      expect(out).not.toContain('put_page');
+
+      // Per-skill slug prefix is exactly what skillSaveMap declares.
+      expect(out).toContain(`"${slugPrefix}<feature-slug>"`);
+
+      // Title prefix + tag match the metadata.
+      expect(out).toContain(`title: "${title}:`);
+      expect(out).toContain(`tags: [${tag},`);
+
+      // Skip-header is present so agent can short-circuit when gbrain is absent.
+      expect(out).toContain('Skip this entire section if `gbrain` is not on PATH');
+
+      // Compact: points to docs/gbrain-write-surfaces.md for full template.
+      expect(out).toContain('docs/gbrain-write-surfaces.md');
+    },
+  );
+
+  test('all 5 planning skills produce output within 750 chars (~190 tokens)', () => {
+    // Token-budget pin. Naive un-suppression would emit ~1000 tokens (~4000 chars)
+    // per skill. Compressed target: ~150 tokens (~600 chars). The enforced ceiling
+    // is a generous 750 chars, leaving room for the heredoc structure without
+    // inviting a gradual re-inflation of the prose.
+    const CEILING_CHARS = 750;
+    for (const { skill } of PLANNING_SKILLS) {
+      const out = generateGBrainSaveResults(buildCtx(skill));
+      if (out.length > CEILING_CHARS) {
+        throw new Error(
+          `generateGBrainSaveResults('${skill}') emitted ${out.length} chars (~${Math.round(out.length / 4)} tokens), ` +
+            `exceeds ceiling of ${CEILING_CHARS} chars (~${Math.round(CEILING_CHARS / 4)} tokens). ` +
+            `If you added necessary content, move the verbose prose into ` +
+            `docs/gbrain-write-surfaces.md §Save Template (which the agent reads on demand) and ` +
+            `keep the inline block as a short pointer + per-skill metadata. ` +
+            `See gbrain.ts T4/v1.50.0.0 compression rationale.`,
+        );
+      }
+    }
+  });
+
+  test('unmapped skill name falls through to compact generic template', () => {
+    const out = generateGBrainSaveResults(buildCtx('no-such-skill'));
+
+    // Generic fallback still emits gbrain put + skip-header + docs pointer.
+    expect(out).toContain('gbrain put');
+    expect(out).toContain('Skip this entire section if `gbrain` is not on PATH');
+    expect(out).toContain('docs/gbrain-write-surfaces.md');
+
+    // Should NOT contain a per-skill slug prefix from the map (would mean we
+    // accidentally regressed to the per-skill path for an unmapped skill).
+    for (const { slugPrefix } of PLANNING_SKILLS) {
+      expect(out).not.toContain(`"${slugPrefix}<feature-slug>"`);
+    }
+  });
+});
+
+describe('generateGBrainContextLoad — compression pin', () => {
+  test('emits skip-header and docs pointer, stays under ~500 chars', () => {
+    // Same compression discipline as SAVE_RESULTS. Context load was ~350-450
+    // tokens before compression; target ~80 tokens (~320 chars). The 500-char
+    // ceiling is generous; only the plan-ceo-review render is measured here.
+    const out = generateGBrainContextLoad(buildCtx('plan-ceo-review'));
+    expect(out).toContain('Skip this entire section if `gbrain` is not on PATH');
+    expect(out).toContain('docs/gbrain-write-surfaces.md');
+    expect(out).toContain('gbrain search');
+    expect(out).toContain('gbrain get_page');
+    if (out.length > 500) {
+      throw new Error(
+        `generateGBrainContextLoad emitted ${out.length} chars (~${Math.round(out.length / 4)} tokens), ` +
+          `exceeds ceiling of 500 chars (~125 tokens). ` +
+          `Move verbose prose to docs/gbrain-write-surfaces.md §Context Load.`,
+      );
+    }
+  });
+
+  test('/investigate gets the data-research routing suffix', () => {
+    const out = generateGBrainContextLoad(buildCtx('investigate'));
+    expect(out).toContain('data-research');
+  });
+
+  test('non-investigate skills do NOT get the data-research suffix', () => {
+    for (const { skill } of PLANNING_SKILLS) {
+      const out = generateGBrainContextLoad(buildCtx(skill));
+      expect(out).not.toContain('data-research');
+    }
+  });
+});