From 181e4576cde81072ad4b48021b6b1d1a46d0c3bb Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Wed, 27 May 2026 08:34:44 -0700 Subject: [PATCH] chore(brain): touchfiles + TODOS + CHANGELOG for v1.50.0.0 test/helpers/touchfiles.ts: register the two new E2Es in E2E_TOUCHFILES + E2E_TIERS (both periodic): - office-hours-brain-writeback: triggered by resolver / gen-pipeline / detection helper / refresh subcommand / office-hours template / docs / fixture / test file changes - gbrain-roundtrip-local: triggered by resolver / test file changes TODOS.md: append two P2 follow-ups carried over from the v1.50 plan: - Re-verify calibration takes when gbrain v0.42+ ships takes_add and BRAIN_CALIBRATION_WRITEBACK flips TRUE - Extend brain-writeback E2E to the other 4 planning skills (extract makeFakeGbrain to test/helpers/fake-gbrain.ts when second consumer arrives) CHANGELOG.md v1.50.0.0: add a "Save-results path: works under any CLI when gbrain is on PATH" section that documents the headline: - Conditional inclusion at setup-time (zero overhead for non-gbrain users, ~250 tokens with gbrain) - Wiring symmetry fix (5 of 5 planning skills now write a page) - Token cost table comparing detection states - Test coverage map (resolver unit + override mechanism + fake-CLI agent obedience + real PGLite round-trip) - Why remote routing isn't tested here (gbrain's contract) Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 21 +++++++++++++++++ TODOS.md | 47 ++++++++++++++++++++++++++++++++++++++ test/helpers/touchfiles.ts | 36 +++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b984215d..b84b18ec2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,27 @@ Phase 2 calibration write-back is gated behind the `BRAIN_CALIBRATION_WRITEBACK` - Upstream gbrain dependency for Phase 2: `takes_add` + `takes_resolve` MCP ops in `~/git/gbrain/` (filed as P2 in TODOS.md). 
Phase 2 wiring already exists behind `BRAIN_CALIBRATION_WRITEBACK` flag; flag flips when upstream lands. - Plan / CEO + eng review record: `~/.claude/plans/hm-interesting-well-why-dapper-eagle.md` (Approach B + 5 cherry-picks + 11 D-decisions from full eng review + codex outside-voice synthesis). +### Save-results path: works under any CLI when gbrain is on PATH + +Brain-aware planning saves the actual review document to gbrain, not just preflight digests and calibration takes. Setup detects gbrain at install time and, if present, the planning skills emit compressed `gbrain put "<prefix>/<slug>"` instructions for `office-hours/`, `ceo-plans/`, `eng-reviews/`, `design-reviews/`, and `devex-reviews/` slug spaces. If gbrain is not detected, the save-results block is suppressed entirely. Zero token overhead for users without gbrain. If you install gbrain after running `./setup`, run `gstack-config gbrain-refresh` to pick up the change. + +Token cost stays tight: the inline save-results block is ~150 tokens per planning skill (versus the ~1000 tokens a naive un-suppression would have added). The full save template (heredoc body, entity-stub instructions, throttle handling, backlinks) lives in `docs/gbrain-write-surfaces.md` §Save Template and the agent reads it on demand only when it actually saves. Same compression discipline for the brain-context-load block: ~115 tokens with skip-header pointing to §Context Load. 
+ +| Detection state | Per-planning-skill token overhead | What the agent does on save | +|---|---|---| +| gbrain on PATH + `gstack-config gbrain-refresh` says `local_status: "ok"` | ~250 tokens (CONTEXT_LOAD + SAVE_RESULTS, compressed) | reads `docs/gbrain-write-surfaces.md` on demand, calls `gbrain put <prefix>/<slug>` | +| gbrain not on PATH | 0 tokens | block suppressed at gen-time, nothing rendered | +| GBrain or Hermes host adapter | full inline render (unchanged) | calls `gbrain put` always | + +Wired for all five planning skills uniformly: `office-hours`, `plan-ceo-review`, `plan-eng-review`, `plan-design-review`, `plan-devex-review`. The last two gained the `{{GBRAIN_SAVE_RESULTS}}` placeholder in their templates (previously only the first three had it, so design-review and devex-review produced no retrievable page even under GBrain CLI). + +Coverage: a free resolver-level unit test pins per-skill slug + tag metadata + the compressed token budget (`test/resolvers-gbrain-save-results.test.ts`, 10 tests / 53 assertions); a free override-mechanism test asserts the detection file gates resolver rendering correctly across `detected: true`, `detected: false`, and `no file` states (`test/gbrain-detection-override.test.ts`, 4 tests); a periodic-tier fake-CLI E2E drives `/office-hours` against a stub `gbrain` on PATH and asserts the agent actually calls `gbrain put office-hours/<slug>` with valid YAML frontmatter (`test/skill-e2e-office-hours-brain-writeback.test.ts`, ~$0.50-1/run); a periodic-tier real-CLI round-trip drives `gbrain init --pglite` + `gbrain put` + `gbrain get` against an isolated temp HOME and asserts the body survives (`test/skill-e2e-gbrain-roundtrip-local.test.ts`, ~$0.001/run, skips if `VOYAGE_API_KEY` is unset or the `gbrain` CLI is not on PATH). Together: the agent obeys the resolver instruction, the resolver emits a valid CLI shape, and the CLI persists the page on the local engine. 
Remote/Supabase routing is gbrain's contract to honor — the same CLI shape covers all engines, so gstack stops at local round-trip coverage. + +**For contributors (save-results layer):** +- `bin/gstack-config gbrain-refresh` re-runs `bin/gstack-gbrain-detect` and writes `~/.gstack/gbrain-detection.json`. `./setup` runs this at the end of install and conditionally regenerates Claude-host SKILL.md with `bun run gen:skill-docs:user` (added package.json script) so detected installs get the brain blocks immediately. +- The default `bun run gen:skill-docs` (CI canonical) ignores the detection file. Committed SKILL.md stays reproducible regardless of any developer's local gbrain state. Use `bun run gen:skill-docs:user` for user-local installs. +- Two follow-ups deferred to `TODOS.md` (P2): re-verify calibration takes when gbrain v0.42+ ships `takes_add` (the `BRAIN_CALIBRATION_WRITEBACK` flag flips); extend the brain-writeback E2E to the other 4 planning skills. + ## [1.48.0.0] - 2026-05-26 ## **Agents stop dropping AskUserQuestion options when there are 5+.** A new canonical preamble rule + runtime gate makes Conductor's 4-option cap a split-or-batch decision, not a silent trim. diff --git a/TODOS.md b/TODOS.md index 06342b6d5..553041f90 100644 --- a/TODOS.md +++ b/TODOS.md @@ -2033,3 +2033,50 @@ until users report stale digests where a background refresh silently failed. **Effort:** S (human ~2h, CC ~20min) + +### P2: Re-verify calibration takes when gbrain v0.42+ lands + +**What:** When upstream gbrain ships `takes_add` MCP op and we flip +`BRAIN_CALIBRATION_WRITEBACK` from FALSE to TRUE, re-run the manual +probe in `docs/gbrain-write-surfaces.md` against `/office-hours` and +confirm `gbrain takes_list` surfaces a `kind=bet` entry with the +expected weight (0.9 for office-hours, per +`scripts/brain-cache-spec.ts:151-157`). + +**Why:** Today the calibration take path falls back to writing inside a +`gbrain put` fence block because `takes_add` isn't available yet. 
Once +v0.42+ ships, the agent will call `takes_add` directly — we should +confirm the new path actually persists a queryable take. + +**Context:** v1.50.0.0 plan §"NOT in scope". The fence-block fallback +test (`test/takes-fence-fallback.test.ts`) covers wiring for both paths; +this TODO is about live verification of the preferred path when it +becomes available. + +**Effort:** XS (human ~15min, CC ~5min) + +**Depends on:** Upstream gbrain v0.42+ release shipping `takes_add` MCP +op (separate TODO above). + +### P2: Extend brain-writeback E2E to the other 4 planning skills + +**What:** `test/skill-e2e-office-hours-brain-writeback.test.ts` covers +the brain-writeback path for `/office-hours` only. Adding parallel +tests for `/plan-ceo-review`, `/plan-eng-review`, `/plan-design-review`, +and `/plan-devex-review` would bring per-skill agent-obedience coverage +to parity with the resolver unit test +(`test/resolvers-gbrain-save-results.test.ts`, which covers wiring for +all 5). + +**Why:** The resolver test proves the right instructions get emitted; +the E2E proves the agent actually obeys. Today we only have that +end-to-end signal for one of five planning skills. + +**Context:** v1.50.0.0 plan §"NOT in scope". Extract `makeFakeGbrain` +into `test/helpers/fake-gbrain.ts` when the second consumer arrives +(YAGNI for one consumer today). + +**Effort:** S (human ~1d, CC ~1h). Periodic-tier (~$2-4 total for 4 +runs). + +**Depends on:** None. diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index 359da2b6f..8d4ddf3d9 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -378,6 +378,35 @@ export const E2E_TOUCHFILES: Record = { // /spec end-to-end via PTY — exercises the full Phase 1→5 pipeline // including --execute spawn. Periodic-tier — paid + non-deterministic. 'spec-execute': ['spec/**', 'test/skill-e2e-spec-execute.test.ts'], + + // /office-hours brain-writeback path under fake gbrain CLI (v1.50.0.0 + // T7). 
Drives /office-hours with a regenerated SKILL.md that has the + // compressed GBRAIN_SAVE_RESULTS block + a fake gbrain on PATH; asserts + // the agent calls `gbrain put office-hours/` with valid YAML + // frontmatter. Touched by anything that changes resolver output, gen + // pipeline, detection helper, refresh subcommand, or the on-demand + // docs the resolver points to. + 'office-hours-brain-writeback': [ + 'scripts/resolvers/gbrain.ts', + 'scripts/gen-skill-docs.ts', + 'bin/gstack-gbrain-detect', + 'bin/gstack-config', + 'office-hours/SKILL.md.tmpl', + 'docs/gbrain-write-surfaces.md', + 'test/fixtures/office-hours-brain-writeback/**', + 'test/skill-e2e-office-hours-brain-writeback.test.ts', + ], + + // gbrain CLI real round-trip against a local PGLite store (v1.50.0.0 + // T11). Proves the gbrain CLI persistence contract gstack relies on — + // a `gbrain put` followed by `gbrain get` returns the body. Skips if + // VOYAGE_API_KEY is unset OR gbrain CLI not on PATH. Touched by the + // resolver (which emits the CLI shape) and the test itself. + 'gbrain-roundtrip-local': [ + 'scripts/resolvers/gbrain.ts', + 'test/skill-e2e-gbrain-roundtrip-local.test.ts', + ], + }; /** @@ -425,6 +454,13 @@ export const E2E_TIERS: Record = { // Office Hours 'office-hours-spec-review': 'gate', + // Brain-writeback E2E — periodic per cost (claude -p) + non-deterministic + // (model interprets the gbrain instruction). Matches nearby + // setup-gbrain-path4-* tier classification. + 'office-hours-brain-writeback': 'periodic', + // GBrain CLI round-trip — periodic per Voyage embedding cost (~$0.001/run) + // and external-API-dependency (skips cleanly if VOYAGE_API_KEY unset). + 'gbrain-roundtrip-local': 'periodic', 'office-hours-forcing-energy': 'gate', // V1.1 mode-posture regression gate (Sonnet generator) // 'office-hours-builder-wildness' retiered to periodic in v1.32 contributor // wave: this is an LLM-judge creativity score (axis_a ≥4 on a "wildness"