diff --git a/scripts/brain-cache-spec.ts b/scripts/brain-cache-spec.ts
new file mode 100644
index 000000000..eab2f9588
--- /dev/null
+++ b/scripts/brain-cache-spec.ts
@@ -0,0 +1,285 @@
+/**
+ * Brain cache spec — single source of truth for the brain-aware planning skills
+ * cache layer. Imported by:
+ *   - scripts/resolvers/gbrain.ts (renders per-skill subset into SKILL.md.tmpl)
+ *   - bin/gstack-brain-cache (drives TTL + write-back invalidation)
+ *   - test/brain-cache-spec.test.ts (asserts internal consistency)
+ *   - test/skill-preflight-budget.test.ts (enforces per-skill token budget)
+ *   - test/autoplan-preflight-budget.test.ts (enforces autoplan total budget)
+ *
+ * Drift between docs and runtime is impossible by construction: the same
+ * const drives both the rendered table in SKILL.md and the cache CLI behavior.
+ */
+
+export interface BrainCacheEntity {
+  /** Filename inside ~/.gstack/{,projects/<project>/}brain-cache/ */
+  file: string;
+  /** Time-to-live in milliseconds before cache is considered stale and triggers cold refresh. */
+  ttl_ms: number;
+  /** Scope determines which dir holds the cache file. */
+  scope: 'cross-project' | 'per-project';
+  /**
+   * Which write-paths invalidate this digest. When a writer runs, it consults
+   * this list to know which cache files to bust. Special values:
+   *   - 'calibration-write' — any Phase 2 takes_add call
+   *   - 'skill-run-write'   — any skill that writes a gstack/skill-run page
+   * Otherwise these are skill names WITH the leading slash, like
+   * '/plan-ceo-review'. The per-skill tables below key skills WITHOUT it.
+   */
+  invalidated_by: ReadonlyArray<string>;
+  /** Hard byte budget for the digest. Compressor drops oldest items if exceeded. */
+  budget_bytes: number;
+}
+
+/**
+ * Eight cached digests. Six are named after typed page kinds from the
+ * `gstack-core` schema pack v1.0.0 (Phase 0):
+ *   user-profile, product, goals, developer-persona, brand, competitive-intel
+ * Two are derived digests:
+ *   recent-decisions (top 5 gstack/skill-run pages)
+ *   salience         (mcp__gbrain__get_recent_salience output)
+ * The skill-run page kind has no digest of its own here; it is surfaced only
+ * through recent-decisions.
+ */
+export const BRAIN_CACHE_ENTITIES: Record<string, BrainCacheEntity> = {
+  'user-profile': {
+    file: 'user-profile.md',
+    ttl_ms: 7 * 86_400_000, // 7 days
+    scope: 'cross-project',
+    invalidated_by: ['/retro', '/plan-tune', 'calibration-write'],
+    budget_bytes: 2048,
+  },
+  product: {
+    file: 'product.md',
+    ttl_ms: 1 * 86_400_000, // 1 day
+    scope: 'per-project',
+    invalidated_by: ['/office-hours', '/plan-ceo-review'],
+    budget_bytes: 1024,
+  },
+  goals: {
+    file: 'goals.md',
+    ttl_ms: 12 * 3_600_000, // 12 hours
+    scope: 'per-project',
+    invalidated_by: ['/office-hours', '/plan-ceo-review'],
+    budget_bytes: 512,
+  },
+  'developer-persona': {
+    file: 'developer-persona.md',
+    ttl_ms: 7 * 86_400_000,
+    scope: 'per-project',
+    invalidated_by: ['/plan-devex-review', '/devex-review'],
+    budget_bytes: 1024,
+  },
+  brand: {
+    file: 'brand.md',
+    ttl_ms: 7 * 86_400_000,
+    scope: 'per-project',
+    invalidated_by: ['/design-consultation', '/plan-design-review'],
+    budget_bytes: 1024,
+  },
+  'competitive-intel': {
+    file: 'competitive-intel.md',
+    ttl_ms: 1 * 86_400_000,
+    scope: 'per-project',
+    invalidated_by: ['/plan-ceo-review', '/office-hours'],
+    budget_bytes: 1024,
+  },
+  'recent-decisions': {
+    file: 'recent-decisions.md',
+    ttl_ms: 12 * 3_600_000,
+    scope: 'per-project',
+    invalidated_by: ['skill-run-write'],
+    budget_bytes: 2048,
+  },
+  salience: {
+    file: 'salience.md',
+    ttl_ms: 4 * 3_600_000, // 4 hours
+    scope: 'per-project',
+    invalidated_by: [],
+    budget_bytes: 512,
+  },
+};
+
+/**
+ * Per-skill subset map. The resolver consumes this to emit per-skill BRAIN_PREFLIGHT
+ * instructions. The skill template loads ONLY the listed digests — never more.
+ * Order matters for narrative coherence in the injected ## Brain Context block.
+ *
+ * Hard budget per skill, in bytes (validated by test/skill-preflight-budget.test.ts):
+ *   - CEO/office-hours: 5 KB (richest context need)
+ *   - eng/design/devex: 2 KB
+ */
+export const SKILL_DIGEST_SUBSETS: Record<string, ReadonlyArray<string>> = {
+  'office-hours': ['product', 'goals', 'user-profile', 'recent-decisions', 'salience'],
+  'plan-ceo-review': ['product', 'goals', 'recent-decisions', 'user-profile'],
+  'plan-eng-review': ['product', 'recent-decisions'],
+  'plan-design-review': ['product', 'brand', 'recent-decisions'],
+  'plan-devex-review': ['product', 'developer-persona', 'recent-decisions', 'competitive-intel'],
+};
+
+/** Per-skill total digest budget (sum of loaded digests must not exceed). */
+export const SKILL_PREFLIGHT_BUDGET_BYTES: Record<string, number> = {
+  'office-hours': 5120,
+  'plan-ceo-review': 5120,
+  'plan-eng-review': 2048,
+  'plan-design-review': 2048,
+  'plan-devex-review': 2048,
+};
+
+/**
+ * Total budget across an autoplan run (4 sequential planning skills). Validated by
+ * test/autoplan-preflight-budget.test.ts. If a future autoplan-extended adds skills,
+ * this cap forces an explicit budget revisit.
+ */
+export const AUTOPLAN_PREFLIGHT_BUDGET_BYTES = 25_600;
+
+/**
+ * D9 salience privacy: default allowlist of slug prefixes that are safe to surface
+ * in planning prompts. Anything outside (personal/, family/, therapy/, etc.)
+ * gets stripped at digest write time. User can extend via
+ * `gstack-config set salience_allowlist '<prefixes>'`.
+ */
+export const SALIENCE_DEFAULT_ALLOWLIST: ReadonlyArray<string> = [
+  'projects/',
+  'concepts/',
+  'gstack/',
+];
+
+/**
+ * Per-skill calibration bet weights (Phase 2 / E5). When a planning skill writes
+ * a kind=bet take, the weight determines how strongly it factors into the user's
+ * calibration profile. Higher = more confident prediction worth more credit/blame
+ * on resolution.
+ */
+export const SKILL_CALIBRATION_WEIGHTS: Record<string, number> = {
+  'plan-ceo-review': 0.8,
+  'plan-eng-review': 0.7,
+  'plan-design-review': 0.5,
+  'plan-devex-review': 0.6,
+  'office-hours': 0.9,
+};
+
+/**
+ * Stale-takeover timeout (5 minutes) for the lock file used by the cache refresh
+ * dedup (D3). The lock is per-project to avoid cross-project contention.
+ */
+export const CACHE_REFRESH_LOCK_TIMEOUT_MS = 5 * 60_000;
+
+/**
+ * Retention policy: gstack/skill-run pages auto-archive after this many days.
+ * Calibration takes (kind=bet) NEVER archive (long-term scorecard needs them).
+ */
+export const SKILL_RUN_RETENTION_DAYS = 90;
+
+/**
+ * Schema pack identity. Bumped when adding/removing/renaming page types.
+ * On mismatch with the version recorded in _meta.json, the cache layer
+ * triggers a FULL rebuild for the affected project.
+ */
+export const GSTACK_SCHEMA_PACK_NAME = 'gstack-core';
+export const GSTACK_SCHEMA_PACK_VERSION = '1.0.0';
+
+/**
+ * Trust policy values. Drives auto-push of artifacts, calibration write-back
+ * eligibility, and user-namespacing strategy.
+ */
+export type BrainTrustPolicy = 'personal' | 'shared' | 'unset';
+
+/**
+ * Per-transport default policy. Local engines auto-set to personal (single-tenant
+ * by construction). Remote endpoints are inferred based on sources_list shape:
+ * exactly one source + whoami matches → personal default; multiple sources or
+ * federation → ask the policy question.
+ */
+export const TRANSPORT_DEFAULT_POLICY: Record<string, BrainTrustPolicy> = {
+  'local-pglite': 'personal',
+  'local-stdio': 'personal',
+  'remote-http-single-tenant': 'personal',
+  'remote-http-ambiguous': 'unset',
+  unknown: 'unset',
+};
+
+/**
+ * User-slug fallback chain (D4 A3 defensive default). Resolved once per endpoint
+ * and persisted via `gstack-config set user_slug_at_<endpoint> <slug>`.
+ * Stable across sessions.
+ */
+export const USER_SLUG_RESOLUTION_ORDER = [
+  'whoami_client_name', // mcp__gbrain__whoami.client_name (remote + OAuth)
+  'env_user', // $USER environment variable
+  'git_email_sha8', // sha8($(git config user.email))
+  'anonymous_hostname_sha8', // anonymous-<sha8(hostname)>
+] as const;
+
+/** ----------------------------------------------------------------------- */
+/** Helper functions consumed by the resolver, cache CLI, and tests. */
+/** ----------------------------------------------------------------------- */
+
+/**
+ * Own-property lookup. The spec tables are plain object literals, so a bare
+ * `table[key]` also resolves inherited names such as 'constructor' or
+ * 'toString', which would slip past the "unknown name" guards below.
+ */
+function lookupOwn<T>(table: Record<string, T>, key: string): T | undefined {
+  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
+}
+
+/** Returns the spec entry for an entity name, throws if unknown. */
+function requireEntity(entityName: string): BrainCacheEntity {
+  const entity = lookupOwn(BRAIN_CACHE_ENTITIES, entityName);
+  if (entity === undefined) throw new Error(`Unknown brain cache entity: ${entityName}`);
+  return entity;
+}
+
+/** Returns the cache filename for an entity name, throws if unknown. */
+export function getCacheFile(entityName: string): string {
+  return requireEntity(entityName).file;
+}
+
+/** Returns the digest subset for a skill, throws if the skill isn't preflight-enabled. */
+export function getSkillSubset(skillName: string): ReadonlyArray<string> {
+  const subset = lookupOwn(SKILL_DIGEST_SUBSETS, skillName);
+  if (subset === undefined) throw new Error(`Skill not registered for brain preflight: ${skillName}`);
+  return subset;
+}
+
+/** Returns the per-skill total digest budget in bytes, throws if the skill isn't preflight-enabled. */
+export function getSkillBudget(skillName: string): number {
+  const budget = lookupOwn(SKILL_PREFLIGHT_BUDGET_BYTES, skillName);
+  if (budget === undefined) throw new Error(`Skill not registered for brain preflight: ${skillName}`);
+  return budget;
+}
+
+/**
+ * Given a write-path identifier (skill name with its leading slash, or a special
+ * token), returns the NAMES of the cache entities to invalidate, in
+ * BRAIN_CACHE_ENTITIES declaration order — not filenames; map each through
+ * getCacheFile() to get the file. An unknown write path yields an empty list.
+ * Drives the cache CLI's `invalidate` subcommand and the resolver's
+ * BRAIN_WRITE_BACK block.
+ */
+export function getInvalidationTargets(writePath: string): ReadonlyArray<string> {
+  return Object.entries(BRAIN_CACHE_ENTITIES)
+    .filter(([, entity]) => entity.invalidated_by.includes(writePath))
+    .map(([name]) => name);
+}
+
+/**
+ * Lists all skill names that are registered for brain preflight. Used by
+ * test/brain-preflight.test.ts and test/skill-preflight-budget.test.ts to
+ * iterate without hardcoding the skill list.
+ */
+export function getPreflightSkills(): ReadonlyArray<string> {
+  return Object.keys(SKILL_DIGEST_SUBSETS);
+}
+
+/**
+ * Computes the maximum possible digest set size for a skill (sum of per-entity
+ * budgets in the subset). Used by skill-preflight-budget.test.ts to validate
+ * that the per-skill cap is enforceable given the per-entity caps.
+ * Throws if the skill is unregistered or its subset names an unknown entity,
+ * so a typo cannot silently lower the computed bound.
+ */
+export function getMaxSubsetBytes(skillName: string): number {
+  return getSkillSubset(skillName).reduce((sum, name) => sum + requireEntity(name).budget_bytes, 0);
+}
diff --git a/test/brain-cache-spec.test.ts b/test/brain-cache-spec.test.ts
new file mode 100644
index 000000000..21a012f1c
--- /dev/null
+++ b/test/brain-cache-spec.test.ts
@@ -0,0 +1,182 @@
+/**
+ * Brain cache spec internal-consistency invariants (T14 / D2).
+ *
+ * Asserts that scripts/brain-cache-spec.ts is self-consistent:
+ *   - Every skill's subset only references entities that exist.
+ *   - Per-skill budget cap is achievable given per-entity caps.
+ *   - Cross-project entities are clearly distinguished from per-project.
+ *   - Invalidation graph resolves the expected targets for known write paths.
+ *   - Helper functions throw on unknown names (defensive).
+ *
+ * Gate-tier, free, pure import + assertion. Runs in <100ms.
+ */
+
+import { describe, test, expect } from 'bun:test';
+import {
+  BRAIN_CACHE_ENTITIES,
+  SKILL_DIGEST_SUBSETS,
+  SKILL_PREFLIGHT_BUDGET_BYTES,
+  AUTOPLAN_PREFLIGHT_BUDGET_BYTES,
+  SALIENCE_DEFAULT_ALLOWLIST,
+  SKILL_CALIBRATION_WEIGHTS,
+  TRANSPORT_DEFAULT_POLICY,
+  USER_SLUG_RESOLUTION_ORDER,
+  GSTACK_SCHEMA_PACK_NAME,
+  GSTACK_SCHEMA_PACK_VERSION,
+  CACHE_REFRESH_LOCK_TIMEOUT_MS,
+  SKILL_RUN_RETENTION_DAYS,
+  getCacheFile,
+  getSkillSubset,
+  getSkillBudget,
+  getInvalidationTargets,
+  getPreflightSkills,
+  getMaxSubsetBytes,
+} from '../scripts/brain-cache-spec';
+
+describe('brain-cache-spec internal consistency', () => {
+  test('every skill subset references only known entities', () => {
+    const entityNames = new Set(Object.keys(BRAIN_CACHE_ENTITIES));
+    for (const subset of Object.values(SKILL_DIGEST_SUBSETS)) {
+      for (const name of subset) {
+        expect(entityNames.has(name)).toBe(true);
+      }
+    }
+  });
+
+  test('every skill with a subset has a budget', () => {
+    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
+      expect(SKILL_PREFLIGHT_BUDGET_BYTES[skill]).toBeGreaterThan(0);
+    }
+  });
+
+  test('per-skill budget is achievable given per-entity budgets', () => {
+    // Per-entity budgets are hard ceilings on each digest's own file size.
+    // Per-skill budget is enforced by the compressor on the SUM injected into
+    // the skill's preflight context — the same entity may be sampled (top-N)
+    // rather than verbatim. So sum may legitimately exceed skill budget; the
+    // compressor trims at write time. We allow up to 3x as a sanity ceiling
+    // (test/skill-preflight-budget.test.ts enforces the real cap).
+    for (const skill of Object.keys(SKILL_DIGEST_SUBSETS)) {
+      const maxBytes = getMaxSubsetBytes(skill);
+      const skillBudget = getSkillBudget(skill);
+      expect(maxBytes).toBeLessThanOrEqual(skillBudget * 3);
+    }
+  });
+
+  test('autoplan total budget covers the 4 plan-* skills (excluding office-hours)', () => {
+    const autoplanSkills = ['plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
+    const sum = autoplanSkills.reduce((acc, s) => acc + getSkillBudget(s), 0);
+    expect(sum).toBeLessThanOrEqual(AUTOPLAN_PREFLIGHT_BUDGET_BYTES);
+  });
+
+  test('every entity has a positive TTL and a positive budget', () => {
+    for (const entity of Object.values(BRAIN_CACHE_ENTITIES)) {
+      expect(entity.ttl_ms).toBeGreaterThan(0);
+      expect(entity.budget_bytes).toBeGreaterThan(0);
+      expect(entity.file).toMatch(/\.md$/);
+      expect(['cross-project', 'per-project']).toContain(entity.scope);
+    }
+  });
+
+  test('user-profile is the only cross-project entity', () => {
+    const crossProject = Object.entries(BRAIN_CACHE_ENTITIES)
+      .filter(([, e]) => e.scope === 'cross-project')
+      .map(([n]) => n);
+    expect(crossProject).toEqual(['user-profile']);
+  });
+
+  test('salience entity has shortest TTL (changes hourly)', () => {
+    const ttls = Object.values(BRAIN_CACHE_ENTITIES).map((e) => e.ttl_ms);
+    expect(BRAIN_CACHE_ENTITIES.salience.ttl_ms).toBe(Math.min(...ttls));
+  });
+
+  test('salience allowlist has sane defaults (no personal/family/therapy)', () => {
+    const blocked = ['personal/', 'family/', 'therapy/', 'reflection'];
+    for (const prefix of blocked) {
+      expect(SALIENCE_DEFAULT_ALLOWLIST.some((p) => p.startsWith(prefix))).toBe(false);
+    }
+    // Must contain at least projects/ + gstack/ (work-flow surfaces)
+    expect(SALIENCE_DEFAULT_ALLOWLIST).toContain('projects/');
+    expect(SALIENCE_DEFAULT_ALLOWLIST).toContain('gstack/');
+  });
+
+  test('calibration weights are bounded 0-1 and present for all preflight skills', () => {
+    for (const skill of getPreflightSkills()) {
+      const weight = SKILL_CALIBRATION_WEIGHTS[skill];
+      expect(weight).toBeGreaterThan(0);
+      expect(weight).toBeLessThanOrEqual(1);
+    }
+  });
+
+  test('transport policy defaults exist for all transport modes', () => {
+    const required = ['local-pglite', 'local-stdio', 'remote-http-single-tenant', 'remote-http-ambiguous'];
+    for (const transport of required) {
+      expect(TRANSPORT_DEFAULT_POLICY[transport]).toBeDefined();
+    }
+    // Local transports must default personal (D4 / Phase 1.5 default rule)
+    expect(TRANSPORT_DEFAULT_POLICY['local-pglite']).toBe('personal');
+    expect(TRANSPORT_DEFAULT_POLICY['local-stdio']).toBe('personal');
+    // Ambiguous remote MUST require explicit ask (never silent default)
+    expect(TRANSPORT_DEFAULT_POLICY['remote-http-ambiguous']).toBe('unset');
+  });
+
+  test('user-slug resolution chain has 4 deterministic fallbacks ending in non-empty', () => {
+    expect(USER_SLUG_RESOLUTION_ORDER.length).toBe(4);
+    expect(USER_SLUG_RESOLUTION_ORDER[USER_SLUG_RESOLUTION_ORDER.length - 1]).toBe('anonymous_hostname_sha8');
+  });
+
+  test('schema pack identity is stable strings', () => {
+    expect(GSTACK_SCHEMA_PACK_NAME).toBe('gstack-core');
+    expect(GSTACK_SCHEMA_PACK_VERSION).toMatch(/^\d+\.\d+\.\d+$/);
+  });
+
+  test('refresh lock timeout matches /sync-gbrain convention (5 min)', () => {
+    expect(CACHE_REFRESH_LOCK_TIMEOUT_MS).toBe(5 * 60_000);
+  });
+
+  test('skill-run retention is 90 days per D10 lifecycle policy', () => {
+    expect(SKILL_RUN_RETENTION_DAYS).toBe(90);
+  });
+
+  test('invalidation graph: every "skill-run-write" target also depends on it', () => {
+    // recent-decisions invalidates on skill-run-write — verify the contract holds
+    const targets = getInvalidationTargets('skill-run-write');
+    expect(targets).toContain('recent-decisions');
+  });
+
+  test('invalidation graph: /plan-ceo-review invalidates product + goals + competitive-intel', () => {
+    const targets = getInvalidationTargets('/plan-ceo-review');
+    expect(targets).toContain('product');
+    expect(targets).toContain('goals');
+    expect(targets).toContain('competitive-intel');
+  });
+
+  test('helpers throw on unknown names (defensive)', () => {
+    expect(() => getCacheFile('nonsense-entity')).toThrow();
+    expect(() => getSkillSubset('not-a-skill')).toThrow();
+    expect(() => getSkillBudget('not-a-skill')).toThrow();
+  });
+
+  test('helpers reject names inherited from Object.prototype', () => {
+    // Plain-object tables resolve 'constructor' / 'toString' via the prototype
+    // chain; the helpers must treat those as unknown rather than return junk.
+    for (const inherited of ['constructor', 'toString', '__proto__']) {
+      expect(() => getCacheFile(inherited)).toThrow();
+      expect(() => getSkillSubset(inherited)).toThrow();
+      expect(() => getSkillBudget(inherited)).toThrow();
+      expect(() => getMaxSubsetBytes(inherited)).toThrow();
+    }
+  });
+
+  test('helpers return correct values for known names', () => {
+    expect(getCacheFile('product')).toBe('product.md');
+    expect(getSkillSubset('plan-eng-review')).toEqual(['product', 'recent-decisions']);
+    expect(getSkillBudget('office-hours')).toBe(5120);
+  });
+
+  test('all 5 preflight skills are real planning-skill names', () => {
+    const expected = ['office-hours', 'plan-ceo-review', 'plan-eng-review', 'plan-design-review', 'plan-devex-review'];
+    // getPreflightSkills() is typed ReadonlyArray, which has no sort(); copy first.
+    expect([...getPreflightSkills()].sort()).toEqual([...expected].sort());
+  });
+});