mirror of https://github.com/garrytan/gstack.git
test(plan-tune): regression coverage for v1.49 consent + setup gates
Plan-tune cathedral T2 + part of T1 follow-up (Codex IRON RULE — regressions get tests). v1.49 shipped two prose-driven implicit gates inside plan-tune Step 0 (consent, setup) with zero test coverage. The cathedral refactors that template heavily; without tests, silent breakage is possible. Three regression families plus a static template assertion: 1. Consent gate fires under qt=false + no marker; goes silent on marker write or qt=true flip. 2. Setup gate fires under qt=true + empty declared + no marker; goes silent when declared populates, marker is written, or qt is still false. 3. Marker idempotency: gates stay silent across 5 re-invocations after a single decline/bail. Markers honored independently. 4. Static template assertion: gate language can't be silently deleted without breaking a test. Also extends gstack-config to honor GSTACK_STATE_ROOT (it was the last bin still ignoring it — caught while writing the tests; without this, tests would silently mutate the user's real config.yaml). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
065a8b14b7
commit
9e0e185fe2
|
|
@ -8,11 +8,13 @@
|
|||
# gstack-config defaults — show just the defaults table
|
||||
#
|
||||
# Env overrides (for testing):
|
||||
# GSTACK_STATE_ROOT — override ~/.gstack state directory (highest priority,
|
||||
# matches D16 cathedral isolation convention)
|
||||
# GSTACK_HOME — override ~/.gstack state directory (aligns with writer scripts)
|
||||
# GSTACK_STATE_DIR — legacy alias for GSTACK_HOME (kept for backwards compat)
|
||||
set -euo pipefail
|
||||
|
||||
STATE_DIR="${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}"
|
||||
STATE_DIR="${GSTACK_STATE_ROOT:-${GSTACK_HOME:-${GSTACK_STATE_DIR:-$HOME/.gstack}}}"
|
||||
CONFIG_FILE="$STATE_DIR/config.yaml"
|
||||
|
||||
# Annotated header for new config files. Written once on first `set`.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,212 @@
|
|||
/**
|
||||
* Plan-tune v1.49 gate regression tests.
|
||||
*
|
||||
* v1.49 shipped two prose-driven implicit gates inside plan-tune/SKILL.md.tmpl
|
||||
* Step 0:
|
||||
* - Consent gate: question_tuning=false AND ~/.gstack/.question-tuning-prompted missing
|
||||
* → run "Consent + opt-in".
|
||||
* - Setup gate: question_tuning=true AND declared empty AND
|
||||
* ~/.gstack/.declared-setup-prompted missing → run "5-Q setup".
|
||||
*
|
||||
* The gates are evaluated by the agent reading the template's bash + prose.
|
||||
* The cathedral (T5/T6) replaces enforcement with hooks, but it must NOT break
|
||||
* these v1.49 gates — they're the only path from "feature off" to "feature on"
|
||||
* for first-time users.
|
||||
*
|
||||
* Three regression tests, all FREE tier, IRON RULE (no opt-out):
|
||||
* 1. consent-gate fires under the right conditions and stops re-firing after marker.
|
||||
* 2. setup-gate fires under the right conditions and stops re-firing after marker.
|
||||
* 3. marker idempotency: re-invoking after either decision produces zero re-prompts.
|
||||
*
|
||||
* Strategy: exercise the helpers the gates depend on (gstack-config get,
|
||||
* developer-profile.json schema, marker file paths). If those break, the
|
||||
* gates break. Plus a static-template assertion so the gate language can't
|
||||
* be silently deleted from the template.
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as os from 'os';
|
||||
import { spawnSync } from 'child_process';
|
||||
|
||||
// Parent of the directory that contains this test file.
const ROOT = path.resolve(import.meta.dir, '..');
// Helper executables the gate simulations shell out to.
const BIN_CONFIG = path.join(ROOT, 'bin', 'gstack-config');
const BIN_DEV = path.join(ROOT, 'bin', 'gstack-developer-profile');
// Skill template inspected by the static gate-prose assertions.
const SKILL_TMPL = path.join(ROOT, 'plan-tune', 'SKILL.md.tmpl');

// Scratch state directory for the test currently running.
let stateRoot: string;

// Every test starts from a brand-new temporary state directory.
beforeEach(() => {
  const prefix = path.join(os.tmpdir(), 'gstack-gate-');
  stateRoot = fs.mkdtempSync(prefix);
});

// Remove the scratch directory and everything written into it.
afterEach(() => {
  fs.rmSync(stateRoot, { force: true, recursive: true });
});
|
||||
|
||||
/**
 * Run a helper executable with its state directory redirected to `stateRoot`.
 *
 * The child inherits the current environment with GSTACK_STATE_ROOT set to
 * `stateRoot`. GSTACK_HOME and its legacy alias GSTACK_STATE_DIR are removed so
 * an inherited value cannot steer a helper toward a different directory.
 *
 * @param bin - Path of the executable to spawn.
 * @param args - Arguments passed to the executable.
 * @returns Captured stdout and stderr plus the exit status; status is -1 when
 *   the process reported no exit code.
 * @throws Error when the process could not be spawned (for example the
 *   executable is missing). Without this, callers would see empty stdout and
 *   could mistake "helper never ran" for "helper printed nothing".
 */
function runBin(
  bin: string,
  args: string[],
): { stdout: string; stderr: string; status: number } {
  // Copy only the defined variables so the map is a plain string dictionary.
  const env: Record<string, string> = {};
  for (const [key, value] of Object.entries(process.env)) {
    if (value !== undefined) env[key] = value;
  }
  env.GSTACK_STATE_ROOT = stateRoot;
  delete env.GSTACK_HOME;
  delete env.GSTACK_STATE_DIR;

  const res = spawnSync(bin, args, { env, encoding: 'utf-8', cwd: ROOT });
  if (res.error) {
    // Spawn-level failure (ENOENT, EACCES, ...): fail loudly instead of
    // returning an empty result that looks like a successful, silent run.
    throw new Error(`failed to spawn ${bin}: ${res.error.message}`);
  }

  return {
    stdout: res.stdout ?? '',
    stderr: res.stderr ?? '',
    status: res.status ?? -1,
  };
}
|
||||
|
||||
/**
 * Consent-gate predicate, evaluated the way the agent would read it from the
 * template's Step 0 prose (plan-tune/SKILL.md.tmpl, "Implicit gates run
 * first" → "Consent gate.").
 *
 * @returns true when question_tuning reads as 'false' and the
 *   `.question-tuning-prompted` marker is absent from the state directory.
 */
function evaluateConsentGate(): boolean {
  const lookup = runBin(BIN_CONFIG, ['get', 'question_tuning']);
  // Empty output from the config helper is treated as 'false'.
  const tuning = lookup.stdout.trim() || 'false';
  const promptedMarker = path.join(stateRoot, '.question-tuning-prompted');

  if (tuning !== 'false') {
    return false;
  }
  return !fs.existsSync(promptedMarker);
}
|
||||
|
||||
/**
 * Setup-gate predicate; mirrors plan-tune/SKILL.md.tmpl "Setup gate."
 *
 * @returns true when question_tuning reads as 'true', the profile has no
 *   declared entries (or no profile file exists), and the
 *   `.declared-setup-prompted` marker is absent from the state directory.
 */
function evaluateSetupGate(): boolean {
  const lookup = runBin(BIN_CONFIG, ['get', 'question_tuning']);
  // Empty output from the config helper is treated as 'false'.
  const tuning = lookup.stdout.trim() || 'false';

  // A missing profile file counts as "nothing declared".
  let nothingDeclared = true;
  const profileFile = path.join(stateRoot, 'developer-profile.json');
  if (fs.existsSync(profileFile)) {
    const raw = fs.readFileSync(profileFile, 'utf-8');
    const parsed: { declared?: Record<string, unknown> | null } = JSON.parse(raw);
    const declared = parsed.declared;
    nothingDeclared = !declared || Object.keys(declared).length === 0;
  }

  const promptedMarker = path.join(stateRoot, '.declared-setup-prompted');
  const alreadyPrompted = fs.existsSync(promptedMarker);
  return tuning === 'true' && nothingDeclared && !alreadyPrompted;
}
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Test 1: consent gate fires + idempotent on marker write
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
describe('v1.49 consent gate', () => {
  // Persist question_tuning through the config helper.
  const setTuning = (value: 'true' | 'false'): void => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', value]);
  };
  // Resolved at call time because stateRoot is assigned per test.
  const consentMarker = (): string => path.join(stateRoot, '.question-tuning-prompted');

  test('fires when question_tuning=false AND no marker', () => {
    setTuning('false');
    const fired = evaluateConsentGate();
    expect(fired).toBe(true);
  });

  test('does NOT fire after marker is written (decline path)', () => {
    setTuning('false');
    fs.writeFileSync(consentMarker(), '');
    const fired = evaluateConsentGate();
    expect(fired).toBe(false);
  });

  test('does NOT fire after question_tuning flipped to true (accept path)', () => {
    setTuning('true');
    const fired = evaluateConsentGate();
    expect(fired).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Test 2: setup gate fires + idempotent on marker write
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
describe('v1.49 setup gate', () => {
  // Set question_tuning, then invoke the profile helper with --read.
  const prepare = (tuning: 'true' | 'false'): void => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', tuning]);
    // --read creates a stub profile with empty declared.
    runBin(BIN_DEV, ['--read']);
  };

  test('fires when question_tuning=true AND declared empty AND no marker', () => {
    prepare('true');
    expect(evaluateSetupGate()).toBe(true);
  });

  test('does NOT fire after declared populated (post-setup)', () => {
    prepare('true');

    // Simulate setup completion: populate declared.
    const declaredValues = {
      scope_appetite: 0.85,
      risk_tolerance: 0.7,
      detail_preference: 0.5,
      autonomy: 0.5,
      architecture_care: 0.85,
    };
    const profileFile = path.join(stateRoot, 'developer-profile.json');
    const stored = JSON.parse(fs.readFileSync(profileFile, 'utf-8'));
    stored.declared = declaredValues;
    fs.writeFileSync(profileFile, JSON.stringify(stored, null, 2));

    expect(evaluateSetupGate()).toBe(false);
  });

  test('does NOT fire after marker is written even if declared still empty (bail path)', () => {
    prepare('true');
    const setupMarker = path.join(stateRoot, '.declared-setup-prompted');
    fs.writeFileSync(setupMarker, '');
    expect(evaluateSetupGate()).toBe(false);
  });

  test('does NOT fire when question_tuning still false (consent comes first)', () => {
    prepare('false');
    expect(evaluateSetupGate()).toBe(false);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Test 3: marker idempotency across re-invocations
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
describe('v1.49 marker idempotency', () => {
  // Number of simulated re-invocations after the first decision.
  const REINVOCATIONS = 5;
  // Create an empty marker file inside the current state directory.
  const touchMarker = (name: string): void => {
    fs.writeFileSync(path.join(stateRoot, name), '');
  };

  test('consent gate stays silent across 5 re-invocations after one decline', () => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', 'false']);
    touchMarker('.question-tuning-prompted');

    let attempt = 0;
    while (attempt < REINVOCATIONS) {
      expect(evaluateConsentGate()).toBe(false);
      attempt += 1;
    }
  });

  test('setup gate stays silent across 5 re-invocations after one bail', () => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
    runBin(BIN_DEV, ['--read']);
    touchMarker('.declared-setup-prompted');

    let attempt = 0;
    while (attempt < REINVOCATIONS) {
      expect(evaluateSetupGate()).toBe(false);
      attempt += 1;
    }
  });

  test('both markers honored independently', () => {
    runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
    runBin(BIN_DEV, ['--read']);

    // Touch consent marker only; setup gate should still fire.
    touchMarker('.question-tuning-prompted');

    const consentFired = evaluateConsentGate();
    expect(consentFired).toBe(false);
    const setupFired = evaluateSetupGate();
    expect(setupFired).toBe(true);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// Test 4: static-template assertion (catches accidental deletion of gate prose)
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
describe('v1.49 gate prose survives in skill template', () => {
  // Read once when the suite is registered; every test matches against it.
  const tmpl = fs.readFileSync(SKILL_TMPL, 'utf-8');

  // Assert each pattern in order; the first miss fails the test.
  const expectAll = (patterns: readonly RegExp[]): void => {
    for (const pattern of patterns) {
      expect(tmpl).toMatch(pattern);
    }
  };

  test('Consent gate condition is present', () => {
    expectAll([/Consent gate/i, /question-tuning-prompted/, /question_tuning.*false/]);
  });

  test('Setup gate condition is present', () => {
    expectAll([/Setup gate/i, /declared-setup-prompted/, /declared.*empty/i]);
  });

  test('marker writes documented for both gates', () => {
    expectAll([/touch.*question-tuning-prompted/, /touch.*declared-setup-prompted/]);
  });
});
|
||||
Loading…
Reference in New Issue