mirror of https://github.com/garrytan/gstack.git
213 lines
8.2 KiB
TypeScript
213 lines
8.2 KiB
TypeScript
/**
|
|
* Plan-tune v1.49 gate regression tests.
|
|
*
|
|
* v1.49 shipped two prose-driven implicit gates inside plan-tune/SKILL.md.tmpl
|
|
* Step 0:
|
|
* - Consent gate: question_tuning=false AND ~/.gstack/.question-tuning-prompted missing
|
|
* → run "Consent + opt-in".
|
|
* - Setup gate: question_tuning=true AND declared empty AND
|
|
* ~/.gstack/.declared-setup-prompted missing → run "5-Q setup".
|
|
*
|
|
* The gates are evaluated by the agent reading the template's bash + prose.
|
|
* The cathedral (T5/T6) replaces enforcement with hooks, but it must NOT break
|
|
* these v1.49 gates — they're the only path from "feature off" to "feature on"
|
|
* for first-time users.
|
|
*
|
|
* Three regression tests, all FREE tier, IRON RULE (no opt-out):
|
|
* 1. consent-gate fires under the right conditions and stops re-firing after marker.
|
|
* 2. setup-gate fires under the right conditions and stops re-firing after marker.
|
|
* 3. marker idempotency: re-invoking after either decision produces zero re-prompts.
|
|
*
|
|
* Strategy: exercise the helpers the gates depend on (gstack-config get,
|
|
* developer-profile.json schema, marker file paths). If those break, the
|
|
* gates break. Plus a static-template assertion so the gate language can't
|
|
* be silently deleted from the template.
|
|
*/
|
|
|
|
import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
|
|
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
import * as os from 'os';
|
|
import { spawnSync } from 'child_process';
|
|
|
|
const ROOT = path.resolve(import.meta.dir, '..');
|
|
const BIN_CONFIG = path.join(ROOT, 'bin', 'gstack-config');
|
|
const BIN_DEV = path.join(ROOT, 'bin', 'gstack-developer-profile');
|
|
const SKILL_TMPL = path.join(ROOT, 'plan-tune', 'SKILL.md.tmpl');
|
|
|
|
let stateRoot: string;
|
|
|
|
beforeEach(() => {
|
|
stateRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-gate-'));
|
|
});
|
|
|
|
afterEach(() => {
|
|
fs.rmSync(stateRoot, { recursive: true, force: true });
|
|
});
|
|
|
|
function runBin(
|
|
bin: string,
|
|
args: string[],
|
|
): { stdout: string; stderr: string; status: number } {
|
|
const env: Record<string, string> = {};
|
|
for (const [k, v] of Object.entries(process.env)) {
|
|
if (v !== undefined) env[k] = v;
|
|
}
|
|
env.GSTACK_STATE_ROOT = stateRoot;
|
|
delete env.GSTACK_HOME;
|
|
const res = spawnSync(bin, args, { env, encoding: 'utf-8', cwd: ROOT });
|
|
return {
|
|
stdout: res.stdout ?? '',
|
|
stderr: res.stderr ?? '',
|
|
status: res.status ?? -1,
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Simulate the consent-gate check as the agent would evaluate it from
|
|
* the template's Step 0 prose. Mirrors exactly the conditions in
|
|
* plan-tune/SKILL.md.tmpl §"Implicit gates run first" → "Consent gate."
|
|
*/
|
|
function evaluateConsentGate(): boolean {
|
|
const qt = runBin(BIN_CONFIG, ['get', 'question_tuning']).stdout.trim() || 'false';
|
|
const markerPath = path.join(stateRoot, '.question-tuning-prompted');
|
|
return qt === 'false' && !fs.existsSync(markerPath);
|
|
}
|
|
|
|
/**
|
|
* Simulate the setup-gate check. Mirrors plan-tune/SKILL.md.tmpl §"Setup gate."
|
|
*/
|
|
function evaluateSetupGate(): boolean {
|
|
const qt = runBin(BIN_CONFIG, ['get', 'question_tuning']).stdout.trim() || 'false';
|
|
const profilePath = path.join(stateRoot, 'developer-profile.json');
|
|
let declaredEmpty = true;
|
|
if (fs.existsSync(profilePath)) {
|
|
const profile = JSON.parse(fs.readFileSync(profilePath, 'utf-8'));
|
|
declaredEmpty = !profile.declared || Object.keys(profile.declared).length === 0;
|
|
}
|
|
const markerPath = path.join(stateRoot, '.declared-setup-prompted');
|
|
return qt === 'true' && declaredEmpty && !fs.existsSync(markerPath);
|
|
}
|
|
|
|
// ---------------------------------------------------------------
|
|
// Test 1: consent gate fires + idempotent on marker write
|
|
// ---------------------------------------------------------------
|
|
|
|
describe('v1.49 consent gate', () => {
|
|
test('fires when question_tuning=false AND no marker', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'false']);
|
|
expect(evaluateConsentGate()).toBe(true);
|
|
});
|
|
|
|
test('does NOT fire after marker is written (decline path)', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'false']);
|
|
fs.writeFileSync(path.join(stateRoot, '.question-tuning-prompted'), '');
|
|
expect(evaluateConsentGate()).toBe(false);
|
|
});
|
|
|
|
test('does NOT fire after question_tuning flipped to true (accept path)', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
|
|
expect(evaluateConsentGate()).toBe(false);
|
|
});
|
|
});
|
|
|
|
// ---------------------------------------------------------------
|
|
// Test 2: setup gate fires + idempotent on marker write
|
|
// ---------------------------------------------------------------
|
|
|
|
describe('v1.49 setup gate', () => {
|
|
test('fires when question_tuning=true AND declared empty AND no marker', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
|
|
// --read creates a stub profile with empty declared.
|
|
runBin(BIN_DEV, ['--read']);
|
|
expect(evaluateSetupGate()).toBe(true);
|
|
});
|
|
|
|
test('does NOT fire after declared populated (post-setup)', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
|
|
runBin(BIN_DEV, ['--read']);
|
|
// Simulate setup completion: populate declared.
|
|
const profilePath = path.join(stateRoot, 'developer-profile.json');
|
|
const profile = JSON.parse(fs.readFileSync(profilePath, 'utf-8'));
|
|
profile.declared = {
|
|
scope_appetite: 0.85,
|
|
risk_tolerance: 0.7,
|
|
detail_preference: 0.5,
|
|
autonomy: 0.5,
|
|
architecture_care: 0.85,
|
|
};
|
|
fs.writeFileSync(profilePath, JSON.stringify(profile, null, 2));
|
|
expect(evaluateSetupGate()).toBe(false);
|
|
});
|
|
|
|
test('does NOT fire after marker is written even if declared still empty (bail path)', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
|
|
runBin(BIN_DEV, ['--read']);
|
|
fs.writeFileSync(path.join(stateRoot, '.declared-setup-prompted'), '');
|
|
expect(evaluateSetupGate()).toBe(false);
|
|
});
|
|
|
|
test('does NOT fire when question_tuning still false (consent comes first)', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'false']);
|
|
runBin(BIN_DEV, ['--read']);
|
|
expect(evaluateSetupGate()).toBe(false);
|
|
});
|
|
});
|
|
|
|
// ---------------------------------------------------------------
|
|
// Test 3: marker idempotency across re-invocations
|
|
// ---------------------------------------------------------------
|
|
|
|
describe('v1.49 marker idempotency', () => {
|
|
test('consent gate stays silent across 5 re-invocations after one decline', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'false']);
|
|
fs.writeFileSync(path.join(stateRoot, '.question-tuning-prompted'), '');
|
|
for (let i = 0; i < 5; i++) {
|
|
expect(evaluateConsentGate()).toBe(false);
|
|
}
|
|
});
|
|
|
|
test('setup gate stays silent across 5 re-invocations after one bail', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
|
|
runBin(BIN_DEV, ['--read']);
|
|
fs.writeFileSync(path.join(stateRoot, '.declared-setup-prompted'), '');
|
|
for (let i = 0; i < 5; i++) {
|
|
expect(evaluateSetupGate()).toBe(false);
|
|
}
|
|
});
|
|
|
|
test('both markers honored independently', () => {
|
|
runBin(BIN_CONFIG, ['set', 'question_tuning', 'true']);
|
|
runBin(BIN_DEV, ['--read']);
|
|
// Touch consent marker only; setup gate should still fire.
|
|
fs.writeFileSync(path.join(stateRoot, '.question-tuning-prompted'), '');
|
|
expect(evaluateConsentGate()).toBe(false);
|
|
expect(evaluateSetupGate()).toBe(true);
|
|
});
|
|
});
|
|
|
|
// ---------------------------------------------------------------
|
|
// Test 4: static-template assertion (catches accidental deletion of gate prose)
|
|
// ---------------------------------------------------------------
|
|
|
|
describe('v1.49 gate prose survives in skill template', () => {
|
|
const tmpl = fs.readFileSync(SKILL_TMPL, 'utf-8');
|
|
|
|
test('Consent gate condition is present', () => {
|
|
expect(tmpl).toMatch(/Consent gate/i);
|
|
expect(tmpl).toMatch(/question-tuning-prompted/);
|
|
expect(tmpl).toMatch(/question_tuning.*false/);
|
|
});
|
|
|
|
test('Setup gate condition is present', () => {
|
|
expect(tmpl).toMatch(/Setup gate/i);
|
|
expect(tmpl).toMatch(/declared-setup-prompted/);
|
|
expect(tmpl).toMatch(/declared.*empty/i);
|
|
});
|
|
|
|
test('marker writes documented for both gates', () => {
|
|
expect(tmpl).toMatch(/touch.*question-tuning-prompted/);
|
|
expect(tmpl).toMatch(/touch.*declared-setup-prompted/);
|
|
});
|
|
});
|