mirror of https://github.com/garrytan/gstack.git
70 lines
3.6 KiB
TypeScript
70 lines
3.6 KiB
TypeScript
/**
 * Static guard for cross-model synthesis recommendation emit instructions.
 *
 * v1.25.1.0+ extended the AskUserQuestion recommendation-quality coverage
 * to cross-model skills (/codex review/challenge/consult, the Claude
 * adversarial subagent, and the Codex adversarial pass). Each surface MUST
 * tell the model to end its synthesis with a canonical
 * `Recommendation: <action> because <reason>`
 * line so judgeRecommendation can grade it (see test/llm-judge-recommendation
 * for the rubric exercise).
 *
 * Free, deterministic, single-purpose: if any contributor edits these
 * templates and removes the emit instruction, this test trips before the
 * change reaches a paid eval. The runtime grading still happens via
 * judgeRecommendation when the skills run for real; this test just pins the
 * source of truth.
 */
|
|
import { describe, test, expect } from 'bun:test';
|
|
import * as fs from 'fs';
|
|
import * as path from 'path';
|
|
|
|
// Parent of the directory containing this test file; the template and
// resolver paths read by the tests below are joined onto it.
const ROOT = path.resolve(import.meta.dir, '..');
|
|
|
|
/**
 * Pins the "emit a canonical Recommendation line" instruction in each
 * cross-model surface. Every test reads a source file from disk and checks
 * its text only; nothing is executed and no model is called.
 */
describe('cross-model synthesis emit instructions', () => {
  // Heading that marks the synthesis instruction as mandatory in the template.
  const REQUIRED_HEADING = /Synthesis recommendation \(REQUIRED\)/;
  // The canonical emit line: `Recommendation: <action> because ...`.
  const EMIT_LINE = /Recommendation:\s*<action>\s*because/;

  // One row per codex mode. `endMarker` is the heading that starts the next
  // section, so each check is confined to the section it names.
  const codexSteps = [
    { step: '2A', mode: 'review', endMarker: '## Step 2B:' },
    { step: '2B', mode: 'challenge', endMarker: '## Step 2C:' },
    { step: '2C', mode: 'consult', endMarker: '## Model & Reasoning' },
  ] as const;

  for (const { step, mode, endMarker } of codexSteps) {
    test(`codex/SKILL.md.tmpl Step ${step} (${mode}) requires a synthesis Recommendation`, () => {
      // Read inside the test so a missing template fails this test instead of
      // aborting the whole describe block at collection time.
      const tmpl = fs.readFileSync(path.join(ROOT, 'codex', 'SKILL.md.tmpl'), 'utf-8');
      const section = sliceBetween(tmpl, `## Step ${step}:`, endMarker);
      expect(section, `Step ${step} section not found in codex template`).not.toBe('');
      expect(section).toMatch(REQUIRED_HEADING);
      expect(section).toMatch(EMIT_LINE);
    });
  }

  test('scripts/resolvers/review.ts Claude adversarial subagent prompt requires Recommendation', () => {
    const resolver = fs.readFileSync(path.join(ROOT, 'scripts', 'resolvers', 'review.ts'), 'utf-8');
    // The Claude subagent prompt must instruct the model to emit a final
    // canonical Recommendation line.
    // NOTE(review): `[\s\S]+?` is not bounded to the subagent section, so an
    // emit line anywhere later in the file also satisfies this match.
    // Consider slicing to the section first, as the codex tests do.
    expect(resolver).toMatch(/Claude adversarial subagent[\s\S]+?Recommendation:\s*<action>\s*because/);
  });

  test('scripts/resolvers/review.ts Codex adversarial command requires Recommendation', () => {
    const resolver = fs.readFileSync(path.join(ROOT, 'scripts', 'resolvers', 'review.ts'), 'utf-8');
    // The codex exec command's prompt string must include the emit
    // instruction. The match starts at the "Codex adversarial challenge" text
    // and, like the Claude check above, is not bounded at the section end.
    expect(resolver).toMatch(/Codex adversarial challenge[\s\S]+?Recommendation:\s*<action>\s*because/);
  });
});
|
|
|
|
/**
 * Extracts the part of `text` that begins at the first occurrence of
 * `startMarker` and stops just before the next occurrence of `endMarker`.
 *
 * @param text - Text to search.
 * @param startMarker - Literal string that opens the section; it is included
 *   in the returned slice.
 * @param endMarker - Literal string that closes the section; it is excluded
 *   from the returned slice. Only occurrences after the start marker count.
 * @returns The section text, or `''` when `startMarker` is absent. When no
 *   usable `endMarker` follows, everything from `startMarker` to the end of
 *   `text` is returned.
 */
function sliceBetween(text: string, startMarker: string, endMarker: string): string {
  const from = text.indexOf(startMarker);
  if (from === -1) {
    return '';
  }

  // Search past the start marker so an end marker that is contained in, or
  // precedes, the start marker cannot close the section.
  const to = text.indexOf(endMarker, from + startMarker.length);
  if (to <= from) {
    // Covers "not found" (-1) and the degenerate empty-start-marker case.
    return text.slice(from);
  }
  return text.slice(from, to);
}
|