From 6d48d23ba785ed297e733a4d2b81955b8d8f74c2 Mon Sep 17 00:00:00 2001
From: Garry Tan
Date: Mon, 25 May 2026 20:37:08 -0700
Subject: [PATCH] =?UTF-8?q?test(cso):=20T6=20=E2=80=94=20pin=20must-preser?=
 =?UTF-8?q?ve=20security=20phrases=20(Phase=20A.5)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cso/SKILL.md is a content-heavy security audit skill (75 KB after T3+T4).
Codex 2nd-pass critique #9: "cso exemption too broad ... should still get
resolver dedup, catalog trim, sectioning if safe, and targeted evals around
must-not-miss checks."

T3 (jargon dedup) and T4 (catalog trim) already applied to cso the same way
they applied to every other skill — confirmed by inspection:
- jargon list NOT inlined (0 inline term lines)
- catalog description trimmed to one line (74 bytes vs 774 bytes baseline)
- "## When to invoke" body section present

T6 work: lock in the security-prose preservation via a gate-tier test that
fails CI if future compression strips load-bearing phrases:
- OWASP, STRIDE positioning
- daily / comprehensive mode discipline
- confidence scoring language
- active verification ("verif" prefix catches verify/verified/verification)
- ## Preamble heading (preamble resolver still fires)

Also guards cso against accidental over-stripping: SKILL.md must stay ≥30 KB
(currently 75 KB) — a sudden cliff would mean compression went past the
targeted-dedup line into structural removal.

No structural change to cso. Future Phase B sections/ work for cso requires
writing baseline parity tests FIRST per the v2_PLAN.md sequencing.

Test plan:
- bun test test/cso-preserved.test.ts: 5 pass

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 test/cso-preserved.test.ts | 86 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 test/cso-preserved.test.ts

diff --git a/test/cso-preserved.test.ts b/test/cso-preserved.test.ts
new file mode 100644
index 000000000..83fe6bbc8
--- /dev/null
+++ b/test/cso-preserved.test.ts
@@ -0,0 +1,86 @@
+/**
+ * cso security-guidance preservation test (v1.45.0.0 T6).
+ *
+ * The cso skill carries load-bearing security prose: OWASP Top 10 mappings,
+ * STRIDE threat-model phrasing, "do not auto-fix without user approval"
+ * gates. Codex 2nd-pass critique #9: "cso exemption too broad ... should
+ * still get resolver dedup, catalog trim, sectioning if safe, and targeted
+ * evals around must-not-miss checks."
+ *
+ * This test pins the must-not-miss checks. cso gets the same resolver gate
+ * (T2), jargon dedup (T3), and catalog trim (T4) as every other skill — but
+ * its security-guidance body content stays intact. Future compression work
+ * that would strip this content fails CI here.
+ */
+
+import { describe, test, expect } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+
+const REPO_ROOT = path.resolve(import.meta.dir, '..');
+const CSO_SKILL = path.join(REPO_ROOT, 'cso', 'SKILL.md');
+
+const MUST_PRESERVE_PHRASES = [
+  // OWASP / STRIDE positioning
+  'OWASP',
+  'STRIDE',
+  // Mode discipline
+  'daily',
+  'comprehensive',
+  // Confidence scoring language
+  'confidence',
+  // Active verification requirement (codex critique: "active verification")
+  'verif', // covers "verify", "verification", "verified"
+];
+
+const MUST_PRESERVE_HEADINGS = [
+  '## Preamble', // from PREAMBLE resolver
+];
+
+describe('cso skill preserves load-bearing security guidance', () => {
+  test('cso/SKILL.md exists and is non-trivial', () => {
+    expect(fs.existsSync(CSO_SKILL)).toBe(true);
+    const { size: sizeInBytes } = fs.statSync(CSO_SKILL);
+    // The floor is a byte budget (KB), so gate on on-disk bytes, not UTF-16 length; under 30 KB suggests stripping went too far.
+    expect(sizeInBytes).toBeGreaterThanOrEqual(30_000);
+  });
+
+  test('cso preserves required security phrases (case-insensitive)', () => {
+    const content = fs.readFileSync(CSO_SKILL, 'utf-8').toLowerCase();
+    const missing: string[] = [];
+    for (const phrase of MUST_PRESERVE_PHRASES) {
+      if (!content.includes(phrase.toLowerCase())) missing.push(phrase);
+    }
+    if (missing.length > 0) {
+      throw new Error(
+        `cso/SKILL.md is missing required security phrases: ${missing.join(', ')}. ` +
+        `These are load-bearing for the skill's audit posture. If you intentionally ` +
+        `removed them, update this test with the new phrasing.`,
+      );
+    }
+  });
+
+  test('cso preserves required headings', () => {
+    const content = fs.readFileSync(CSO_SKILL, 'utf-8');
+    for (const heading of MUST_PRESERVE_HEADINGS) {
+      expect(content).toContain(heading);
+    }
+  });
+
+  test('cso catalog trim landed (frontmatter description ≤ 200 chars)', () => {
+    const content = fs.readFileSync(CSO_SKILL, 'utf-8');
+    const fmMatch = content.match(/^---\r?\n([\s\S]*?)\r?\n---/); // \r? keeps CRLF checkouts passing
+    expect(fmMatch).not.toBeNull();
+    const fm = fmMatch![1];
+    const descMatch = fm.match(/^description:[ \t]+(.+)$/m); // same-line value only; never spill onto the next key
+    expect(descMatch).not.toBeNull();
+    const desc = descMatch![1].trim();
+    expect(desc.length).toBeLessThanOrEqual(200);
+    expect(desc).toContain('(gstack)');
+  });
+
+  test('cso routing prose moved to "## When to invoke" body section', () => {
+    const content = fs.readFileSync(CSO_SKILL, 'utf-8');
+    expect(content).toContain('## When to invoke this skill');
+  });
+});