diff --git a/test/skill-coverage-floor.test.ts b/test/skill-coverage-floor.test.ts
new file mode 100644
index 000000000..a0de76292
--- /dev/null
+++ b/test/skill-coverage-floor.test.ts
@@ -0,0 +1,196 @@
+/**
+ * Skill coverage floor — gate-tier, free, runs every PR.
+ *
+ * Phase 0 of the cathedral parity-eval suite: structural-compliance smoke
+ * test that covers every gstack skill with file-IO assertions. The intent
+ * is "every skill ships with at least one CI-blocking check" — even when
+ * a skill doesn't (yet) have a behavioral E2E test, this floor catches
+ * frontmatter regressions, missing generated header, empty/trivial bodies,
+ * and unresolved {{PLACEHOLDER}} leaks. A directory that has a
+ * SKILL.md.tmpl but no SKILL.md is NOT discovered by listSkillDirs(), so
+ * that mismatch is not checked here.
+ *
+ * Pairs with test/skill-coverage-matrix.ts (the registry) and
+ * test/parity-suite.test.ts (the content-invariant suite). Together,
+ * v1.45.0.0 ships with: floor (this file) + matrix (registry CI gate)
+ * + invariants (content per skill family) + size budget. That's the
+ * eval-first foundation the v2.0.0.0 sections/ work builds on.
+ */
+
+import { describe, test, expect } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import { SKILL_COVERAGE } from './skill-coverage-matrix';
+
+const REPO_ROOT = path.resolve(import.meta.dir, '..');
+
+/** Top-level directories that are never treated as skills. */
+const NON_SKILL_DIRS = new Set(['node_modules', 'docs', 'test']);
+
+/** Opening frontmatter fence; SKILL.md must begin with exactly this. */
+const FM_OPEN = '---\n';
+/** Closing frontmatter fence, located by plain substring search. */
+const FM_CLOSE = '\n---';
+
+/** A SKILL.md separated into its frontmatter text and trimmed body. */
+interface SplitSkillMd {
+  frontmatter: string;
+  body: string;
+}
+
+/** Reads <repo>/<skill>/SKILL.md as UTF-8; returns null if the read fails. */
+function readSkillMd(skill: string): string | null {
+  const p = path.join(REPO_ROOT, skill, 'SKILL.md');
+  try {
+    return fs.readFileSync(p, 'utf-8');
+  } catch {
+    return null;
+  }
+}
+
+/** Same as readSkillMd, but throws a skill-named error instead of returning null. */
+function requireSkillMd(skill: string): string {
+  const content = readSkillMd(skill);
+  if (content === null) {
+    throw new Error(`${skill}: SKILL.md could not be read`);
+  }
+  return content;
+}
+
+/**
+ * Splits SKILL.md content into frontmatter and body.
+ *
+ * @returns null when the content does not start with the opening fence or
+ *   no closing fence follows it; otherwise the text between the fences and
+ *   the trimmed remainder after the closing fence line.
+ */
+function splitFrontmatter(content: string): SplitSkillMd | null {
+  if (!content.startsWith(FM_OPEN)) return null;
+  // Start at the newline that ends the opening fence so an empty
+  // frontmatter ("---\n---") is found instead of being skipped over.
+  const fmEnd = content.indexOf(FM_CLOSE, FM_OPEN.length - 1);
+  if (fmEnd < 0) return null;
+  return {
+    frontmatter: content.slice(FM_OPEN.length, fmEnd),
+    // +1 skips the newline that terminates the closing fence line.
+    body: content.slice(fmEnd + FM_CLOSE.length + 1).trim(),
+  };
+}
+
+/** Reads and splits a skill's SKILL.md, throwing if it is missing or malformed. */
+function loadSkill(skill: string): SplitSkillMd {
+  const parts = splitFrontmatter(requireSkillMd(skill));
+  if (parts === null) {
+    throw new Error(`${skill}: SKILL.md frontmatter is not delimited by '---' lines`);
+  }
+  return parts;
+}
+
+/** Lists top-level, non-hidden directories that contain a SKILL.md, sorted by name. */
+function listSkillDirs(): string[] {
+  return fs
+    .readdirSync(REPO_ROOT, { withFileTypes: true })
+    .filter(e => e.isDirectory() && !e.name.startsWith('.'))
+    .filter(e => !NON_SKILL_DIRS.has(e.name))
+    .filter(e => fs.existsSync(path.join(REPO_ROOT, e.name, 'SKILL.md')))
+    .map(e => e.name)
+    .sort();
+}
+
+describe('skill-coverage-floor: every skill passes structural compliance', () => {
+  const skills = listSkillDirs();
+
+  test('skill registry mentions every skill on disk', () => {
+    const inRegistry = new Set(Object.keys(SKILL_COVERAGE));
+    const missingFromRegistry = skills.filter(s => !inRegistry.has(s));
+    if (missingFromRegistry.length > 0) {
+      throw new Error(
+        `Skills on disk missing from test/skill-coverage-matrix.ts: ${missingFromRegistry.join(', ')}. ` +
+          `Add an entry to SKILL_COVERAGE with at least 'test/skill-coverage-floor.test.ts' in gate[].`,
+      );
+    }
+  });
+
+  test('every registry entry has at least one gate-tier test', () => {
+    const missingGate = Object.entries(SKILL_COVERAGE)
+      .filter(([, coverage]) => !coverage.gate || coverage.gate.length === 0)
+      .map(([skill]) => skill);
+    if (missingGate.length > 0) {
+      throw new Error(
+        `Skills with no gate-tier eval: ${missingGate.join(', ')}. ` +
+          `Eval-first foundation requires at least one CI-blocking check per skill.`,
+      );
+    }
+  });
+
+  // NOTE: despite the title, periodic-tier paths are verified here as well.
+  test('every gate-tier test path referenced in registry exists on disk', () => {
+    const missing: string[] = [];
+    for (const [skill, coverage] of Object.entries(SKILL_COVERAGE)) {
+      for (const testPath of [...coverage.gate, ...coverage.periodic]) {
+        if (!fs.existsSync(path.join(REPO_ROOT, testPath))) {
+          missing.push(`${skill} → ${testPath}`);
+        }
+      }
+    }
+    if (missing.length > 0) {
+      throw new Error(`Registry references missing test files:\n ${missing.join('\n ')}`);
+    }
+  });
+
+  // Per-skill structural compliance (file IO only, no LLM)
+  for (const skill of skills) {
+    describe(`skill: ${skill}`, () => {
+      test('SKILL.md exists', () => {
+        expect(readSkillMd(skill)).not.toBeNull();
+      });
+
+      test('frontmatter is well-formed and contains name + description', () => {
+        // loadSkill throws when the '---' fences are missing or unterminated.
+        const { frontmatter } = loadSkill(skill);
+        // name: ...
+        expect(/^name:\s*\S/m.test(frontmatter)).toBe(true);
+        // description: ... (either inline or block form)
+        expect(/^description:\s*(\S|\|)/m.test(frontmatter)).toBe(true);
+      });
+
+      test('frontmatter description fits the catalog-trim contract', () => {
+        const { frontmatter } = loadSkill(skill);
+        // Block form: description: |\n multiline
+        // Tested FIRST: the inline pattern below also matches "description: |"
+        // (capturing just "|"), which would otherwise shadow this branch.
+        const isBlock = /^description:\s*\|/m.test(frontmatter);
+        // Inline form: description: <text>
+        const inlineMatch = frontmatter.match(/^description:\s+(.+)$/m);
+        if (isBlock) {
+          // Block form is acceptable for small skills (under-120-chars baseline
+          // didn't trigger catalog trim). No size cap here; the parity-suite
+          // and size-budget tests handle bytes.
+          return;
+        }
+        if (!inlineMatch) {
+          throw new Error(`${skill}: description field is not in inline or block form`);
+        }
+        // Catalog-trimmed: should be ≤ 250 chars
+        expect((inlineMatch[1] ?? '').length).toBeLessThanOrEqual(250);
+      });
+
+      test('generated header present (only edit .tmpl, not .md)', () => {
+        expect(requireSkillMd(skill)).toContain('AUTO-GENERATED from SKILL.md.tmpl');
+      });
+
+      test('body is non-trivial (≥ 200 bytes after frontmatter)', () => {
+        expect(loadSkill(skill).body.length).toBeGreaterThanOrEqual(200);
+      });
+
+      test('no unresolved {{TEMPLATE}} placeholders leaked into output', () => {
+        const leaks = requireSkillMd(skill).match(/\{\{[A-Z_]+(?::[^}]+)?\}\}/g);
+        if (leaks) {
+          throw new Error(
+            `${skill}: ${leaks.length} unresolved placeholder(s) in generated SKILL.md: ${leaks.slice(0, 3).join(', ')}${leaks.length > 3 ? ', ...' : ''}`,
+          );
+        }
+      });
+    });
+  }
+});
diff --git a/test/skill-coverage-matrix.test.ts b/test/skill-coverage-matrix.test.ts
new file mode 100644
index 000000000..1c212d456
--- /dev/null
+++ b/test/skill-coverage-matrix.test.ts
@@ -0,0 +1,79 @@
+/**
+ * Skill coverage matrix CI gate (v1.45.0.0 T1).
+ *
+ * Asserts every skill on disk has an entry in SKILL_COVERAGE with at
+ * least one gate-tier test. The detailed per-skill structural checks
+ * live in test/skill-coverage-floor.test.ts; this file is the matrix-
+ * level gate that surfaces "skill added but eval not registered" cleanly.
+ */
+
+import { describe, test, expect } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import { SKILL_COVERAGE, type SkillCoverage } from './skill-coverage-matrix';
+
+const REPO_ROOT = path.resolve(import.meta.dir, '..');
+
+/**
+ * Top-level directories that are never treated as skills. Kept identical to
+ * the exclusion list in test/skill-coverage-floor.test.ts so both suites
+ * agree on what counts as a skill.
+ */
+const NON_SKILL_DIRS = new Set(['node_modules', 'docs', 'test']);
+
+/** Lists top-level, non-hidden directories that contain a SKILL.md, sorted by name. */
+function discoverSkills(): string[] {
+  return fs
+    .readdirSync(REPO_ROOT, { withFileTypes: true })
+    .filter(e => e.isDirectory() && !e.name.startsWith('.'))
+    .filter(e => !NON_SKILL_DIRS.has(e.name))
+    .filter(e => fs.existsSync(path.join(REPO_ROOT, e.name, 'SKILL.md')))
+    .map(e => e.name)
+    .sort();
+}
+
+describe('skill coverage matrix', () => {
+  test('SKILL_COVERAGE is exported and non-empty', () => {
+    expect(typeof SKILL_COVERAGE).toBe('object');
+    expect(Object.keys(SKILL_COVERAGE).length).toBeGreaterThan(0);
+  });
+
+  test('every entry has the right shape', () => {
+    const entries: SkillCoverage[] = Object.values(SKILL_COVERAGE);
+    for (const coverage of entries) {
+      expect(Array.isArray(coverage.gate)).toBe(true);
+      expect(Array.isArray(coverage.periodic)).toBe(true);
+      expect(coverage.gate.length).toBeGreaterThan(0);
+      for (const p of [...coverage.gate, ...coverage.periodic]) {
+        expect(typeof p).toBe('string');
+        expect(p.startsWith('test/')).toBe(true);
+        expect(p.endsWith('.test.ts')).toBe(true);
+      }
+    }
+  });
+
+  test('every skill on disk has a registry entry', () => {
+    // Own keys only, so a directory named e.g. "constructor" cannot be
+    // satisfied by an inherited Object.prototype member.
+    const registered = new Set(Object.keys(SKILL_COVERAGE));
+    const missing = discoverSkills().filter(s => !registered.has(s));
+    if (missing.length > 0) {
+      throw new Error(
+        `Skills on disk missing from SKILL_COVERAGE: ${missing.join(', ')}. ` +
+          `Add an entry to test/skill-coverage-matrix.ts with at least ` +
+          `'test/skill-coverage-floor.test.ts' in gate[].`,
+      );
+    }
+  });
+
+  test('no registry entry references a skill that does not exist on disk', () => {
+    const skills = new Set(discoverSkills());
+    const orphans = Object.keys(SKILL_COVERAGE).filter(skill => !skills.has(skill));
+    if (orphans.length > 0) {
+      throw new Error(
+        `Registry references skills not on disk: ${orphans.join(', ')}. ` +
+          `Remove from SKILL_COVERAGE or restore the skill directory.`,
+      );
+    }
+  });
+});
diff --git a/test/skill-coverage-matrix.ts b/test/skill-coverage-matrix.ts
new file mode 100644
index 000000000..2e87e46e4
--- /dev/null
+++ b/test/skill-coverage-matrix.ts
@@ -0,0 +1,181 @@
+/**
+ * Skill coverage matrix (v1.45.0.0 T1, cathedral Phase 0).
+ *
+ * Single source of truth mapping each gstack skill to its E2E test files.
+ * The CI gate at test/skill-coverage-matrix.test.ts fails if a skill has
+ * no gate-tier entry, ensuring the eval-first foundation holds: every
+ * skill has at least one CI-blocking check that asserts must-have
+ * behavior.
+ *
+ * Two tiers per entry:
+ *   gate      CI-blocking, runs on every PR, target <$0.50/test or free.
+ *   periodic  Weekly cron, deeper coverage, can cost ~$1-$3/test.
+ *
+ * The 'floor' entry refers to test/skill-coverage-floor.test.ts —
+ * a structural-compliance smoke test that covers every skill with
+ * file-IO checks (free, no LLM cost). When a skill has only 'floor'
+ * coverage, that's the eval-first minimum; future work can layer
+ * behavioral checks on top.
+ */
+
+export interface SkillCoverage {
+  /** Gate-tier test file paths (relative to repo root). At least one required per skill. */
+  gate: string[];
+  /** Periodic-tier test file paths. Optional but recommended. */
+  periodic: string[];
+  /** Brief note on why this coverage is the right shape for this skill. */
+  rationale?: string;
+}
+
+/**
+ * Per-skill coverage. Keys MUST match the top-level skill directory name.
+ * The CI test asserts every skill in the repo has an entry here AND that
+ * gate[] is non-empty.
+ *
+ * Adding a new skill: add an entry here AND either reference an existing
+ * test that covers it OR add 'test/skill-coverage-floor.test.ts' as the
+ * minimum gate-tier check.
+ */
+export const SKILL_COVERAGE: Record<string, SkillCoverage> = {
+  // ─── Core loop ──────────────────────────────────────────────
+  ship: {
+    gate: ['test/skill-e2e-ship-idempotency.test.ts', 'test/skill-coverage-floor.test.ts'],
+    periodic: ['test/skill-e2e-workflow.test.ts'],
+  },
+  review: {
+    gate: ['test/skill-e2e-review.test.ts', 'test/skill-coverage-floor.test.ts'],
+    periodic: ['test/skill-e2e-review-army.test.ts', 'test/regression-1539-review-self-verify.test.ts'],
+  },
+  qa: {
+    gate: ['test/skill-e2e-qa-workflow.test.ts', 'test/skill-coverage-floor.test.ts'],
+    periodic: ['test/skill-e2e-qa-bugs.test.ts'],
+  },
+  'qa-only': {
+    gate: ['test/skill-coverage-floor.test.ts'],
+    periodic: [],
+    rationale: 'qa-only is qa with --report-only; behavior tested via /qa coverage.',
+  },
+  investigate: {
+    gate: ['test/skill-coverage-floor.test.ts'],
+    periodic: [],
+  },
+  browse: {
+    gate: ['test/skill-coverage-floor.test.ts'],
+    periodic: [],
+    rationale: 'browse binary has its own integration suite under browse/test/.',
+  },
+
+  // ─── Plan triad ─────────────────────────────────────────────
+  'plan-ceo-review': {
+    gate: [
+      'test/skill-e2e-plan-ceo-finding-floor.test.ts',
+      'test/skill-e2e-plan-ceo-plan-mode.test.ts',
+      'test/skill-coverage-floor.test.ts',
+    ],
+    periodic: [
+      'test/skill-e2e-plan-ceo-finding-count.test.ts',
+      'test/skill-e2e-plan-ceo-mode-routing.test.ts',
+    ],
+  },
+  'plan-eng-review': {
+    gate: [
+      'test/skill-e2e-plan-eng-finding-floor.test.ts',
+      'test/skill-e2e-plan-eng-plan-mode.test.ts',
+      'test/skill-coverage-floor.test.ts',
+    ],
+    periodic: [
+      'test/skill-e2e-plan-eng-finding-count.test.ts',
+      'test/skill-e2e-plan-eng-multi-finding-batching.test.ts',
+    ],
+  },
+  'plan-design-review': {
+    gate: [
+      'test/skill-e2e-plan-design-finding-floor.test.ts',
+      'test/skill-e2e-plan-design-plan-mode.test.ts',
+      'test/skill-e2e-plan-design-with-ui.test.ts',
+      'test/skill-coverage-floor.test.ts',
+    ],
+    periodic: ['test/skill-e2e-plan-design-finding-count.test.ts'],
+  },
+  'plan-devex-review': {
+    gate: [
+      'test/skill-e2e-plan-devex-finding-floor.test.ts',
+      'test/skill-e2e-plan-devex-plan-mode.test.ts',
+      'test/skill-coverage-floor.test.ts',
+    ],
+    periodic: ['test/skill-e2e-plan-devex-finding-count.test.ts'],
+  },
+  autoplan: {
+    gate: ['test/skill-coverage-floor.test.ts'],
+    periodic: ['test/skill-e2e-autoplan-chain.test.ts', 'test/skill-e2e-autoplan-dual-voice.test.ts'],
+  },
+  'office-hours': {
+    gate: ['test/skill-e2e-office-hours.test.ts', 'test/skill-coverage-floor.test.ts'],
+    periodic: ['test/skill-e2e-office-hours-auto-mode.test.ts', 'test/skill-e2e-office-hours-phase4.test.ts'],
+  },
+
+  // ─── Polish + design ────────────────────────────────────────
+  'design-review': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'design-consultation': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'design-shotgun': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'design-html': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  cso: {
+    gate: ['test/skill-e2e-cso.test.ts', 'test/cso-preserved.test.ts', 'test/skill-coverage-floor.test.ts'],
+    periodic: [],
+    rationale: 'cso-preserved.test.ts pins must-not-strip security guidance phrases.',
+  },
+  'document-release': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'document-generate': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+
+  // ─── Ops + integrations ─────────────────────────────────────
+  'land-and-deploy': { gate: ['test/skill-e2e-deploy.test.ts', 'test/skill-coverage-floor.test.ts'], periodic: [] },
+  canary: { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  benchmark: { gate: ['test/skill-e2e-benchmark-providers.test.ts', 'test/skill-coverage-floor.test.ts'], periodic: [] },
+  'benchmark-models': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  codex: { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  retro: {
+    gate: ['test/skill-coverage-floor.test.ts'],
+    periodic: ['test/regression-1624-retro-stale-base.test.ts'],
+  },
+  'gstack-upgrade': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'context-save': { gate: ['test/skill-e2e-context-skills.test.ts', 'test/skill-coverage-floor.test.ts'], periodic: [] },
+  'context-restore': { gate: ['test/skill-e2e-context-skills.test.ts', 'test/skill-coverage-floor.test.ts'], periodic: [] },
+  'setup-deploy': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'setup-browser-cookies': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'setup-gbrain': {
+    gate: [
+      'test/skill-e2e-setup-gbrain-bad-token.test.ts',
+      'test/skill-e2e-setup-gbrain-path4-local-pglite.test.ts',
+      'test/skill-e2e-setup-gbrain-remote.test.ts',
+      'test/skill-coverage-floor.test.ts',
+    ],
+    periodic: [],
+  },
+  'sync-gbrain': {
+    gate: ['test/skill-coverage-floor.test.ts'],
+    periodic: ['test/regression-1611-gbrain-sync-resume.test.ts'],
+  },
+  'open-gstack-browser': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'pair-agent': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  scrape: { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  skillify: { gate: ['test/skill-e2e-skillify.test.ts', 'test/skill-coverage-floor.test.ts'], periodic: [] },
+  learn: { gate: ['test/skill-e2e-learnings.test.ts', 'test/skill-coverage-floor.test.ts'], periodic: [] },
+  'plan-tune': { gate: ['test/skill-e2e-plan-tune.test.ts', 'test/skill-coverage-floor.test.ts'], periodic: [] },
+
+  // ─── iOS family ─────────────────────────────────────────────
+  'ios-qa': { gate: ['test/skill-e2e-ios.test.ts', 'test/skill-coverage-floor.test.ts'], periodic: ['test/skill-e2e-ios-device.test.ts', 'test/skill-e2e-ios-swift-build.test.ts'] },
+  'ios-fix': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'ios-clean': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'ios-sync': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'ios-design-review': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+
+  // ─── Safety / housekeeping ──────────────────────────────────
+  careful: { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  freeze: { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  unfreeze: { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  guard: { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'landing-report': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  health: { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'make-pdf': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+  'devex-review': { gate: ['test/skill-coverage-floor.test.ts'], periodic: [] },
+};