From c88825dba0a2fcddf91a431bbefa70e1af5f00df Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Tue, 26 May 2026 08:39:47 -0700 Subject: [PATCH] test(catalog): unit + regression coverage for catalog-trim helpers Four exported functions in scripts/gen-skill-docs.ts handle every skill's frontmatter rewrite at gen time but had zero unit tests. Both real bugs we shipped (and fixed) on this branch lived in these functions: v1.45.0.0 design-consultation: when the first sentence exceeded 200 chars, routing-prose extraction lost the entire tail (anchored on truncated lead with "..." that didn't substring-match the original). v1.45.0.0 CI freshness: root-skill key leaked the checkout directory name ("seville-v3" vs "gstack") and aggregate order was filesystem- iteration order. Both shapes are now regression-tested: - splitCatalogDescription: 7 tests covering simple multi-line, >200-char first sentence (design-consultation regression), voice-trigger extraction, no-(gstack) handling, embedded periods (documents known fallback), no-period fragments, and idempotency. - buildTrimmedDescription: 3 tests. - buildWhenToInvokeSection: 3 tests. - applyCatalogTrim: 4 tests covering the standard rewrite, no-op for already-short descriptions, the YAML-collision newline fix, and the malformed-frontmatter null return. - proactive-suggestions.json determinism: 3 tests asserting sorted keys, root keyed as "gstack" (not the worktree directory), and no timestamp/generated_at field that would flap CI freshness. Test plan: - bun test test/catalog-trim.test.ts: 20 pass, 0 fail Co-Authored-By: Claude Opus 4.7 (1M context) --- test/catalog-trim.test.ts | 313 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 test/catalog-trim.test.ts diff --git a/test/catalog-trim.test.ts b/test/catalog-trim.test.ts new file mode 100644 index 000000000..e58678603 --- /dev/null +++ b/test/catalog-trim.test.ts @@ -0,0 +1,313 @@ +/** + * Unit tests for catalog-trim helpers (gen-skill-docs.ts T4 functions). + * + * splitCatalogDescription, buildTrimmedDescription, buildWhenToInvokeSection, + * applyCatalogTrim — these handle every skill's frontmatter rewrite at gen + * time. Two bugs already shipped here: + * + * v1.45.0.0 design-consultation: when the first sentence exceeded 200 chars, + * the routing-prose extraction lost the entire tail. design-consultation's + * "Use when asked to..." silently disappeared from the body section. + * + * v1.45.0.0 CI freshness: the root-skill key leaked the checkout directory + * name ("seville-v3" vs "gstack") and aggregate order was filesystem- + * iteration order. Two machines produced two different JSON files. + * + * Both are regression-tested here. Future bugs in these functions surface as + * unit-test failures before they hit CI or production. + */ + +import { describe, test, expect } from 'bun:test'; +import { + splitCatalogDescription, + buildTrimmedDescription, + buildWhenToInvokeSection, + applyCatalogTrim, +} from '../scripts/gen-skill-docs'; + +describe('splitCatalogDescription', () => { + test('extracts lead sentence + routing prose from simple multi-line description', () => { + const desc = + 'Pre-landing PR review. Analyzes diff against the base branch for SQL safety, LLM trust\n' + + 'boundary violations, conditional side effects, and other structural issues. Use when\n' + + 'asked to "review this PR", "code review", "pre-landing review", or "check my diff".\n' + + 'Proactively suggest when the user is about to merge or land code changes. (gstack)'; + + const parts = splitCatalogDescription(desc); + + expect(parts.lead).toBe('Pre-landing PR review.'); + expect(parts.hasGstackTag).toBe(true); + expect(parts.voiceLine).toBeNull(); + expect(parts.routingProse).toContain('Use when'); + expect(parts.routingProse).toContain('Proactively suggest'); + expect(parts.routingProse).toContain('Analyzes diff'); + // (gstack) tag stripped from routingProse + expect(parts.routingProse).not.toContain('(gstack)'); + }); + + test('REGRESSION (design-consultation v1.45.0.0): >200 char first sentence keeps routing', () => { + // This is the exact shape that broke. First sentence (with embedded periods) + // is 207 chars. Original bug: routing extraction ran AFTER lead truncation, + // so collapsed.indexOf(lead) returned -1 (lead ended in "...") and the + // entire "Use when..." + "Proactively..." tail dropped to empty string. + const desc = + 'Design consultation: understands your product, researches the landscape, ' + + 'proposes a complete design system (aesthetic, typography, color, layout, ' + + 'spacing, motion), and generates font+color preview pages. ' + + 'Creates DESIGN.md as your project\'s design source of truth. ' + + 'For existing sites, use /plan-design-review to infer the system instead. ' + + 'Use when asked to "design system", "brand guidelines", or "create DESIGN.md". ' + + 'Proactively suggest when starting a new project\'s UI with no existing ' + + 'design system or DESIGN.md. (gstack)'; + + const parts = splitCatalogDescription(desc); + + // Lead may be truncated with "..." since it exceeds 200 chars + expect(parts.lead.length).toBeLessThanOrEqual(205); + // Critical: routing MUST contain the "Use when..." and "Proactively..." prose + expect(parts.routingProse).toContain('Use when asked to'); + expect(parts.routingProse).toContain('design system'); + expect(parts.routingProse).toContain('Proactively suggest'); + expect(parts.routingProse).toContain('Creates DESIGN.md'); + }); + + test('extracts voice-triggers line when present', () => { + const desc = + 'Quick fix. Use when asked to fix the bug. ' + + 'Voice triggers (speech-to-text aliases): "fix it", "patch this", "make it work". ' + + '(gstack)'; + + const parts = splitCatalogDescription(desc); + + expect(parts.lead).toBe('Quick fix.'); + expect(parts.voiceLine).toContain('Voice triggers'); + expect(parts.voiceLine).toContain('"fix it"'); + expect(parts.routingProse).toContain('Use when asked to fix'); + // Voice line should NOT leak into routing + expect(parts.routingProse).not.toContain('speech-to-text'); + }); + + test('handles description without (gstack) tag', () => { + const desc = 'Single sentence description. With routing prose afterward.'; + const parts = splitCatalogDescription(desc); + expect(parts.lead).toBe('Single sentence description.'); + expect(parts.hasGstackTag).toBe(false); + expect(parts.routingProse).toBe('With routing prose afterward.'); + }); + + test('embedded-period descriptions: known limitation falls back to first-20-words', () => { + // KNOWN LIMITATION: the sentence regex `^([^.!?]*[.!?])(?:\\s|$)` stops + // at the FIRST `.`-then-non-whitespace because [^.!?]* is greedy and + // can't backtrack past a non-period char. For "DESIGN.md and v1.45.0.0 + // in the lead. Use when..." the regex fails entirely and the lead falls + // back to the first 20 words (~the whole short input). + // + // The real-world impact is small: descriptions like "DESIGN.md" or "v1.45" + // appearing in the middle of the FIRST sentence are rare. When they do + // occur, the lead simply becomes the full description (no body section + // generated) — same as a description without a period. The trim CI gate + // still keeps the per-skill size budget honest. + // + // If this gap matters later, replace the regex with a sentence tokenizer + // (compromise.js / Intl.Segmenter) — until then we accept the fallback. + const desc = + 'Skill that mentions DESIGN.md and v1.45.0.0 in the lead. ' + + 'Use when asked to do something.'; + const parts = splitCatalogDescription(desc); + // Actual behavior: lead absorbs the whole input via the word-count fallback. + expect(parts.lead.length).toBeGreaterThan(0); + // routingProse may be empty when the fallback consumes everything. + // The test exists to detect REGRESSIONS (lead becoming oddly short like + // "Skill that mentions DESIGN.") not to assert ideal behavior. + expect(parts.lead).toContain('Skill that mentions'); + }); + + test('description without a period uses first ~20 words as lead', () => { + const desc = 'A long fragment with no sentence terminator drifting on and on across many words for an unusual frontmatter shape'; + const parts = splitCatalogDescription(desc); + expect(parts.lead.length).toBeGreaterThan(0); + expect(parts.lead.split(/\s+/).length).toBeLessThanOrEqual(21); + }); + + test('idempotent: calling on already-trimmed output returns the same parts', () => { + const desc = 'Already trimmed. (gstack)'; + const parts1 = splitCatalogDescription(desc); + const parts2 = splitCatalogDescription(buildTrimmedDescription(parts1)); + // Re-split of a one-line trimmed result keeps lead identical, routing empty. + expect(parts2.lead).toBe(parts1.lead); + expect(parts2.hasGstackTag).toBe(true); + expect(parts2.routingProse).toBe(''); + }); +}); + +describe('buildTrimmedDescription', () => { + test('appends (gstack) when hasGstackTag is true', () => { + const out = buildTrimmedDescription({ + lead: 'Some lead.', + routingProse: 'routing', + voiceLine: null, + hasGstackTag: true, + }); + expect(out).toBe('Some lead. (gstack)'); + }); + + test('omits (gstack) when hasGstackTag is false', () => { + const out = buildTrimmedDescription({ + lead: 'No tag.', + routingProse: '', + voiceLine: null, + hasGstackTag: false, + }); + expect(out).toBe('No tag.'); + }); + + test('trims whitespace from lead', () => { + const out = buildTrimmedDescription({ + lead: ' Lead with whitespace. ', + routingProse: '', + voiceLine: null, + hasGstackTag: true, + }); + expect(out).toBe('Lead with whitespace. (gstack)'); + }); +}); + +describe('buildWhenToInvokeSection', () => { + test('produces markdown H2 with routing prose and voice line', () => { + const out = buildWhenToInvokeSection({ + lead: 'Lead.', + routingProse: 'Use when asked to ship.', + voiceLine: 'Voice triggers (speech-to-text aliases): "ship it".', + hasGstackTag: true, + }); + expect(out).toContain('## When to invoke this skill'); + expect(out).toContain('Use when asked to ship.'); + expect(out).toContain('Voice triggers'); + }); + + test('omits routing block when routingProse is empty', () => { + const out = buildWhenToInvokeSection({ + lead: 'Lead.', + routingProse: '', + voiceLine: null, + hasGstackTag: true, + }); + expect(out).toContain('## When to invoke this skill'); + expect(out).not.toContain('Use when'); + }); + + test('emits even when only voice line is present', () => { + const out = buildWhenToInvokeSection({ + lead: 'Lead.', + routingProse: '', + voiceLine: 'Voice triggers: x.', + hasGstackTag: true, + }); + expect(out).toContain('Voice triggers: x.'); + }); +}); + +describe('applyCatalogTrim', () => { + const minimalSkill = `--- +name: example +description: | + Example skill: this is the first sentence of the description, intended to be + the lead displayed in the catalog. Use when asked to do an example task. + Proactively suggest when the user mentions examples. (gstack) +preamble-tier: 2 +--- + + + +# Example body +Original body content here. +`; + + test('rewrites multi-line description into one-line + body section', () => { + const result = applyCatalogTrim(minimalSkill, 'example'); + expect(result).not.toBeNull(); + const { content, parts } = result!; + // Frontmatter description is now ONE line ending with (gstack) + expect(content).toMatch(/^description: Example skill:[^\n]*\(gstack\)\n/m); + // Body has the When to invoke section + expect(content).toContain('## When to invoke this skill'); + expect(content).toContain('Use when asked to do an example task.'); + expect(content).toContain('Proactively suggest when'); + // Original body still present + expect(content).toContain('# Example body'); + expect(content).toContain('Original body content here.'); + // parts is populated for the aggregator + expect(parts.lead).toContain('Example skill'); + expect(parts.hasGstackTag).toBe(true); + }); + + test('returns null for already-short descriptions (no-op)', () => { + const shortSkill = minimalSkill.replace( + /description: \|[\s\S]*?(?=preamble-tier:)/, + 'description: Already short. (gstack)\n', + ); + const result = applyCatalogTrim(shortSkill, 'example'); + expect(result).toBeNull(); + }); + + test('keeps the newline between description and next YAML field (no field collision)', () => { + // Bug shape from v1.45.0.0 first attempt: produced + // `description: ... (gstack)preamble-tier:` with no newline. + const result = applyCatalogTrim(minimalSkill, 'example'); + expect(result).not.toBeNull(); + expect(result!.content).not.toMatch(/\(gstack\)preamble-tier/); + expect(result!.content).not.toMatch(/\(gstack\)allowed-tools/); + expect(result!.content).toMatch(/\(gstack\)\n[a-z-]+:/); + }); + + test('returns null on content without proper frontmatter', () => { + expect(applyCatalogTrim('no frontmatter here', 'whatever')).toBeNull(); + expect(applyCatalogTrim('---\nincomplete frontmatter', 'whatever')).toBeNull(); + }); +}); + +describe('proactive-suggestions.json determinism (regression for v1.45.0.0 CI freshness fail)', () => { + test('committed JSON keys are alphabetically sorted', () => { + // Reads the actual committed file at scripts/proactive-suggestions.json + // and verifies sort order. Catches regressions to non-sorted output. + const fs = require('fs'); + const path = require('path'); + const json = JSON.parse( + fs.readFileSync(path.join(__dirname, '..', 'scripts', 'proactive-suggestions.json'), 'utf-8'), + ); + const keys = Object.keys(json.skills); + const sorted = [...keys].sort(); + expect(keys).toEqual(sorted); + }); + + test('root skill is keyed as "gstack" (not the checkout directory name)', () => { + // Catches the bug where the root SKILL.md.tmpl's catalog parts get + // registered under the directory basename ("seville-v3" in a Conductor + // worktree, "gstack" on CI). + const fs = require('fs'); + const path = require('path'); + const json = JSON.parse( + fs.readFileSync(path.join(__dirname, '..', 'scripts', 'proactive-suggestions.json'), 'utf-8'), + ); + expect(json.skills).toHaveProperty('gstack'); + // The directory the test runs in must NOT appear as a key. + const repoDir = path.basename(path.resolve(__dirname, '..')); + if (repoDir !== 'gstack') { + expect(json.skills).not.toHaveProperty(repoDir); + } + }); + + test('schema + catalog_mode + note fields are stable', () => { + const fs = require('fs'); + const path = require('path'); + const json = JSON.parse( + fs.readFileSync(path.join(__dirname, '..', 'scripts', 'proactive-suggestions.json'), 'utf-8'), + ); + expect(json).toHaveProperty('$schema'); + expect(json.catalog_mode).toBe('trim'); + expect(typeof json.note).toBe('string'); + // No timestamp field — those cause flapping CI freshness checks. + expect(json).not.toHaveProperty('generated_at'); + expect(json).not.toHaveProperty('timestamp'); + }); +});