From 2aff29e956244a8c4629eedbff240a7d41f5934f Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 25 May 2026 21:09:59 -0700 Subject: [PATCH] fix(catalog): preserve routing prose when first sentence exceeds 200 chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit splitCatalogDescription truncated the lead BEFORE computing routing extraction, which meant skills whose first sentence was over 200 chars (design-consultation: 207 chars) had their entire routing prose silently dropped — the "## When to invoke" body section came out empty. Root cause: routing was extracted via `collapsed.indexOf(lead)` after lead was suffixed with "...". The "..." never appeared in the original string, so indexOf returned -1 and routingProse fell back to empty. Fix: compute routing from sentenceLead (the untruncated first sentence) BEFORE truncating the displayed lead. The displayed lead still gets "..." when over 200 chars, but the routing extraction uses the real boundary. Also: refresh golden snapshots for claude/codex/factory ship and update two unit tests that asserted v1.44 behavior: - skill-validation.test.ts: trigger-phrase + proactive-routing tests now search whole content, not just frontmatter (T4 moved them to a body "## When to invoke" section) - writing-style-resolver.test.ts: jargon-list assertion now expects the T3 reference pointer, not the inline list Test plan: - bun test test/skill-validation.test.ts test/writing-style-resolver.test.ts test/host-config.test.ts test/skill-size-budget.test.ts test/parity-suite.test.ts test/skill-coverage-matrix.test.ts test/skill-coverage-floor.test.ts test/cso-preserved.test.ts test/resolver-entry.test.ts test/helpers/capture-parity-baseline.test.ts test/gen-skill-docs.test.ts: 1134 pass, 0 fail - Manual verify: design-consultation/SKILL.md "## When to invoke this skill" body section now contains "Use when asked to..." + "Proactively suggest..." Co-Authored-By: Claude Opus 4.7 (1M context) --- design-consultation/SKILL.md | 6 ++ scripts/gen-skill-docs.ts | 31 ++++--- scripts/proactive-suggestions.json | 2 +- test/fixtures/golden/claude-ship-SKILL.md | 94 +++------------------- test/fixtures/golden/codex-ship-SKILL.md | 79 +----------------- test/fixtures/golden/factory-ship-SKILL.md | 79 +----------------- test/skill-validation.test.ts | 16 ++-- test/writing-style-resolver.test.ts | 14 +++- 8 files changed, 57 insertions(+), 264 deletions(-) diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md index 3f7db26da..fda9c815d 100644 --- a/design-consultation/SKILL.md +++ b/design-consultation/SKILL.md @@ -46,6 +46,12 @@ gbrain: ## When to invoke this skill +Creates DESIGN.md as your project's design source +of truth. For existing sites, use /plan-design-review to infer the system instead. +Use when asked to "design system", "brand guidelines", or "create DESIGN.md". +Proactively suggest when starting a new project's UI with no existing +design system or DESIGN.md. + ## Preamble (run first) ```bash diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index d928cd591..f81087e77 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -245,29 +245,38 @@ export function splitCatalogDescription(description: string): CatalogParts { // First normalize to single-line for sentence detection, then back out. const collapsed = working.replace(/\s+/g, ' ').trim(); const sentenceMatch = collapsed.match(/^([^.!?]*[.!?])(?:\s|$)/); - let lead = sentenceMatch ? sentenceMatch[1].trim() : collapsed.split(/\s/).slice(0, 20).join(' '); + // sentenceLead is the FULL first sentence (no truncation). We compute routing + // from this position, then optionally truncate the displayed lead afterwards. + // Truncating first then computing routing was the v1.45.0.0 bug — when the + // first sentence exceeded 200 chars, the routing extraction would lose the + // entire tail of the description (design-consultation's "Use when..." + // routing prose silently dropped). + const sentenceLead = sentenceMatch ? sentenceMatch[1].trim() : collapsed.split(/\s/).slice(0, 20).join(' '); - // If the lead would be too long, trim to the first 140 chars at a word boundary + // Routing prose: everything AFTER the first sentence boundary in the collapsed view. + const leadInCollapsed = collapsed.indexOf(sentenceLead); + const routingCollapsed = leadInCollapsed >= 0 + ? collapsed.slice(leadInCollapsed + sentenceLead.length).trim() + : ''; + + // Now produce the displayed lead — truncated if too long. The original + // sentenceLead is preserved for routing extraction below. + let lead = sentenceLead; if (lead.length > 200) { const trunc = lead.slice(0, 197); const lastSpace = trunc.lastIndexOf(' '); lead = (lastSpace > 60 ? trunc.slice(0, lastSpace) : trunc) + '...'; } - - const leadInCollapsed = collapsed.indexOf(lead); - const routingCollapsed = leadInCollapsed >= 0 - ? collapsed.slice(leadInCollapsed + lead.length).trim() - : ''; // Restore line breaks for routing prose by mapping back to original layout. // Use original whitespace structure where possible; fall back to collapsed. + // Anchor recovery on sentenceLead (the untruncated first sentence) — not + // `lead` (which may have a "..." suffix and won't substring-match `working`). let routingProse = routingCollapsed; - // Try to recover the multi-line layout: split working at the lead boundary. - const collapsedLeadIdx = working.replace(/\s+/g, ' ').indexOf(lead); + const collapsedLeadIdx = working.replace(/\s+/g, ' ').indexOf(sentenceLead); if (collapsedLeadIdx >= 0) { - // Walk the original working string until we've consumed lead.length collapsed chars let consumed = 0; let cut = 0; - for (let i = 0; i < working.length && consumed < collapsedLeadIdx + lead.length; i++) { + for (let i = 0; i < working.length && consumed < collapsedLeadIdx + sentenceLead.length; i++) { if (/\s/.test(working[i])) { if (i === 0 || /\s/.test(working[i - 1])) continue; consumed += 1; diff --git a/scripts/proactive-suggestions.json b/scripts/proactive-suggestions.json index 97caec81b..122d46cc9 100644 --- a/scripts/proactive-suggestions.json +++ b/scripts/proactive-suggestions.json @@ -45,7 +45,7 @@ }, "design-consultation": { "lead": "Design consultation: understands your product, researches the landscape, proposes a complete design system (aesthetic, typography, color, layout, spacing, motion), and generates font+color preview...", - "routing": "", + "routing": "Creates DESIGN.md as your project's design source\nof truth. For existing sites, use /plan-design-review to infer the system instead.\nUse when asked to \"design system\", \"brand guidelines\", or \"create DESIGN.md\".\nProactively suggest when starting a new project's UI with no existing\ndesign system or DESIGN.md.", "voice_line": null }, "learn": { diff --git a/test/fixtures/golden/claude-ship-SKILL.md b/test/fixtures/golden/claude-ship-SKILL.md index 38da52874..090798914 100644 --- a/test/fixtures/golden/claude-ship-SKILL.md +++ b/test/fixtures/golden/claude-ship-SKILL.md @@ -2,12 +2,7 @@ name: ship preamble-tier: 4 version: 1.0.0 -description: | - Ship workflow: detect + merge base branch, run tests, review diff, bump VERSION, - update CHANGELOG, commit, push, create PR. Use when asked to "ship", "deploy", - "push to main", "create a PR", "merge and push", or "get it deployed". - Proactively invoke this skill (do NOT push/PR directly) when the user says code - is ready, asks about deploying, wants to push code up, or asks to create a PR. (gstack) +description: Ship workflow: detect + merge base branch, run tests, review diff, bump VERSION, update CHANGELOG, commit, push, create PR. (gstack) allowed-tools: - Bash - Read @@ -27,6 +22,14 @@ triggers: + +## When to invoke this skill + +Use when asked to "ship", "deploy", +"push to main", "create a PR", "merge and push", or "get it deployed". +Proactively invoke this skill (do NOT push/PR directly) when the user says code +is ready, asks about deploying, wants to push code up, or asks to create a PR. + ## Preamble (run first) ```bash @@ -553,84 +556,7 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i - User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section. - Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses. -Jargon list, gloss on first use if the term appears: -- idempotent -- idempotency -- race condition -- deadlock -- cyclomatic complexity -- N+1 -- N+1 query -- backpressure -- memoization -- eventual consistency -- CAP theorem -- CORS -- CSRF -- XSS -- SQL injection -- prompt injection -- DDoS -- rate limit -- throttle -- circuit breaker -- load balancer -- reverse proxy -- SSR -- CSR -- hydration -- tree-shaking -- bundle splitting -- code splitting -- hot reload -- tombstone -- soft delete -- cascade delete -- foreign key -- composite index -- covering index -- OLTP -- OLAP -- sharding -- replication lag -- quorum -- two-phase commit -- saga -- outbox pattern -- inbox pattern -- optimistic locking -- pessimistic locking -- thundering herd -- cache stampede -- bloom filter -- consistent hashing -- virtual DOM -- reconciliation -- closure -- hoisting -- tail call -- GIL -- zero-copy -- mmap -- cold start -- warm start -- green-blue deploy -- canary deploy -- feature flag -- kill switch -- dead letter queue -- fan-out -- fan-in -- debounce -- throttle (UI) -- hydration mismatch -- memory leak -- GC pause -- heap fragmentation -- stack overflow -- null pointer -- dangling pointer -- buffer overflow +Curated jargon list lives at `~/.claude/skills/gstack/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases. ## Completeness Principle — Boil the Lake diff --git a/test/fixtures/golden/codex-ship-SKILL.md b/test/fixtures/golden/codex-ship-SKILL.md index d0159842f..5610f747d 100644 --- a/test/fixtures/golden/codex-ship-SKILL.md +++ b/test/fixtures/golden/codex-ship-SKILL.md @@ -542,84 +542,7 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i - User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section. - Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses. -Jargon list, gloss on first use if the term appears: -- idempotent -- idempotency -- race condition -- deadlock -- cyclomatic complexity -- N+1 -- N+1 query -- backpressure -- memoization -- eventual consistency -- CAP theorem -- CORS -- CSRF -- XSS -- SQL injection -- prompt injection -- DDoS -- rate limit -- throttle -- circuit breaker -- load balancer -- reverse proxy -- SSR -- CSR -- hydration -- tree-shaking -- bundle splitting -- code splitting -- hot reload -- tombstone -- soft delete -- cascade delete -- foreign key -- composite index -- covering index -- OLTP -- OLAP -- sharding -- replication lag -- quorum -- two-phase commit -- saga -- outbox pattern -- inbox pattern -- optimistic locking -- pessimistic locking -- thundering herd -- cache stampede -- bloom filter -- consistent hashing -- virtual DOM -- reconciliation -- closure -- hoisting -- tail call -- GIL -- zero-copy -- mmap -- cold start -- warm start -- green-blue deploy -- canary deploy -- feature flag -- kill switch -- dead letter queue -- fan-out -- fan-in -- debounce -- throttle (UI) -- hydration mismatch -- memory leak -- GC pause -- heap fragmentation -- stack overflow -- null pointer -- dangling pointer -- buffer overflow +Curated jargon list lives at `$GSTACK_ROOT/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases. ## Completeness Principle — Boil the Lake diff --git a/test/fixtures/golden/factory-ship-SKILL.md b/test/fixtures/golden/factory-ship-SKILL.md index 9ccdfeeeb..a7426c9ca 100644 --- a/test/fixtures/golden/factory-ship-SKILL.md +++ b/test/fixtures/golden/factory-ship-SKILL.md @@ -544,84 +544,7 @@ Applies to AskUserQuestion, user replies, and findings. AskUserQuestion Format i - User-turn override wins: if the current message asks for terse / no explanations / just the answer, skip this section. - Terse mode (EXPLAIN_LEVEL: terse): no glosses, no outcome-framing layer, shorter responses. -Jargon list, gloss on first use if the term appears: -- idempotent -- idempotency -- race condition -- deadlock -- cyclomatic complexity -- N+1 -- N+1 query -- backpressure -- memoization -- eventual consistency -- CAP theorem -- CORS -- CSRF -- XSS -- SQL injection -- prompt injection -- DDoS -- rate limit -- throttle -- circuit breaker -- load balancer -- reverse proxy -- SSR -- CSR -- hydration -- tree-shaking -- bundle splitting -- code splitting -- hot reload -- tombstone -- soft delete -- cascade delete -- foreign key -- composite index -- covering index -- OLTP -- OLAP -- sharding -- replication lag -- quorum -- two-phase commit -- saga -- outbox pattern -- inbox pattern -- optimistic locking -- pessimistic locking -- thundering herd -- cache stampede -- bloom filter -- consistent hashing -- virtual DOM -- reconciliation -- closure -- hoisting -- tail call -- GIL -- zero-copy -- mmap -- cold start -- warm start -- green-blue deploy -- canary deploy -- feature flag -- kill switch -- dead letter queue -- fan-out -- fan-in -- debounce -- throttle (UI) -- hydration mismatch -- memory leak -- GC pause -- heap fragmentation -- stack overflow -- null pointer -- dangling pointer -- buffer overflow +Curated jargon list lives at `$GSTACK_ROOT/scripts/jargon-list.json` (80+ terms). On the first jargon term you encounter this session, Read that file once; treat the `terms` array as the canonical list. The list is repo-owned and may grow between releases. ## Completeness Principle — Boil the Lake diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts index 7df535552..a7f51cca1 100644 --- a/test/skill-validation.test.ts +++ b/test/skill-validation.test.ts @@ -1480,14 +1480,15 @@ describe('Skill trigger phrases', () => { const skillPath = path.join(ROOT, skill, 'SKILL.md'); if (!fs.existsSync(skillPath)) return; const content = fs.readFileSync(skillPath, 'utf-8'); - // Extract description from frontmatter - const frontmatterEnd = content.indexOf('---', 4); - const frontmatter = content.slice(0, frontmatterEnd); - expect(frontmatter).toMatch(/Use when/i); + // v1.45.0.0 catalog trim moved trigger prose out of frontmatter into a + // body "## When to invoke" section. Search the full file content, not + // just frontmatter. The trigger phrase must still appear somewhere in + // the skill so agents can match user requests to the skill. + expect(content).toMatch(/Use when/i); }); } - // Skills with proactive triggers should have "Proactively suggest" in description + // Skills with proactive triggers should have "Proactively suggest" somewhere in the skill. const SKILLS_REQUIRING_PROACTIVE = [ 'qa', 'qa-only', 'ship', 'review', 'investigate', 'office-hours', 'plan-ceo-review', 'plan-eng-review', 'plan-design-review', @@ -1499,9 +1500,8 @@ describe('Skill trigger phrases', () => { const skillPath = path.join(ROOT, skill, 'SKILL.md'); if (!fs.existsSync(skillPath)) return; const content = fs.readFileSync(skillPath, 'utf-8'); - const frontmatterEnd = content.indexOf('---', 4); - const frontmatter = content.slice(0, frontmatterEnd); - expect(frontmatter).toMatch(/Proactively (suggest|invoke)/i); + // Same catalog-trim consideration — search the full file content. + expect(content).toMatch(/Proactively (suggest|invoke)/i); }); } }); diff --git a/test/writing-style-resolver.test.ts b/test/writing-style-resolver.test.ts index fce957c22..6d7444536 100644 --- a/test/writing-style-resolver.test.ts +++ b/test/writing-style-resolver.test.ts @@ -49,11 +49,17 @@ describe('Writing Style preamble section', () => { expect(out).toMatch(/terse|no explanations|user-turn override|current message/i); }); - test('tier 2+ preamble inlines jargon list', () => { + test('tier 2+ preamble references jargon list by path (v1.45.0.0 T3 — pointer, not inline)', () => { const out = generatePreamble(makeCtx('claude', 2)); - // Spot-check a few terms from scripts/jargon-list.json - expect(out).toContain('idempotent'); - expect(out).toContain('race condition'); + // T3 dedup: the 80-term jargon list lives in scripts/jargon-list.json. + // The Writing Style section points at the file rather than inlining it, + // saving ~70 KB across the corpus. Agents Read the JSON on first + // jargon term encountered per session. + expect(out).toContain('jargon-list.json'); + expect(out).toContain('Curated jargon list'); + // Negative check: the literal term lines should NOT be inlined any more. + expect(out).not.toMatch(/^- idempotent$/m); + expect(out).not.toMatch(/^- race condition$/m); }); test('tier 2+ preamble includes terse-mode gate condition', () => {