fix: cap gstack skill descriptions for codex (#251)

Compresses SKILL.md.tmpl root description to at most 1024 chars (Codex description length limit).
Adds description-length validation test. Includes /autoplan in compressed
skill list (added since PR was branched).

Co-authored-by: cweill <cweill@users.noreply.github.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan 2026-03-22 14:43:14 -07:00
parent 407b156920
commit 1c0927f7c4
No known key found for this signature in database
GPG Key ID: C1F69E85C74EFE1D
4 changed files with 77 additions and 114 deletions

View File

@ -1,44 +1,18 @@
--- ---
name: gstack name: gstack
description: | description: |
Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with Fast headless browser for QA testing and site dogfooding. Navigate pages, interact with
elements, verify page state, diff before/after actions, take annotated screenshots, check elements, verify state, diff before/after, take annotated screenshots, test responsive
responsive layouts, test forms and uploads, handle dialogs, and assert element states. layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots.
user flow, or file a bug with evidence. Also suggest adjacent gstack skills by stage: brainstorm /office-hours; strategy
/plan-ceo-review; architecture /plan-eng-review; design /plan-design-review or
gstack also includes development workflow skills. When you notice the user is at /design-consultation; auto-review /autoplan; debugging /investigate; QA /qa; code review
these stages, suggest the appropriate skill: /review; visual audit /design-review; shipping /ship; docs /document-release; retro
- Brainstorming a new idea → suggest /office-hours /retro; second opinion /codex; prod safety /careful or /guard; scoped edits /freeze or
- Reviewing a plan (strategy) → suggest /plan-ceo-review /unfreeze; gstack upgrades /gstack-upgrade. If the user opts out of suggestions, stop
- Reviewing a plan (architecture) → suggest /plan-eng-review and run gstack-config set proactive false; if they opt back in, run gstack-config set
- Reviewing a plan (design) → suggest /plan-design-review proactive true.
- Auto-reviewing a plan (all reviews at once) → suggest /autoplan
- Creating a design system → suggest /design-consultation
- Debugging errors → suggest /investigate
- Testing the app → suggest /qa
- Code review before merge → suggest /review
- Visual design audit → suggest /design-review
- Ready to deploy / create PR → suggest /ship
- Post-ship doc updates → suggest /document-release
- Weekly retrospective → suggest /retro
- Wanting a second opinion or adversarial code review → suggest /codex
- Working with production or live systems → suggest /careful
- Want to scope edits to one module/directory → suggest /freeze
- Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard
- Removing edit restrictions → suggest /unfreeze
- Upgrading gstack to latest version → suggest /gstack-upgrade
If the user pushes back on skill suggestions ("stop suggesting things",
"I don't need suggestions", "too aggressive"):
1. Stop suggesting for the rest of this session
2. Run: gstack-config set proactive false
3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive
again if you change your mind."
If the user says "be proactive again" or "turn on suggestions":
1. Run: gstack-config set proactive true
2. Say: "Proactive suggestions are back on."
--- ---
<!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly --> <!-- AUTO-GENERATED from SKILL.md.tmpl — do not edit directly -->
<!-- Regenerate: bun run gen:skill-docs --> <!-- Regenerate: bun run gen:skill-docs -->

View File

@ -2,44 +2,18 @@
name: gstack name: gstack
version: 1.1.0 version: 1.1.0
description: | description: |
Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with Fast headless browser for QA testing and site dogfooding. Navigate pages, interact with
elements, verify page state, diff before/after actions, take annotated screenshots, check elements, verify state, diff before/after, take annotated screenshots, test responsive
responsive layouts, test forms and uploads, handle dialogs, and assert element states. layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots.
user flow, or file a bug with evidence. Also suggest adjacent gstack skills by stage: brainstorm /office-hours; strategy
/plan-ceo-review; architecture /plan-eng-review; design /plan-design-review or
gstack also includes development workflow skills. When you notice the user is at /design-consultation; auto-review /autoplan; debugging /investigate; QA /qa; code review
these stages, suggest the appropriate skill: /review; visual audit /design-review; shipping /ship; docs /document-release; retro
- Brainstorming a new idea → suggest /office-hours /retro; second opinion /codex; prod safety /careful or /guard; scoped edits /freeze or
- Reviewing a plan (strategy) → suggest /plan-ceo-review /unfreeze; gstack upgrades /gstack-upgrade. If the user opts out of suggestions, stop
- Reviewing a plan (architecture) → suggest /plan-eng-review and run gstack-config set proactive false; if they opt back in, run gstack-config set
- Reviewing a plan (design) → suggest /plan-design-review proactive true.
- Auto-reviewing a plan (all reviews at once) → suggest /autoplan
- Creating a design system → suggest /design-consultation
- Debugging errors → suggest /investigate
- Testing the app → suggest /qa
- Code review before merge → suggest /review
- Visual design audit → suggest /design-review
- Ready to deploy / create PR → suggest /ship
- Post-ship doc updates → suggest /document-release
- Weekly retrospective → suggest /retro
- Wanting a second opinion or adversarial code review → suggest /codex
- Working with production or live systems → suggest /careful
- Want to scope edits to one module/directory → suggest /freeze
- Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard
- Removing edit restrictions → suggest /unfreeze
- Upgrading gstack to latest version → suggest /gstack-upgrade
If the user pushes back on skill suggestions ("stop suggesting things",
"I don't need suggestions", "too aggressive"):
1. Stop suggesting for the rest of this session
2. Run: gstack-config set proactive false
3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive
again if you change your mind."
If the user says "be proactive again" or "turn on suggestions":
1. Run: gstack-config set proactive true
2. Say: "Proactive suggestions are back on."
allowed-tools: allowed-tools:
- Bash - Bash
- Read - Read

View File

@ -2,44 +2,18 @@
name: gstack name: gstack
version: 1.1.0 version: 1.1.0
description: | description: |
Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with Fast headless browser for QA testing and site dogfooding. Navigate pages, interact with
elements, verify page state, diff before/after actions, take annotated screenshots, check elements, verify state, diff before/after, take annotated screenshots, test responsive
responsive layouts, test forms and uploads, handle dialogs, and assert element states. layouts, forms, uploads, dialogs, and capture bug evidence. Use when asked to open or
~100ms per command. Use when you need to test a feature, verify a deployment, dogfood a test a site, verify a deployment, dogfood a user flow, or file a bug with screenshots.
user flow, or file a bug with evidence. Also suggest adjacent gstack skills by stage: brainstorm /office-hours; strategy
/plan-ceo-review; architecture /plan-eng-review; design /plan-design-review or
gstack also includes development workflow skills. When you notice the user is at /design-consultation; auto-review /autoplan; debugging /investigate; QA /qa; code review
these stages, suggest the appropriate skill: /review; visual audit /design-review; shipping /ship; docs /document-release; retro
- Brainstorming a new idea → suggest /office-hours /retro; second opinion /codex; prod safety /careful or /guard; scoped edits /freeze or
- Reviewing a plan (strategy) → suggest /plan-ceo-review /unfreeze; gstack upgrades /gstack-upgrade. If the user opts out of suggestions, stop
- Reviewing a plan (architecture) → suggest /plan-eng-review and run gstack-config set proactive false; if they opt back in, run gstack-config set
- Reviewing a plan (design) → suggest /plan-design-review proactive true.
- Auto-reviewing a plan (all reviews at once) → suggest /autoplan
- Creating a design system → suggest /design-consultation
- Debugging errors → suggest /investigate
- Testing the app → suggest /qa
- Code review before merge → suggest /review
- Visual design audit → suggest /design-review
- Ready to deploy / create PR → suggest /ship
- Post-ship doc updates → suggest /document-release
- Weekly retrospective → suggest /retro
- Wanting a second opinion or adversarial code review → suggest /codex
- Working with production or live systems → suggest /careful
- Want to scope edits to one module/directory → suggest /freeze
- Maximum safety mode (destructive warnings + edit restrictions) → suggest /guard
- Removing edit restrictions → suggest /unfreeze
- Upgrading gstack to latest version → suggest /gstack-upgrade
If the user pushes back on skill suggestions ("stop suggesting things",
"I don't need suggestions", "too aggressive"):
1. Stop suggesting for the rest of this session
2. Run: gstack-config set proactive false
3. Say: "Got it — I'll stop suggesting skills. Just tell me to be proactive
again if you change your mind."
If the user says "be proactive again" or "turn on suggestions":
1. Run: gstack-config set proactive true
2. Say: "Proactive suggestions are back on."
allowed-tools: allowed-tools:
- Bash - Bash
- Read - Read

View File

@ -5,6 +5,39 @@ import * as fs from 'fs';
import * as path from 'path'; import * as path from 'path';
const ROOT = path.resolve(import.meta.dir, '..'); const ROOT = path.resolve(import.meta.dir, '..');
// Maximum allowed length of a generated SKILL.md frontmatter description,
// compared against String.length of the extracted description text.
const MAX_SKILL_DESCRIPTION_LENGTH = 1024;
/**
 * Extracts the `description` value from the YAML frontmatter of a SKILL.md file.
 *
 * Supports an inline value (`description: text`) and an indented block value
 * introduced by an empty value or a block-scalar header (`|`, `>`, optionally
 * followed by chomping/indentation modifiers such as `|-` or `>+`).
 * Block values are dedented by the indentation of their first non-empty line
 * and trimmed, so indentation never counts towards the description length.
 * Folded (`>`) blocks are returned with their line breaks intact (not folded
 * into spaces), which leaves the character count unchanged.
 *
 * @param content - Full text of a SKILL.md file, starting with a `---` line.
 * @returns The description text, or `''` when the frontmatter has no description.
 * @throws Error when the opening or closing `---` frontmatter delimiter is missing.
 */
function extractDescription(content: string): string {
  // Normalize CRLF so the `$`-anchored regexes and fixed offsets work on any checkout.
  const normalized = content.replace(/\r\n/g, '\n');
  if (!normalized.startsWith('---\n')) {
    throw new Error('SKILL.md frontmatter must start with a "---" line');
  }
  // Start at index 3 (the newline ending the opening delimiter) so that an
  // empty frontmatter ("---\n---") is still recognised.
  const fmEnd = normalized.indexOf('\n---', 3);
  if (fmEnd < 0) {
    throw new Error('SKILL.md frontmatter is missing its closing "---" line');
  }

  const lines = normalized.slice(4, fmEnd).split('\n');
  const keyIndex = lines.findIndex((line) => /^description:/.test(line));
  if (keyIndex === -1) {
    return '';
  }

  const inlineValue = (lines[keyIndex] ?? '').replace(/^description:\s*/, '').trim();
  // A block-scalar header such as "|", "|-", ">" or ">2-" is not the value itself;
  // treating it as one would report a 1-2 character description and hide oversized text.
  const isBlockHeader = /^[|>][-+0-9]*$/.test(inlineValue);
  if (inlineValue !== '' && !isBlockHeader) {
    return inlineValue;
  }

  // The block continues while lines are blank or indented; the next top-level key ends it.
  const blockLines: string[] = [];
  for (const line of lines.slice(keyIndex + 1)) {
    if (line !== '' && !/^\s/.test(line)) {
      break;
    }
    blockLines.push(line);
  }

  const leadingWidth = (text: string): number => text.length - text.replace(/^\s+/, '').length;
  const firstContentLine = blockLines.find((line) => line.trim() !== '');
  const indent = firstContentLine === undefined ? 0 : leadingWidth(firstContentLine);

  return blockLines
    .map((line) => line.slice(Math.min(indent, leadingWidth(line))))
    .join('\n')
    .trim();
}
// Dynamic template discovery — matches the generator's findTemplates() behavior. // Dynamic template discovery — matches the generator's findTemplates() behavior.
// New skills automatically get test coverage without updating a static list. // New skills automatically get test coverage without updating a static list.
@ -98,6 +131,14 @@ describe('gen-skill-docs', () => {
} }
}); });
test(`every generated SKILL.md description stays within ${MAX_SKILL_DESCRIPTION_LENGTH} chars`, () => {
  // Collect every offender instead of stopping at the first one, so a single
  // run names each oversized SKILL.md together with its measured length.
  const oversized: string[] = [];
  for (const skill of ALL_SKILLS) {
    const skillPath = path.join(ROOT, skill.dir, 'SKILL.md');
    const content = fs.readFileSync(skillPath, 'utf-8');
    const { length } = extractDescription(content);
    if (length > MAX_SKILL_DESCRIPTION_LENGTH) {
      oversized.push(`${skillPath} (${length} chars)`);
    }
  }
  // Comparing against an empty list makes the failure diff show the offending files.
  expect(oversized).toEqual([]);
});
test('generated files are fresh (match --dry-run)', () => { test('generated files are fresh (match --dry-run)', () => {
const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--dry-run'], { const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--dry-run'], {
cwd: ROOT, cwd: ROOT,