test: fix carve-broken CI evals (union reads + section fixtures)

Two CI eval jobs failed on the carved plan-* skills because they read content
that moved into sections/:

- llm-judge (skill-llm-eval): runWorkflowJudge sliced SKILL.md between markers
  like "## Review Sections" / "## CRITICAL RULE" that now live in
  sections/review-sections.md. The markers vanished from the skeleton, so the
  judge scored empty/wrong content. Fix: read the skeleton+sections union.
  Verified: plan-ceo modes / plan-eng sections / plan-design passes all PASS
  (25/25).

- e2e-plan (skill-e2e-plan): setupPlanDir copied only <skill>/SKILL.md into the
  fixture, not sections/. The carved skill's STOP pointed at a section file that
  was absent, so the model improvised a compressed report table instead of the
  canonical "| Review | Trigger | Why | Runs | Status | Findings |". Fix: copy
  sections/ alongside SKILL.md in all 6 setup sites. Verified: report test PASS,
  canonical table emitted.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Garry Tan 2026-06-02 22:40:40 -07:00
parent 0690066c3f
commit 6c610bf55d
No known key found for this signature in database
GPG Key ID: C1F69E85C74EFE1D
2 changed files with 25 additions and 1 deletions

View File

@ -61,6 +61,8 @@ We're building a new user dashboard that shows recent activity, notifications, a
path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
path.join(planDir, 'plan-ceo-review', 'SKILL.md'),
);
// Carved skills (v2 plan T9): copy sections/ so the review workflow + report template are present.
{ const _sec = path.join(ROOT, 'plan-ceo-review', 'sections'); if (fs.existsSync(_sec)) fs.cpSync(_sec, path.join(planDir, 'plan-ceo-review', 'sections'), { recursive: true }); }
});
afterAll(() => {
@ -145,6 +147,8 @@ We're building a new user dashboard that shows recent activity, notifications, a
path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
path.join(planDir, 'plan-ceo-review', 'SKILL.md'),
);
// Carved skills (v2 plan T9): copy sections/ so the review workflow + report template are present.
{ const _sec = path.join(ROOT, 'plan-ceo-review', 'sections'); if (fs.existsSync(_sec)) fs.cpSync(_sec, path.join(planDir, 'plan-ceo-review', 'sections'), { recursive: true }); }
});
afterAll(() => {
@ -213,6 +217,8 @@ describeIfSelected('Plan CEO Review Expansion Energy E2E', ['plan-ceo-review-exp
path.join(ROOT, 'plan-ceo-review', 'SKILL.md'),
path.join(planDir, 'plan-ceo-review', 'SKILL.md'),
);
// Carved skills (v2 plan T9): copy sections/ so the review workflow + report template are present.
{ const _sec = path.join(ROOT, 'plan-ceo-review', 'sections'); if (fs.existsSync(_sec)) fs.cpSync(_sec, path.join(planDir, 'plan-ceo-review', 'sections'), { recursive: true }); }
});
afterAll(() => {
@ -319,6 +325,8 @@ Replace session-cookie auth with JWT tokens. Currently using express-session + R
path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
path.join(planDir, 'plan-eng-review', 'SKILL.md'),
);
// Carved skills (v2 plan T9): copy sections/ so the review workflow + report template are present.
{ const _sec = path.join(ROOT, 'plan-eng-review', 'sections'); if (fs.existsSync(_sec)) fs.cpSync(_sec, path.join(planDir, 'plan-eng-review', 'sections'), { recursive: true }); }
});
afterAll(() => {
@ -415,6 +423,8 @@ export function main() { return Dashboard(); }
path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
path.join(planDir, 'plan-eng-review', 'SKILL.md'),
);
// Carved skills (v2 plan T9): copy sections/ so the review workflow + report template are present.
{ const _sec = path.join(ROOT, 'plan-eng-review', 'sections'); if (fs.existsSync(_sec)) fs.cpSync(_sec, path.join(planDir, 'plan-eng-review', 'sections'), { recursive: true }); }
// Set up remote-slug shim and browse shims (plan-eng-review uses remote-slug for artifact path)
setupBrowseShims(planDir);
@ -663,6 +673,8 @@ We're building a real-time notification system for our SaaS app.
path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
path.join(planDir, 'plan-eng-review', 'SKILL.md'),
);
// Carved skills (v2 plan T9): copy sections/ so the review workflow + report template are present.
{ const _sec = path.join(ROOT, 'plan-eng-review', 'sections'); if (fs.existsSync(_sec)) fs.cpSync(_sec, path.join(planDir, 'plan-eng-review', 'sections'), { recursive: true }); }
});
afterAll(() => {

View File

@ -540,7 +540,19 @@ async function runWorkflowJudge(opts: {
const defaults = { clarity: 4, completeness: 3, actionability: 4 };
const thresholds = { ...defaults, ...opts.thresholds };
const content = fs.readFileSync(path.join(ROOT, opts.skillPath), 'utf-8');
// Read the skeleton + sections UNION so carved skills (v2 plan T9) still
// expose markers that moved into sections/*.md (e.g. plan-eng's "## Review
// Sections" + "## CRITICAL RULE", plan-design's 7 passes). Without this the
// slice markers vanish from the skeleton and the judge scores empty content.
let content = fs.readFileSync(path.join(ROOT, opts.skillPath), 'utf-8');
const secDir = path.join(ROOT, path.dirname(opts.skillPath), 'sections');
if (fs.existsSync(secDir)) {
for (const f of fs.readdirSync(secDir).sort()) {
if (f.endsWith('.md') && !f.endsWith('.md.tmpl')) {
content += '\n' + fs.readFileSync(path.join(secDir, f), 'utf-8');
}
}
}
const startIdx = content.indexOf(opts.startMarker);
if (startIdx === -1) throw new Error(`Start marker not found in ${opts.skillPath}: "${opts.startMarker}"`);