From cf1d862fa0e18b4496a1efa2b1dcdcbb0b1fa46e Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 30 May 2026 11:32:23 -0700 Subject: [PATCH] test: rebaseline parity-suite v1.44.1 -> v1.53.0.0 The frozen v1.44.1 anchor went stale: five planning skills (plan-ceo-review, plan-eng-review, plan-design-review, investigate, office-hours) crept past the 1.05x ceiling via legitimate v1.49-v1.53 growth (brain-aware planning + the v1.53 redaction guard), so `bun test` was red on a clean checkout of main. Capture a fresh baseline at HEAD (bun run scripts/capture-baseline.ts --tag v1.53.0.0) and re-point the test at it. The per-skill 1.05 ratio is kept, so future bloat is still caught; only the anchor moved. Mirrors the earlier skill-size-budget rebase (v1.44.1 -> v1.47.0.0). Historical v1.44.1 / v1.46.0.0 / v1.47.0.0 baselines are retained for the v1->v2 audit trail. The captured skill bytes equal origin/main exactly (this branch left every SKILL.md untouched). Clears the pre-existing failures noted in the v1.53.0.0 CHANGELOG. Co-Authored-By: Claude Opus 4.8 (1M context) --- test/parity-suite.test.ts | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/test/parity-suite.test.ts b/test/parity-suite.test.ts index 9d6da4868..32ce49f12 100644 --- a/test/parity-suite.test.ts +++ b/test/parity-suite.test.ts @@ -2,9 +2,16 @@ * Cathedral parity suite — gate-tier (free, structural + content checks). * * Runs every PARITY_INVARIANTS check against the current SKILL.md output - * vs the v1.44.1 baseline. Failures get an actionable, per-skill report + * vs the v1.53.0.0 baseline. Failures get an actionable, per-skill report * showing missing phrases, missing headings, and size ratios. * + * Baseline rebased v1.44.1 → v1.53.0.0: the brain-aware-planning releases + * (v1.49–v1.52) plus the v1.53 redaction guard pushed five planning skills + * past the 5% ratchet on the frozen v1.44.1 anchor. Rebasing absorbs that + * legitimate growth at HEAD while keeping the per-skill 1.05 ratio so future + * bloat is still caught. Historical v1.44.1 / v1.46.0.0 / v1.47.0.0 baselines + * are retained in test/fixtures/ for the v1→v2 audit trail. + * * Periodic-tier LLM-judge parity (paid) lands in Phase B (v2.0.0.0) * alongside the sections/ extraction. Plumbing is in parity-harness.ts. */ @@ -16,9 +23,9 @@ import { runParityChecks, PARITY_INVARIANTS } from './helpers/parity-harness'; import type { ParityBaseline } from './helpers/capture-parity-baseline'; const REPO_ROOT = path.resolve(import.meta.dir, '..'); -const BASELINE_PATH = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.44.1.json'); +const BASELINE_PATH = path.join(REPO_ROOT, 'test', 'fixtures', 'parity-baseline-v1.53.0.0.json'); -describe('parity suite vs v1.44.1 baseline (gate, free)', () => { +describe('parity suite vs v1.53.0.0 baseline (gate, free)', () => { test('baseline exists', () => { expect(fs.existsSync(BASELINE_PATH)).toBe(true); }); @@ -43,7 +50,7 @@ describe('parity suite vs v1.44.1 baseline (gate, free)', () => { .map(d => ` ${d.skill}:\n - ${d.failures.join('\n - ')}`) .join('\n'); throw new Error( - `${report.failed} skill(s) failed parity checks vs v1.44.1:\n${failureMessages}`, + `${report.failed} skill(s) failed parity checks vs ${baseline.tag}:\n${failureMessages}`, ); }); });