From 4b89406ebea8c6d70f8e1fb1f8770feb830878e0 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Fri, 8 May 2026 23:51:08 -0700 Subject: [PATCH] test(domain-skills): cover #1369 classifier_score=0 quarantine + score>0 promote path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pre-existing T6 test seeded skills via writeSkill (which defaults classifier_score to 0 until L4 is rewired) and then expected 3 uses to auto-promote. PR #1369 added `current.classifier_score > 0` to the gate specifically to block that path — a quarantined skill written under the influence of a poisoned page would otherwise auto-promote after three benign uses. Updated test asserts both halves of the new contract: - classifier_score=0 + 3 uses → stays quarantined (the security guarantee) - classifier_score>0 + 3 more uses → promotes to active (unblock path) Catches both regressions: the gate going away (would re-allow the bypass) and the unblock path breaking (would silently quarantine all skills forever once L4 is rewired). --- browse/test/domain-skills-e2e.test.ts | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/browse/test/domain-skills-e2e.test.ts b/browse/test/domain-skills-e2e.test.ts index 4c26ac56b..29d33c4bc 100644 --- a/browse/test/domain-skills-e2e.test.ts +++ b/browse/test/domain-skills-e2e.test.ts @@ -84,11 +84,34 @@ describe('$B domain-skill (E2E gate tier)', () => { expect(out).toContain('[quarantined] 127.0.0.1'); }); - test('readSkill returns null until the skill is promoted to active (T6)', async () => { + test('readSkill returns null while quarantined; classifier_score=0 blocks auto-promote (#1369)', async () => { const { readSkill, recordSkillUse } = await import('../src/domain-skills'); + const jsonlPath = path.join(TMP_HOME, 'projects', 'e2e-test-slug', 'learnings.jsonl'); + // While quarantined, readSkill returns null expect(await readSkill('127.0.0.1', 'e2e-test-slug')).toBeNull(); - // Three uses without flag triggers auto-promote + + // Three uses without flag with classifier_score=0 (the default until L4 is + // rewired) MUST stay quarantined per #1369. The gate is load-bearing: a + // quarantined skill written under the influence of a poisoned page would + // otherwise auto-promote after three benign uses without the L4 body scan + // ever running. + await recordSkillUse('127.0.0.1', 'e2e-test-slug', false); + await recordSkillUse('127.0.0.1', 'e2e-test-slug', false); + await recordSkillUse('127.0.0.1', 'e2e-test-slug', false); + expect(await readSkill('127.0.0.1', 'e2e-test-slug')).toBeNull(); + + // Simulate L4 having scored the body (classifier_score > 0) by appending a + // new tombstone row with a non-zero score, then verify the next use + // promotes. This documents the unblock path the day L4 starts populating + // classifier_score for skill writes again. + const lines = (await fs.readFile(jsonlPath, 'utf8')).trim().split('\n').map((l) => JSON.parse(l)); + const latest = lines.filter((r: any) => r.type === 'domain' && r.host === '127.0.0.1').pop(); + expect(latest).toBeTruthy(); + const scored = { ...latest, classifier_score: 0.05, version: latest.version + 1, updated_ts: new Date().toISOString() }; + await fs.appendFile(jsonlPath, JSON.stringify(scored) + '\n'); + + // Now three uses promote await recordSkillUse('127.0.0.1', 'e2e-test-slug', false); await recordSkillUse('127.0.0.1', 'e2e-test-slug', false); await recordSkillUse('127.0.0.1', 'e2e-test-slug', false);