fix(taste): rejected preferences must carry rejection-strength confidence

bumpPref() computed confidence as approved_count/(total+1) for both
buckets. Rejected-bucket entries never gain approvals, so their
confidence was pinned to 0 — making `show` print "conf 0.00" for every
rejection, the rejection ranking a no-op, and the taste-drift warning
(opp.confidence >= 0.6) unreachable through real CLI use.

Compute confidence from this bucket's own count. Approved-bucket
behavior is unchanged (total == approved_count there); rejected entries
now reflect rejection strength.

Fixes #1776

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jayesh Betala 2026-05-28 21:39:49 +05:30
parent 19770ea8b4
commit 5eb7f7e2f3
2 changed files with 49 additions and 2 deletions

View File

@ -196,9 +196,14 @@ function bumpPref(list: Preference[], value: string, opposite: Preference[], act
entry.rejected_count += 1;
}
entry.last_seen = now;
// Laplace-smoothed confidence
// Laplace-smoothed confidence in THIS bucket's signal: an approved entry's
// confidence reflects how strongly it's approved, a rejected entry's how
// strongly it's rejected. Using approved_count for both buckets pinned every
// rejected entry to 0 (rejected entries never gain approvals), which made the
// `show` ranking meaningless and the taste-drift warning unreachable.
const total = entry.approved_count + entry.rejected_count;
entry.confidence = entry.approved_count / (total + 1);
const ownCount = action === 'approved' ? entry.approved_count : entry.rejected_count;
entry.confidence = ownCount / (total + 1);
// Flag conflict if the opposite bucket has a strong entry for this value
const opp = opposite.find(p => p.value.toLowerCase() === value.toLowerCase());
if (opp && opp.approved_count + opp.rejected_count >= 3 && opp.confidence >= 0.6) {

View File

@ -141,6 +141,35 @@ describe('taste-engine: Laplace-smoothed confidence', () => {
expect(rejected.rejected_count).toBe(1);
expect(rejected.approved_count).toBe(0);
});
test('repeated rejections raise rejected confidence toward 1', () => {
  // Reject the same font value five times through the CLI, each time under a
  // distinct variant name.
  const rejections = 5;
  let n = 0;
  while (n < rejections) {
    run(['rejected', `variant-${n}`, '--reason', 'fonts: Comic Sans']);
    n += 1;
  }

  const profile = readProfile();
  const entry = profile.dimensions.fonts.rejected[0];
  expect(entry.rejected_count).toBe(rejections);

  // A rejected entry's confidence is its own count over (total + 1):
  // 5 / (5 + 0 + 1) = 0.833. Before the fix the numerator was approved_count,
  // so every rejected entry came out as 0 / 6 = 0.
  expect(entry.confidence).toBeCloseTo(rejections / (rejections + 1), 5);
});
test('show ranks rejections by strength, not insertion order', () => {
  // "beige" is rejected once and inserted first; "crimson" is rejected four
  // times and inserted second.
  run(['rejected', 'weak', '--reason', 'colors: beige']);
  let n = 0;
  while (n < 4) {
    run(['rejected', `strong-${n}`, '--reason', 'colors: crimson']);
    n += 1;
  }

  const shown = run(['show']);
  expect(shown.status).toBe(0);

  // Both values must be printed, and the strongly-rejected one has to appear
  // earlier in the output than the weakly-rejected one. Before the fix both
  // confidences were 0, leaving insertion order in place so "beige" came first.
  const offsetOf = (needle: string): number => shown.stdout.indexOf(needle);
  const crimsonAt = offsetOf('crimson');
  const beigeAt = offsetOf('beige');
  expect(crimsonAt).toBeGreaterThanOrEqual(0);
  expect(beigeAt).toBeGreaterThanOrEqual(0);
  expect(crimsonAt).toBeLessThan(beigeAt);
});
});
describe('taste-engine: decay math', () => {
@ -309,6 +338,19 @@ describe('taste-engine: taste drift conflict detection', () => {
expect(r.status).toBe(0);
expect(r.stderr).not.toContain('taste drift');
});
test('drift warning fires from real CLI rejections (no seeding)', () => {
  // Establish the opposing signal using only real CLI calls: four rejections
  // put the rejected entry at 4 / (4 + 0 + 1) = 0.8, which clears the 0.6 drift
  // threshold. Before the fix a rejected entry's confidence was always 0, so
  // the warning could only be triggered by hand-seeding the profile.
  let n = 0;
  while (n < 4) {
    run(['rejected', `variant-${n}`, '--reason', 'fonts: Comic Sans']);
    n += 1;
  }

  // Approving the very same value should now surface the conflict on stderr.
  const approval = run(['approved', 'v-approve', '--reason', 'fonts: Comic Sans']);
  expect(approval.status).toBe(0);
  expect(approval.stderr).toContain('taste drift');
  expect(approval.stderr).toContain('Comic Sans');
});
});
describe('taste-engine: migration', () => {