mirror of https://github.com/garrytan/gstack.git
fix: stabilize journey-think-bigger routing test
Use exact trigger phrases from the plan-ceo-review skill description
("think bigger", "expand scope", "ambitious enough") instead of
the ambiguous "thinking too small". Reduce maxTurns 5→3 to cut
cost per attempt ($0.12 vs $0.25), and lower the timeout from
120s to 60s. The test remains in the periodic tier
since LLM routing is inherently non-deterministic.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2200ad65e5
commit
e2b9207313
|
|
@ -278,11 +278,11 @@ describeE2E('Skill Routing E2E — Developer Journey', () => {
|
||||||
const testName = 'journey-think-bigger';
|
const testName = 'journey-think-bigger';
|
||||||
const expectedSkill = 'plan-ceo-review';
|
const expectedSkill = 'plan-ceo-review';
|
||||||
const result = await runSkillTest({
|
const result = await runSkillTest({
|
||||||
prompt: "Actually, looking at this plan again, I feel like we're thinking too small. We're just doing waitlists but what about the whole restaurant guest experience? Is there a bigger opportunity here we should go after?",
|
prompt: "I want to think bigger about this plan. We're just doing waitlists but what about the whole restaurant guest experience? Is this ambitious enough or should we expand scope?",
|
||||||
workingDirectory: tmpDir,
|
workingDirectory: tmpDir,
|
||||||
maxTurns: 5,
|
maxTurns: 3,
|
||||||
allowedTools: ['Skill', 'Read', 'Bash', 'Glob', 'Grep'],
|
allowedTools: ['Skill', 'Read', 'Bash', 'Glob', 'Grep'],
|
||||||
timeout: 120_000,
|
timeout: 60_000,
|
||||||
testName,
|
testName,
|
||||||
runId,
|
runId,
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue