mirror of https://github.com/garrytan/gstack.git
fix: Codex E2E uses ~/.codex/ auth, not OPENAI_API_KEY
- Remove OPENAI_API_KEY gate from test prerequisites
- Copy real ~/.codex/ auth config into temp HOME so codex can authenticate
- Increase review test timeout to 540s (codex does thorough 60+ tool call reviews)
- Document in CLAUDE.md that Codex uses its own auth config

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f17bc8d908
commit
83ceee7a32
|
|
@ -20,9 +20,11 @@ bun run eval:compare # compare two eval runs (auto-picks most recent)
|
||||||
bun run eval:summary # aggregate stats across all eval runs
|
bun run eval:summary # aggregate stats across all eval runs
|
||||||
```
|
```
|
||||||
|
|
||||||
`test:evals` requires `ANTHROPIC_API_KEY`. E2E tests stream progress in real-time
|
`test:evals` requires `ANTHROPIC_API_KEY`. Codex E2E tests (`test/codex-e2e.test.ts`)
|
||||||
(tool-by-tool via `--output-format stream-json --verbose`). Results are persisted
|
use Codex's own auth from `~/.codex/` config — no `OPENAI_API_KEY` env var needed.
|
||||||
to `~/.gstack-dev/evals/` with auto-comparison against the previous run.
|
E2E tests stream progress in real-time (tool-by-tool via `--output-format stream-json
|
||||||
|
--verbose`). Results are persisted to `~/.gstack-dev/evals/` with auto-comparison
|
||||||
|
against the previous run.
|
||||||
|
|
||||||
**Diff-based test selection:** `test:evals` and `test:e2e` auto-select tests based
|
**Diff-based test selection:** `test:evals` and `test:e2e` auto-select tests based
|
||||||
on `git diff` against the base branch. Each test declares its file dependencies in
|
on `git diff` against the base branch. Each test declares its file dependencies in
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
*
|
*
|
||||||
* Prerequisites:
|
* Prerequisites:
|
||||||
* - `codex` binary installed (npm install -g @openai/codex)
|
* - `codex` binary installed (npm install -g @openai/codex)
|
||||||
* - OPENAI_API_KEY env var set
|
* - Codex authenticated via ~/.codex/ config (no OPENAI_API_KEY env var needed)
|
||||||
* - EVALS=1 env var set (same gate as Claude E2E tests)
|
* - EVALS=1 env var set (same gate as Claude E2E tests)
|
||||||
*
|
*
|
||||||
* Skips gracefully when prerequisites are not met.
|
* Skips gracefully when prerequisites are not met.
|
||||||
|
|
@ -34,11 +34,11 @@ const CODEX_AVAILABLE = (() => {
|
||||||
} catch { return false; }
|
} catch { return false; }
|
||||||
})();
|
})();
|
||||||
|
|
||||||
const HAS_API_KEY = !!process.env.OPENAI_API_KEY;
|
|
||||||
const evalsEnabled = !!process.env.EVALS;
|
const evalsEnabled = !!process.env.EVALS;
|
||||||
|
|
||||||
// Skip all tests if codex is not available, API key is not set, or EVALS is not set
|
// Skip all tests if codex is not available or EVALS is not set.
|
||||||
const SKIP = !CODEX_AVAILABLE || !HAS_API_KEY || !evalsEnabled;
|
// Note: Codex uses its own auth from ~/.codex/ config — no OPENAI_API_KEY env var needed.
|
||||||
|
const SKIP = !CODEX_AVAILABLE || !evalsEnabled;
|
||||||
|
|
||||||
const describeCodex = SKIP ? describe.skip : describe;
|
const describeCodex = SKIP ? describe.skip : describe;
|
||||||
|
|
||||||
|
|
@ -47,8 +47,6 @@ if (!evalsEnabled) {
|
||||||
// Silent — same as Claude E2E tests, EVALS=1 required
|
// Silent — same as Claude E2E tests, EVALS=1 required
|
||||||
} else if (!CODEX_AVAILABLE) {
|
} else if (!CODEX_AVAILABLE) {
|
||||||
process.stderr.write('\nCodex E2E: SKIPPED — codex binary not found (install: npm i -g @openai/codex)\n');
|
process.stderr.write('\nCodex E2E: SKIPPED — codex binary not found (install: npm i -g @openai/codex)\n');
|
||||||
} else if (!HAS_API_KEY) {
|
|
||||||
process.stderr.write('\nCodex E2E: SKIPPED — OPENAI_API_KEY not set\n');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Diff-based test selection ---
|
// --- Diff-based test selection ---
|
||||||
|
|
@ -155,7 +153,7 @@ describeCodex('Codex E2E', () => {
|
||||||
const result = await runCodexSkill({
|
const result = await runCodexSkill({
|
||||||
skillDir,
|
skillDir,
|
||||||
prompt: 'Run the gstack-review skill on this repository. Review the current branch diff and report your findings.',
|
prompt: 'Run the gstack-review skill on this repository. Review the current branch diff and report your findings.',
|
||||||
timeoutMs: 300_000,
|
timeoutMs: 540_000,
|
||||||
cwd: ROOT,
|
cwd: ROOT,
|
||||||
skillName: 'gstack-review',
|
skillName: 'gstack-review',
|
||||||
});
|
});
|
||||||
|
|
@ -183,5 +181,5 @@ describeCodex('Codex E2E', () => {
|
||||||
outputLower.includes('p1') ||
|
outputLower.includes('p1') ||
|
||||||
outputLower.includes('p2');
|
outputLower.includes('p2');
|
||||||
expect(hasReviewContent).toBe(true);
|
expect(hasReviewContent).toBe(true);
|
||||||
}, 360_000);
|
}, 600_000);
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -164,10 +164,30 @@ export async function runCodexSkill(opts: {
|
||||||
|
|
||||||
// Set up temp HOME with skill installed
|
// Set up temp HOME with skill installed
|
||||||
const tempHome = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-e2e-'));
|
const tempHome = fs.mkdtempSync(path.join(os.tmpdir(), 'codex-e2e-'));
|
||||||
|
const realHome = os.homedir();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
installSkillToTempHome(skillDir, name, tempHome);
|
installSkillToTempHome(skillDir, name, tempHome);
|
||||||
|
|
||||||
|
// Copy real Codex auth config so codex can authenticate from temp HOME.
|
||||||
|
// Codex stores auth in ~/.codex/ — we need the config but not the skills
|
||||||
|
// (we install our own test skills above).
|
||||||
|
const realCodexConfig = path.join(realHome, '.codex');
|
||||||
|
const tempCodexDir = path.join(tempHome, '.codex');
|
||||||
|
if (fs.existsSync(realCodexConfig)) {
|
||||||
|
// Copy auth-related files from real ~/.codex/ into temp ~/.codex/
|
||||||
|
// (skills/ is already set up by installSkillToTempHome)
|
||||||
|
const entries = fs.readdirSync(realCodexConfig);
|
||||||
|
for (const entry of entries) {
|
||||||
|
if (entry === 'skills') continue; // don't clobber our test skills
|
||||||
|
const src = path.join(realCodexConfig, entry);
|
||||||
|
const dst = path.join(tempCodexDir, entry);
|
||||||
|
if (!fs.existsSync(dst)) {
|
||||||
|
fs.cpSync(src, dst, { recursive: true });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Build codex exec command
|
// Build codex exec command
|
||||||
const args = ['exec', prompt, '--json', '-s', sandbox];
|
const args = ['exec', prompt, '--json', '-s', sandbox];
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue