gstack/test/eval-list-cli.test.ts

90 lines
2.5 KiB
TypeScript

import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { spawnSync } from 'child_process';
const ROOT = path.resolve(import.meta.dir, '..');
let tmpHome: string;
beforeEach(() => {
tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-eval-list-'));
const evalDir = path.join(tmpHome, '.gstack-dev', 'evals');
fs.mkdirSync(evalDir, { recursive: true });
writeEvalRun(evalDir, '2026-a.json', '2026-05-24T01:00:00Z', 2);
writeEvalRun(evalDir, '2026-b.json', '2026-05-24T02:00:00Z', 3);
});
afterEach(() => {
fs.rmSync(tmpHome, { recursive: true, force: true });
});
function writeEvalRun(evalDir: string, filename: string, timestamp: string, turns: number) {
fs.writeFileSync(
path.join(evalDir, filename),
JSON.stringify({
schema_version: 1,
version: '1.44.0.0',
branch: 'main',
git_sha: filename,
timestamp,
tier: 'e2e',
total_tests: 1,
passed: 1,
failed: 0,
total_cost_usd: 0,
total_duration_ms: 1000,
tests: [
{
name: filename,
suite: 'sample',
tier: 'e2e',
passed: true,
duration_ms: 1000,
cost_usd: 0,
turns_used: turns,
},
],
}),
);
}
function runEvalList(...args: string[]): { stdout: string; stderr: string; status: number } {
const result = spawnSync('bun', ['run', 'scripts/eval-list.ts', ...args], {
cwd: ROOT,
env: {
...process.env,
HOME: tmpHome,
GSTACK_HOME: path.join(tmpHome, '.gstack'),
},
encoding: 'utf-8',
});
return {
stdout: result.stdout ?? '',
stderr: result.stderr ?? '',
status: result.status ?? -1,
};
}
describe('eval:list CLI', () => {
test('limits displayed eval runs with a valid positive integer', () => {
const result = runEvalList('--limit', '1');
expect(result.status).toBe(0);
expect(result.stdout).toContain('Eval History (2 total runs)');
expect(result.stdout).toContain('Showing: 1');
expect(result.stdout).toContain('2026-05-24 02:00');
expect(result.stdout).not.toContain('2026-05-24 01:00');
});
test('rejects malformed limit values instead of silently slicing output', () => {
for (const value of ['1abc', 'nope', '0', '-1', '1.5']) {
const result = runEvalList('--limit', value);
expect(result.status).not.toBe(0);
expect(result.stderr).toContain('--limit requires a positive integer');
expect(result.stdout).toBe('');
}
});
});