From 74036c88cb6477cc5821dd3f0a98e5911f6eac29 Mon Sep 17 00:00:00 2001
From: Jayesh Betala
Date: Mon, 25 May 2026 00:42:09 +0530
Subject: [PATCH] fix(eval): validate eval list limit

---
Review notes (free-form text after the three-dash line; not part of the
commit message):

* parseLimit() accepts the --limit value only when the raw argument matches
  /^[1-9]\d*$/ and converts to a safe integer; otherwise it prints
  "eval:list: --limit requires a positive integer" via console.error and
  exits with status 1.
* `--limit` with no following argument now reaches parseLimit(undefined) and
  is rejected, instead of being skipped and leaving the default of 20.
* The new test spawns `bun run scripts/eval-list.ts` with HOME pointed at a
  temporary directory that holds two sample eval runs.
* NOTE(review): the line structure and indentation of this patch were
  reconstructed (2-space indent assumed). Confirm the context lines against
  the target tree, or apply with `git apply --ignore-whitespace`.

 scripts/eval-list.ts       | 15 ++++++-
 test/eval-list-cli.test.ts | 89 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 test/eval-list-cli.test.ts

diff --git a/scripts/eval-list.ts b/scripts/eval-list.ts
index 12c5f0a94..67d3f71a2 100644
--- a/scripts/eval-list.ts
+++ b/scripts/eval-list.ts
@@ -18,10 +18,23 @@ let filterBranch: string | null = null;
 let filterTier: string | null = null;
 let limit = 20;
 
+function parseLimit(raw: string | undefined): number {
+  if (!raw || !/^[1-9]\d*$/.test(raw)) {
+    console.error('eval:list: --limit requires a positive integer');
+    process.exit(1);
+  }
+  const parsed = Number(raw);
+  if (!Number.isSafeInteger(parsed)) {
+    console.error('eval:list: --limit requires a positive integer');
+    process.exit(1);
+  }
+  return parsed;
+}
+
 for (let i = 0; i < args.length; i++) {
   if (args[i] === '--branch' && args[i + 1]) { filterBranch = args[++i]; }
   else if (args[i] === '--tier' && args[i + 1]) { filterTier = args[++i]; }
-  else if (args[i] === '--limit' && args[i + 1]) { limit = parseInt(args[++i], 10); }
+  else if (args[i] === '--limit') { limit = parseLimit(args[++i]); }
 }
 
 // Read eval files
diff --git a/test/eval-list-cli.test.ts b/test/eval-list-cli.test.ts
new file mode 100644
index 000000000..f742ec706
--- /dev/null
+++ b/test/eval-list-cli.test.ts
@@ -0,0 +1,89 @@
+import { describe, test, expect, beforeEach, afterEach } from 'bun:test';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { spawnSync } from 'child_process';
+
+const ROOT = path.resolve(import.meta.dir, '..');
+
+let tmpHome: string;
+
+beforeEach(() => {
+  tmpHome = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-eval-list-'));
+  const evalDir = path.join(tmpHome, '.gstack-dev', 'evals');
+  fs.mkdirSync(evalDir, { recursive: true });
+  writeEvalRun(evalDir, '2026-a.json', '2026-05-24T01:00:00Z', 2);
+  writeEvalRun(evalDir, '2026-b.json', '2026-05-24T02:00:00Z', 3);
+});
+
+afterEach(() => {
+  fs.rmSync(tmpHome, { recursive: true, force: true });
+});
+
+function writeEvalRun(evalDir: string, filename: string, timestamp: string, turns: number) {
+  fs.writeFileSync(
+    path.join(evalDir, filename),
+    JSON.stringify({
+      schema_version: 1,
+      version: '1.44.0.0',
+      branch: 'main',
+      git_sha: filename,
+      timestamp,
+      tier: 'e2e',
+      total_tests: 1,
+      passed: 1,
+      failed: 0,
+      total_cost_usd: 0,
+      total_duration_ms: 1000,
+      tests: [
+        {
+          name: filename,
+          suite: 'sample',
+          tier: 'e2e',
+          passed: true,
+          duration_ms: 1000,
+          cost_usd: 0,
+          turns_used: turns,
+        },
+      ],
+    }),
+  );
+}
+
+function runEvalList(...args: string[]): { stdout: string; stderr: string; status: number } {
+  const result = spawnSync('bun', ['run', 'scripts/eval-list.ts', ...args], {
+    cwd: ROOT,
+    env: {
+      ...process.env,
+      HOME: tmpHome,
+      GSTACK_HOME: path.join(tmpHome, '.gstack'),
+    },
+    encoding: 'utf-8',
+  });
+  return {
+    stdout: result.stdout ?? '',
+    stderr: result.stderr ?? '',
+    status: result.status ?? -1,
+  };
+}
+
+describe('eval:list CLI', () => {
+  test('limits displayed eval runs with a valid positive integer', () => {
+    const result = runEvalList('--limit', '1');
+
+    expect(result.status).toBe(0);
+    expect(result.stdout).toContain('Eval History (2 total runs)');
+    expect(result.stdout).toContain('Showing: 1');
+    expect(result.stdout).toContain('2026-05-24 02:00');
+    expect(result.stdout).not.toContain('2026-05-24 01:00');
+  });
+
+  test('rejects malformed limit values instead of silently slicing output', () => {
+    for (const value of ['1abc', 'nope', '0', '-1', '1.5']) {
+      const result = runEvalList('--limit', value);
+      expect(result.status).not.toBe(0);
+      expect(result.stderr).toContain('--limit requires a positive integer');
+      expect(result.stdout).toBe('');
+    }
+  });
+});