mirror of https://github.com/garrytan/gstack.git
Merge remote-tracking branch 'origin/main' into garrytan/codex-reviews-default
This commit is contained in:
commit
f080038101
|
|
@ -358,7 +358,7 @@ The snapshot is your primary tool for understanding and interacting with pages.
|
||||||
-s <sel> --selector Scope to CSS selector
|
-s <sel> --selector Scope to CSS selector
|
||||||
-D --diff Unified diff against previous snapshot (first call stores baseline)
|
-D --diff Unified diff against previous snapshot (first call stores baseline)
|
||||||
-a --annotate Annotated screenshot with red overlay boxes and ref labels
|
-a --annotate Annotated screenshot with red overlay boxes and ref labels
|
||||||
-o <path> --output Output path for annotated screenshot (default: /tmp/browse-annotated.png)
|
-o <path> --output Output path for annotated screenshot (default: <temp>/browse-annotated.png)
|
||||||
-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
|
-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -486,7 +486,7 @@ The snapshot is your primary tool for understanding and interacting with pages.
|
||||||
-s <sel> --selector Scope to CSS selector
|
-s <sel> --selector Scope to CSS selector
|
||||||
-D --diff Unified diff against previous snapshot (first call stores baseline)
|
-D --diff Unified diff against previous snapshot (first call stores baseline)
|
||||||
-a --annotate Annotated screenshot with red overlay boxes and ref labels
|
-a --annotate Annotated screenshot with red overlay boxes and ref labels
|
||||||
-o <path> --output Output path for annotated screenshot (default: /tmp/browse-annotated.png)
|
-o <path> --output Output path for annotated screenshot (default: <temp>/browse-annotated.png)
|
||||||
-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
|
-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
20
CHANGELOG.md
20
CHANGELOG.md
|
|
@ -1,5 +1,25 @@
|
||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## [0.9.3.0] - 2026-03-20 — Windows Support
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- **gstack now works on Windows 11.** Setup no longer hangs when verifying Playwright, and the browse server automatically falls back to Node.js to work around a Bun pipe-handling bug on Windows ([bun#4253](https://github.com/oven-sh/bun/issues/4253)). Just make sure Node.js is installed alongside Bun. macOS and Linux are completely unaffected.
|
||||||
|
- **Path handling works on Windows.** All hardcoded `/tmp` paths and Unix-style path separators now use platform-aware equivalents via a new `platform.ts` module. Path traversal protection works correctly with Windows backslash separators.
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **Bun API polyfill for Node.js.** When the browse server runs under Node.js on Windows, a compatibility layer provides `Bun.serve()`, `Bun.spawn()`, `Bun.spawnSync()`, and `Bun.sleep()` equivalents. Fully tested.
|
||||||
|
- **Node server build script.** `browse/scripts/build-node-server.sh` transpiles the server for Node.js, stubs `bun:sqlite`, and injects the polyfill — all automated during `bun run build`.
|
||||||
|
|
||||||
|
## [0.9.2.0] - 2026-03-20 — Gemini CLI E2E Tests
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **Gemini CLI is now tested end-to-end.** Two E2E tests verify that gstack skills work when invoked by Google's Gemini CLI (`gemini -p`). The `gemini-discover-skill` test confirms skill discovery from `.agents/skills/`, and `gemini-review-findings` runs a full code review via gstack-review. Both parse Gemini's stream-json NDJSON output and track token usage.
|
||||||
|
- **Gemini JSONL parser with 10 unit tests.** `parseGeminiJSONL` handles all Gemini event types (init, message, tool_use, tool_result, result) with defensive parsing for malformed input. The parser is a pure function, independently testable without spawning the CLI.
|
||||||
|
- **`bun run test:gemini`** and **`bun run test:gemini:all`** scripts for running Gemini E2E tests independently. Gemini tests are also included in `test:evals` and `test:e2e` aggregate scripts.
|
||||||
|
|
||||||
## [0.9.1.0] - 2026-03-20 — Adversarial Spec Review + Skill Chaining
|
## [0.9.1.0] - 2026-03-20 — Adversarial Spec Review + Skill Chaining
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ Expect first useful run in under 5 minutes on any repo with tests already set up
|
||||||
|
|
||||||
## Install — takes 30 seconds
|
## Install — takes 30 seconds
|
||||||
|
|
||||||
**Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Git](https://git-scm.com/), [Bun](https://bun.sh/) v1.0+
|
**Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Git](https://git-scm.com/), [Bun](https://bun.sh/) v1.0+, [Node.js](https://nodejs.org/) (Windows only)
|
||||||
|
|
||||||
### Step 1: Install on your machine
|
### Step 1: Install on your machine
|
||||||
|
|
||||||
|
|
@ -238,6 +238,8 @@ Data is stored in [Supabase](https://supabase.com) (open source Firebase alterna
|
||||||
|
|
||||||
**Stale install?** Run `/gstack-upgrade` — or set `auto_upgrade: true` in `~/.gstack/config.yaml`
|
**Stale install?** Run `/gstack-upgrade` — or set `auto_upgrade: true` in `~/.gstack/config.yaml`
|
||||||
|
|
||||||
|
**Windows users:** gstack works on Windows 11 via Git Bash or WSL. Node.js is required in addition to Bun — Bun has a known bug with Playwright's pipe transport on Windows ([bun#4253](https://github.com/oven-sh/bun/issues/4253)). The browse server automatically falls back to Node.js. Make sure both `bun` and `node` are on your PATH.
|
||||||
|
|
||||||
**Claude says it can't see the skills?** Make sure your project's `CLAUDE.md` has a gstack section. Add this:
|
**Claude says it can't see the skills?** Make sure your project's `CLAUDE.md` has a gstack section. Add this:
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
|
||||||
2
SKILL.md
2
SKILL.md
|
|
@ -492,7 +492,7 @@ The snapshot is your primary tool for understanding and interacting with pages.
|
||||||
-s <sel> --selector Scope to CSS selector
|
-s <sel> --selector Scope to CSS selector
|
||||||
-D --diff Unified diff against previous snapshot (first call stores baseline)
|
-D --diff Unified diff against previous snapshot (first call stores baseline)
|
||||||
-a --annotate Annotated screenshot with red overlay boxes and ref labels
|
-a --annotate Annotated screenshot with red overlay boxes and ref labels
|
||||||
-o <path> --output Output path for annotated screenshot (default: /tmp/browse-annotated.png)
|
-o <path> --output Output path for annotated screenshot (default: <temp>/browse-annotated.png)
|
||||||
-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
|
-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -364,7 +364,7 @@ The snapshot is your primary tool for understanding and interacting with pages.
|
||||||
-s <sel> --selector Scope to CSS selector
|
-s <sel> --selector Scope to CSS selector
|
||||||
-D --diff Unified diff against previous snapshot (first call stores baseline)
|
-D --diff Unified diff against previous snapshot (first call stores baseline)
|
||||||
-a --annotate Annotated screenshot with red overlay boxes and ref labels
|
-a --annotate Annotated screenshot with red overlay boxes and ref labels
|
||||||
-o <path> --output Output path for annotated screenshot (default: /tmp/browse-annotated.png)
|
-o <path> --output Output path for annotated screenshot (default: <temp>/browse-annotated.png)
|
||||||
-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
|
-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
#!/usr/bin/env bash
# Build a Node.js-compatible server bundle for Windows.
#
# On Windows, Bun can't launch or connect to Playwright's Chromium
# (oven-sh/bun#4253, #9911). This script produces a server bundle
# that runs under Node.js with Bun API polyfills.
#
# Outputs (both under browse/dist/):
#   server-node.mjs   - bundled server with a compatibility header injected
#   bun-polyfill.cjs  - copy of browse/src/bun-polyfill.cjs loaded by that header

# -e: stop on the first failing step; -u: unset variables are errors;
# pipefail: a failing command inside a pipeline fails the pipeline.
set -euo pipefail

GSTACK_DIR="$(cd "$(dirname "$0")/../.." && pwd)"
SRC_DIR="$GSTACK_DIR/browse/src"
DIST_DIR="$GSTACK_DIR/browse/dist"
BUNDLE="$DIST_DIR/server-node.mjs"
BUNDLE_TMP="$DIST_DIR/server-node.tmp.mjs"

# Never leave a half-written temp bundle behind if a later step fails.
trap 'rm -f "$BUNDLE_TMP"' EXIT

echo "Building Node-compatible server bundle..."

# The dist directory may not exist on a fresh checkout.
mkdir -p "$DIST_DIR"

# Step 1: Transpile server.ts to a single .mjs bundle (externalize runtime deps)
bun build "$SRC_DIR/server.ts" \
  --target=node \
  --outfile "$BUNDLE" \
  --external playwright \
  --external playwright-core \
  --external diff \
  --external "bun:sqlite"

# Step 2: Post-process
# Replace import.meta.dir with a resolvable reference (defined in the header below)
perl -pi -e 's/import\.meta\.dir/__browseNodeSrcDir/g' "$BUNDLE"
# Stub out bun:sqlite (macOS-only cookie import, not needed on Windows).
# Braces are escaped so the pattern is an unambiguous literal match.
perl -pi -e 's|import \{ Database \} from "bun:sqlite";|const Database = null; // bun:sqlite stubbed on Node|g' "$BUNDLE"

# Step 3: Create the final file with polyfill header injected after the first line
{
  head -1 "$BUNDLE"
  echo '// ── Windows Node.js compatibility (auto-generated) ──'
  echo 'import { fileURLToPath as _ftp } from "node:url";'
  echo 'import { dirname as _dn } from "node:path";'
  # Import createRequire under a private alias so the header works whether or
  # not the bundle itself happens to import it.
  echo 'import { createRequire as _cr } from "node:module";'
  echo 'const __browseNodeSrcDir = _dn(_dn(_ftp(import.meta.url))) + "/src";'
  echo '{ const _r = _cr(import.meta.url); _r("./bun-polyfill.cjs"); }'
  echo '// ── end compatibility ──'
  tail -n +2 "$BUNDLE"
} > "$BUNDLE_TMP"

mv "$BUNDLE_TMP" "$BUNDLE"

# Step 4: Copy polyfill to dist/ (the header requires it relative to the bundle)
cp "$SRC_DIR/bun-polyfill.cjs" "$DIST_DIR/bun-polyfill.cjs"

echo "Node server bundle ready: $BUNDLE"
|
||||||
|
|
@ -0,0 +1,109 @@
|
||||||
|
/**
 * Bun API polyfill for Node.js — Windows compatibility layer.
 *
 * On Windows, Bun can't launch or connect to Playwright's Chromium
 * (oven-sh/bun#4253, #9911). The browse server falls back to running
 * under Node.js with this polyfill providing Bun API equivalents.
 *
 * Loaded by the compatibility header that browse/scripts/build-node-server.sh
 * injects near the top of the server bundle (via createRequire).
 *
 * Installs `globalThis.Bun` with: serve, spawnSync, spawn, sleep.
 * Only the subset of each API shown here is implemented.
 */

'use strict';

const http = require('http');
const { spawnSync, spawn } = require('child_process');

/**
 * Buffer an incoming request body.
 * Rejects on a stream error so the request handler cannot hang forever.
 */
function readRequestBody(nodeReq) {
  return new Promise((resolve, reject) => {
    const chunks = [];
    nodeReq.on('data', (chunk) => chunks.push(chunk));
    nodeReq.on('end', () => resolve(Buffer.concat(chunks)));
    nodeReq.on('error', reject);
  });
}

/**
 * Convert Node's incoming header map to a WHATWG Headers object.
 * Multi-valued headers (arrays) keep every value instead of only the first.
 */
function toWebHeaders(nodeHeaders) {
  const headers = new Headers();
  for (const [key, val] of Object.entries(nodeHeaders)) {
    if (Array.isArray(val)) {
      for (const item of val) headers.append(key, item);
    } else if (val) {
      headers.set(key, val);
    }
  }
  return headers;
}

globalThis.Bun = {
  /**
   * Minimal Bun.serve(): starts an HTTP server and routes every request
   * through `options.fetch(Request) -> Response`.
   *
   * @param options `{ port, hostname = '127.0.0.1', fetch }`
   * @returns `{ stop(), port, hostname }` — `port` reflects the actually bound
   *          port once the server is listening (so `port: 0` works); before
   *          that it is the requested port.
   */
  serve(options) {
    const { port, hostname = '127.0.0.1', fetch } = options;

    const server = http.createServer(async (nodeReq, nodeRes) => {
      try {
        const url = `http://${hostname}:${boundPort()}${nodeReq.url}`;
        const headers = toWebHeaders(nodeReq.headers);

        // Request() rejects a body on GET/HEAD, so only read one otherwise.
        let body = null;
        if (nodeReq.method !== 'GET' && nodeReq.method !== 'HEAD') {
          body = await readRequestBody(nodeReq);
        }

        const webReq = new Request(url, {
          method: nodeReq.method,
          headers,
          body,
        });

        const webRes = await fetch(webReq);

        nodeRes.statusCode = webRes.status;
        webRes.headers.forEach((val, key) => {
          nodeRes.setHeader(key, val);
        });

        const resBody = await webRes.arrayBuffer();
        nodeRes.end(Buffer.from(resBody));
      } catch (err) {
        // The status line can no longer change once headers are on the wire.
        if (!nodeRes.headersSent) nodeRes.statusCode = 500;
        const message = err instanceof Error ? err.message : String(err);
        nodeRes.end(JSON.stringify({ error: message }));
      }
    });

    // address() is null until the socket is bound (and again after close).
    function boundPort() {
      const addr = server.address();
      return addr && typeof addr === 'object' ? addr.port : port;
    }

    server.listen(port, hostname);

    return {
      stop() { server.close(); },
      get port() { return boundPort(); },
      hostname,
    };
  },

  /**
   * Minimal Bun.spawnSync(): runs `cmd` (`[command, ...args]`) to completion.
   *
   * stdout/stderr are captured only when the option is exactly 'pipe';
   * any other value (including unset) discards the stream.
   *
   * @returns `{ exitCode, success, stdout, stderr }` — `exitCode` is null when
   *          Node reports no exit status; stdout/stderr are always Buffers.
   */
  spawnSync(cmd, options = {}) {
    const [command, ...args] = cmd;
    const result = spawnSync(command, args, {
      stdio: [
        options.stdin || 'pipe',
        options.stdout === 'pipe' ? 'pipe' : 'ignore',
        options.stderr === 'pipe' ? 'pipe' : 'ignore',
      ],
      timeout: options.timeout,
      env: options.env,
      cwd: options.cwd,
    });

    return {
      exitCode: result.status,
      success: result.status === 0,
      stdout: result.stdout || Buffer.from(''),
      stderr: result.stderr || Buffer.from(''),
    };
  },

  /**
   * Minimal Bun.spawn(): starts `cmd` (`[command, ...args]`) without waiting.
   *
   * Accepts either `options.stdio` (a 3-element array) or the individual
   * `stdin` / `stdout` / `stderr` options; everything defaults to 'pipe'.
   *
   * @returns `{ pid, stdout, stderr, stdin, exited, exitCode, unref(), kill() }`
   *          where the streams are Node streams (not web streams) and `exited`
   *          resolves with the exit code when the child exits.
   */
  spawn(cmd, options = {}) {
    const [command, ...args] = cmd;
    const stdio = options.stdio || [
      options.stdin || 'pipe',
      options.stdout || 'pipe',
      options.stderr || 'pipe',
    ];
    const proc = spawn(command, args, {
      stdio,
      env: options.env,
      cwd: options.cwd,
    });

    const exited = new Promise((resolve) => {
      proc.once('exit', (code) => resolve(code));
    });

    return {
      pid: proc.pid,
      stdout: proc.stdout,
      stderr: proc.stderr,
      stdin: proc.stdin,
      exited,
      get exitCode() { return proc.exitCode; },
      unref() { proc.unref(); },
      kill(signal) { proc.kill(signal); },
    };
  },

  /** Bun.sleep(): resolve after `ms` milliseconds. */
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  },
};
|
||||||
|
|
@ -14,7 +14,8 @@ import * as path from 'path';
|
||||||
import { resolveConfig, ensureStateDir, readVersionHash } from './config';
|
import { resolveConfig, ensureStateDir, readVersionHash } from './config';
|
||||||
|
|
||||||
const config = resolveConfig();
|
const config = resolveConfig();
|
||||||
const MAX_START_WAIT = 8000; // 8 seconds to start
|
const IS_WINDOWS = process.platform === 'win32';
|
||||||
|
const MAX_START_WAIT = IS_WINDOWS ? 15000 : 8000; // Node+Chromium takes longer on Windows
|
||||||
|
|
||||||
export function resolveServerScript(
|
export function resolveServerScript(
|
||||||
env: Record<string, string | undefined> = process.env,
|
env: Record<string, string | undefined> = process.env,
|
||||||
|
|
@ -26,7 +27,9 @@ export function resolveServerScript(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dev mode: cli.ts runs directly from browse/src
|
// Dev mode: cli.ts runs directly from browse/src
|
||||||
if (metaDir.startsWith('/') && !metaDir.includes('$bunfs')) {
|
// On macOS/Linux, import.meta.dir starts with /
|
||||||
|
// On Windows, it starts with a drive letter (e.g., C:\...)
|
||||||
|
if (!metaDir.includes('$bunfs')) {
|
||||||
const direct = path.resolve(metaDir, 'server.ts');
|
const direct = path.resolve(metaDir, 'server.ts');
|
||||||
if (fs.existsSync(direct)) {
|
if (fs.existsSync(direct)) {
|
||||||
return direct;
|
return direct;
|
||||||
|
|
@ -48,6 +51,31 @@ export function resolveServerScript(
|
||||||
|
|
||||||
const SERVER_SCRIPT = resolveServerScript();
|
const SERVER_SCRIPT = resolveServerScript();
|
||||||
|
|
||||||
|
/**
 * Locate the Node.js build of the browse server (`server-node.mjs`).
 *
 * Two locations are probed, in order:
 *   1. `<metaDir>/../dist/server-node.mjs` — skipped when `metaDir` contains
 *      `$bunfs` (i.e. we are not running from a source checkout).
 *   2. `server-node.mjs` next to `execPath` — skipped when `execPath` is empty.
 *
 * @param metaDir  Directory of the running module (defaults to import.meta.dir).
 * @param execPath Path of the running executable (defaults to process.execPath).
 * @returns The first candidate that exists on disk, or null when none does.
 */
export function resolveNodeServerScript(
  metaDir: string = import.meta.dir,
  execPath: string = process.execPath
): string | null {
  const bundleName = 'server-node.mjs';
  const candidates: string[] = [];

  // Dev mode: browse/src → browse/dist/server-node.mjs
  if (!metaDir.includes('$bunfs')) {
    candidates.push(path.resolve(metaDir, '..', 'dist', bundleName));
  }

  // Compiled binary: browse/dist/browse → browse/dist/server-node.mjs
  if (execPath) {
    candidates.push(path.resolve(path.dirname(execPath), bundleName));
  }

  for (const candidate of candidates) {
    if (fs.existsSync(candidate)) {
      return candidate;
    }
  }
  return null;
}
|
||||||
|
|
||||||
|
const NODE_SERVER_SCRIPT = IS_WINDOWS ? resolveNodeServerScript() : null;
|
||||||
|
|
||||||
interface ServerState {
|
interface ServerState {
|
||||||
pid: number;
|
pid: number;
|
||||||
port: number;
|
port: number;
|
||||||
|
|
@ -139,8 +167,14 @@ async function startServer(): Promise<ServerState> {
|
||||||
// Clean up stale state file
|
// Clean up stale state file
|
||||||
try { fs.unlinkSync(config.stateFile); } catch {}
|
try { fs.unlinkSync(config.stateFile); } catch {}
|
||||||
|
|
||||||
// Start server as detached background process
|
// Start server as detached background process.
|
||||||
const proc = Bun.spawn(['bun', 'run', SERVER_SCRIPT], {
|
// On Windows, Bun can't launch/connect to Playwright's Chromium (oven-sh/bun#4253, #9911).
|
||||||
|
// Fall back to running the server under Node.js with Bun API polyfills.
|
||||||
|
const useNode = IS_WINDOWS && NODE_SERVER_SCRIPT;
|
||||||
|
const serverCmd = useNode
|
||||||
|
? ['node', NODE_SERVER_SCRIPT]
|
||||||
|
: ['bun', 'run', SERVER_SCRIPT];
|
||||||
|
const proc = Bun.spawn(serverCmd, {
|
||||||
stdio: ['ignore', 'pipe', 'pipe'],
|
stdio: ['ignore', 'pipe', 'pipe'],
|
||||||
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile },
|
env: { ...process.env, BROWSE_STATE_FILE: config.stateFile },
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -10,13 +10,14 @@ import { validateNavigationUrl } from './url-validation';
|
||||||
import * as Diff from 'diff';
|
import * as Diff from 'diff';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
|
import { TEMP_DIR, isPathWithin } from './platform';
|
||||||
|
|
||||||
// Security: Path validation to prevent path traversal attacks
|
// Security: Path validation to prevent path traversal attacks
|
||||||
const SAFE_DIRECTORIES = ['/tmp', process.cwd()];
|
const SAFE_DIRECTORIES = [TEMP_DIR, process.cwd()];
|
||||||
|
|
||||||
export function validateOutputPath(filePath: string): void {
|
export function validateOutputPath(filePath: string): void {
|
||||||
const resolved = path.resolve(filePath);
|
const resolved = path.resolve(filePath);
|
||||||
const isSafe = SAFE_DIRECTORIES.some(dir => resolved === dir || resolved.startsWith(dir + '/'));
|
const isSafe = SAFE_DIRECTORIES.some(dir => isPathWithin(resolved, dir));
|
||||||
if (!isSafe) {
|
if (!isSafe) {
|
||||||
throw new Error(`Path must be within: ${SAFE_DIRECTORIES.join(', ')}`);
|
throw new Error(`Path must be within: ${SAFE_DIRECTORIES.join(', ')}`);
|
||||||
}
|
}
|
||||||
|
|
@ -88,7 +89,7 @@ export async function handleMetaCommand(
|
||||||
case 'screenshot': {
|
case 'screenshot': {
|
||||||
// Parse priority: flags (--viewport, --clip) → selector (@ref, CSS) → output path
|
// Parse priority: flags (--viewport, --clip) → selector (@ref, CSS) → output path
|
||||||
const page = bm.getPage();
|
const page = bm.getPage();
|
||||||
let outputPath = '/tmp/browse-screenshot.png';
|
let outputPath = `${TEMP_DIR}/browse-screenshot.png`;
|
||||||
let clipRect: { x: number; y: number; width: number; height: number } | undefined;
|
let clipRect: { x: number; y: number; width: number; height: number } | undefined;
|
||||||
let targetSelector: string | undefined;
|
let targetSelector: string | undefined;
|
||||||
let viewportOnly = false;
|
let viewportOnly = false;
|
||||||
|
|
@ -147,7 +148,7 @@ export async function handleMetaCommand(
|
||||||
|
|
||||||
case 'pdf': {
|
case 'pdf': {
|
||||||
const page = bm.getPage();
|
const page = bm.getPage();
|
||||||
const pdfPath = args[0] || '/tmp/browse-page.pdf';
|
const pdfPath = args[0] || `${TEMP_DIR}/browse-page.pdf`;
|
||||||
validateOutputPath(pdfPath);
|
validateOutputPath(pdfPath);
|
||||||
await page.pdf({ path: pdfPath, format: 'A4' });
|
await page.pdf({ path: pdfPath, format: 'A4' });
|
||||||
return `PDF saved: ${pdfPath}`;
|
return `PDF saved: ${pdfPath}`;
|
||||||
|
|
@ -155,7 +156,7 @@ export async function handleMetaCommand(
|
||||||
|
|
||||||
case 'responsive': {
|
case 'responsive': {
|
||||||
const page = bm.getPage();
|
const page = bm.getPage();
|
||||||
const prefix = args[0] || '/tmp/browse-responsive';
|
const prefix = args[0] || `${TEMP_DIR}/browse-responsive`;
|
||||||
validateOutputPath(prefix);
|
validateOutputPath(prefix);
|
||||||
const viewports = [
|
const viewports = [
|
||||||
{ name: 'mobile', width: 375, height: 812 },
|
{ name: 'mobile', width: 375, height: 812 },
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,17 @@
|
||||||
|
/**
|
||||||
|
* Cross-platform constants for gstack browse.
|
||||||
|
*
|
||||||
|
* On macOS/Linux: TEMP_DIR = '/tmp', path.sep = '/' — identical to hardcoded values.
|
||||||
|
* On Windows: TEMP_DIR = os.tmpdir(), path.sep = '\\' — correct Windows behavior.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import * as os from 'os';
|
||||||
|
import * as path from 'path';
|
||||||
|
|
||||||
|
export const IS_WINDOWS = process.platform === 'win32';
|
||||||
|
export const TEMP_DIR = IS_WINDOWS ? os.tmpdir() : '/tmp';
|
||||||
|
|
||||||
|
/**
 * Check whether `resolvedPath` is `dir` itself or lies somewhere beneath it.
 *
 * Containment is decided with `path.relative` rather than a string-prefix
 * test, so it is correct when `dir` is a filesystem root (`/`, `C:\`) or has
 * a trailing separator, and it follows the platform's own path comparison
 * rules.
 *
 * @param resolvedPath Path to test; callers pass an absolute, resolved path.
 * @param dir          Directory that must contain `resolvedPath`.
 * @returns true when `resolvedPath` equals `dir` or is a descendant of it.
 */
export function isPathWithin(resolvedPath: string, dir: string): boolean {
  const rel = path.relative(dir, resolvedPath);
  if (rel === '') return true; // same directory
  // An absolute result means there is no relative route (e.g. another drive).
  if (path.isAbsolute(rel)) return false;
  // Escapes `dir` only if the first segment is exactly `..`; a name such as
  // `..foo` is an ordinary child.
  return rel !== '..' && !rel.startsWith('..' + path.sep);
}
|
||||||
|
|
@ -10,6 +10,7 @@ import { consoleBuffer, networkBuffer, dialogBuffer } from './buffers';
|
||||||
import type { Page } from 'playwright';
|
import type { Page } from 'playwright';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
|
import { TEMP_DIR, isPathWithin } from './platform';
|
||||||
|
|
||||||
/** Detect await keyword, ignoring comments. Accepted risk: await in string literals triggers wrapping (harmless). */
|
/** Detect await keyword, ignoring comments. Accepted risk: await in string literals triggers wrapping (harmless). */
|
||||||
function hasAwait(code: string): boolean {
|
function hasAwait(code: string): boolean {
|
||||||
|
|
@ -36,12 +37,12 @@ function wrapForEvaluate(code: string): string {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Security: Path validation to prevent path traversal attacks
|
// Security: Path validation to prevent path traversal attacks
|
||||||
const SAFE_DIRECTORIES = ['/tmp', process.cwd()];
|
const SAFE_DIRECTORIES = [TEMP_DIR, process.cwd()];
|
||||||
|
|
||||||
export function validateReadPath(filePath: string): void {
|
export function validateReadPath(filePath: string): void {
|
||||||
if (path.isAbsolute(filePath)) {
|
if (path.isAbsolute(filePath)) {
|
||||||
const resolved = path.resolve(filePath);
|
const resolved = path.resolve(filePath);
|
||||||
const isSafe = SAFE_DIRECTORIES.some(dir => resolved === dir || resolved.startsWith(dir + '/'));
|
const isSafe = SAFE_DIRECTORIES.some(dir => isPathWithin(resolved, dir));
|
||||||
if (!isSafe) {
|
if (!isSafe) {
|
||||||
throw new Error(`Absolute path must be within: ${SAFE_DIRECTORIES.join(', ')}`);
|
throw new Error(`Absolute path must be within: ${SAFE_DIRECTORIES.join(', ')}`);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@
|
||||||
import type { Page, Locator } from 'playwright';
|
import type { Page, Locator } from 'playwright';
|
||||||
import type { BrowserManager, RefEntry } from './browser-manager';
|
import type { BrowserManager, RefEntry } from './browser-manager';
|
||||||
import * as Diff from 'diff';
|
import * as Diff from 'diff';
|
||||||
|
import { TEMP_DIR, isPathWithin } from './platform';
|
||||||
|
|
||||||
// Roles considered "interactive" for the -i flag
|
// Roles considered "interactive" for the -i flag
|
||||||
const INTERACTIVE_ROLES = new Set([
|
const INTERACTIVE_ROLES = new Set([
|
||||||
|
|
@ -61,7 +62,7 @@ export const SNAPSHOT_FLAGS: Array<{
|
||||||
{ short: '-s', long: '--selector', description: 'Scope to CSS selector', takesValue: true, valueHint: '<sel>', optionKey: 'selector' },
|
{ short: '-s', long: '--selector', description: 'Scope to CSS selector', takesValue: true, valueHint: '<sel>', optionKey: 'selector' },
|
||||||
{ short: '-D', long: '--diff', description: 'Unified diff against previous snapshot (first call stores baseline)', optionKey: 'diff' },
|
{ short: '-D', long: '--diff', description: 'Unified diff against previous snapshot (first call stores baseline)', optionKey: 'diff' },
|
||||||
{ short: '-a', long: '--annotate', description: 'Annotated screenshot with red overlay boxes and ref labels', optionKey: 'annotate' },
|
{ short: '-a', long: '--annotate', description: 'Annotated screenshot with red overlay boxes and ref labels', optionKey: 'annotate' },
|
||||||
{ short: '-o', long: '--output', description: 'Output path for annotated screenshot (default: /tmp/browse-annotated.png)', takesValue: true, valueHint: '<path>', optionKey: 'outputPath' },
|
{ short: '-o', long: '--output', description: 'Output path for annotated screenshot (default: <temp>/browse-annotated.png)', takesValue: true, valueHint: '<path>', optionKey: 'outputPath' },
|
||||||
{ short: '-C', long: '--cursor-interactive', description: 'Cursor-interactive elements (@c refs — divs with pointer, onclick)', optionKey: 'cursorInteractive' },
|
{ short: '-C', long: '--cursor-interactive', description: 'Cursor-interactive elements (@c refs — divs with pointer, onclick)', optionKey: 'cursorInteractive' },
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
@ -308,11 +309,11 @@ export async function handleSnapshot(
|
||||||
|
|
||||||
// ─── Annotated screenshot (-a) ────────────────────────────
|
// ─── Annotated screenshot (-a) ────────────────────────────
|
||||||
if (opts.annotate) {
|
if (opts.annotate) {
|
||||||
const screenshotPath = opts.outputPath || '/tmp/browse-annotated.png';
|
const screenshotPath = opts.outputPath || `${TEMP_DIR}/browse-annotated.png`;
|
||||||
// Validate output path (consistent with screenshot/pdf/responsive)
|
// Validate output path (consistent with screenshot/pdf/responsive)
|
||||||
const resolvedPath = require('path').resolve(screenshotPath);
|
const resolvedPath = require('path').resolve(screenshotPath);
|
||||||
const safeDirs = ['/tmp', process.cwd()];
|
const safeDirs = [TEMP_DIR, process.cwd()];
|
||||||
if (!safeDirs.some((dir: string) => resolvedPath === dir || resolvedPath.startsWith(dir + '/'))) {
|
if (!safeDirs.some((dir: string) => isPathWithin(resolvedPath, dir))) {
|
||||||
throw new Error(`Path must be within: ${safeDirs.join(', ')}`);
|
throw new Error(`Path must be within: ${safeDirs.join(', ')}`);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import { findInstalledBrowsers, importCookies } from './cookie-import-browser';
|
||||||
import { validateNavigationUrl } from './url-validation';
|
import { validateNavigationUrl } from './url-validation';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
|
import { TEMP_DIR, isPathWithin } from './platform';
|
||||||
|
|
||||||
export async function handleWriteCommand(
|
export async function handleWriteCommand(
|
||||||
command: string,
|
command: string,
|
||||||
|
|
@ -277,9 +278,9 @@ export async function handleWriteCommand(
|
||||||
if (!filePath) throw new Error('Usage: browse cookie-import <json-file>');
|
if (!filePath) throw new Error('Usage: browse cookie-import <json-file>');
|
||||||
// Path validation — prevent reading arbitrary files
|
// Path validation — prevent reading arbitrary files
|
||||||
if (path.isAbsolute(filePath)) {
|
if (path.isAbsolute(filePath)) {
|
||||||
const safeDirs = ['/tmp', process.cwd()];
|
const safeDirs = [TEMP_DIR, process.cwd()];
|
||||||
const resolved = path.resolve(filePath);
|
const resolved = path.resolve(filePath);
|
||||||
if (!safeDirs.some(dir => resolved === dir || resolved.startsWith(dir + '/'))) {
|
if (!safeDirs.some(dir => isPathWithin(resolved, dir))) {
|
||||||
throw new Error(`Path must be within: ${safeDirs.join(', ')}`);
|
throw new Error(`Path must be within: ${safeDirs.join(', ')}`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
import { describe, test, expect, afterAll } from 'bun:test';
|
||||||
|
import * as path from 'path';
|
||||||
|
|
||||||
|
// Load the polyfill into a fresh object (don't clobber globalThis.Bun)
|
||||||
|
const polyfillPath = path.resolve(import.meta.dir, '../src/bun-polyfill.cjs');
|
||||||
|
|
||||||
|
describe('bun-polyfill', () => {
  // We test the polyfill by requiring it in a subprocess under Node.js
  // since it's designed for Node, not Bun.

  // JSON.stringify produces a quoted, fully escaped JS string literal, so the
  // path survives being embedded in the `node -e` scripts below even when it
  // contains backslashes (Windows) or quote characters.
  const polyfillLiteral = JSON.stringify(polyfillPath);

  test('Bun.sleep resolves after delay', async () => {
    const result = Bun.spawnSync(['node', '-e', `
      require(${polyfillLiteral});
      (async () => {
        const start = Date.now();
        await Bun.sleep(50);
        const elapsed = Date.now() - start;
        console.log(elapsed >= 40 ? 'OK' : 'TOO_FAST');
      })();
    `], { stdout: 'pipe', stderr: 'pipe' });
    expect(result.stdout.toString().trim()).toBe('OK');
    expect(result.exitCode).toBe(0);
  });

  test('Bun.spawnSync runs a command and returns stdout', () => {
    const result = Bun.spawnSync(['node', '-e', `
      require(${polyfillLiteral});
      const r = Bun.spawnSync(['echo', 'hello'], { stdout: 'pipe' });
      console.log(r.stdout.toString().trim());
      console.log('exit:' + r.exitCode);
    `], { stdout: 'pipe', stderr: 'pipe' });
    const lines = result.stdout.toString().trim().split('\n');
    expect(lines[0]).toBe('hello');
    expect(lines[1]).toBe('exit:0');
    expect(result.exitCode).toBe(0);
  });

  test('Bun.spawn launches a process with pid', async () => {
    const result = Bun.spawnSync(['node', '-e', `
      require(${polyfillLiteral});
      const p = Bun.spawn(['echo', 'test'], { stdio: ['pipe', 'pipe', 'pipe'] });
      console.log(typeof p.pid === 'number' ? 'HAS_PID' : 'NO_PID');
      console.log(typeof p.kill === 'function' ? 'HAS_KILL' : 'NO_KILL');
      console.log(typeof p.unref === 'function' ? 'HAS_UNREF' : 'NO_UNREF');
    `], { stdout: 'pipe', stderr: 'pipe' });
    const lines = result.stdout.toString().trim().split('\n');
    expect(lines[0]).toBe('HAS_PID');
    expect(lines[1]).toBe('HAS_KILL');
    expect(lines[2]).toBe('HAS_UNREF');
    expect(result.exitCode).toBe(0);
  });

  test('Bun.serve creates an HTTP server that responds', async () => {
    const result = Bun.spawnSync(['node', '-e', `
      require(${polyfillLiteral});
      const server = Bun.serve({
        port: 0,
        hostname: '127.0.0.1',
        fetch(req) {
          return new Response(JSON.stringify({ ok: true }), {
            headers: { 'Content-Type': 'application/json' },
          });
        },
      });
      // Only the shape of the returned object is checked here; no request is made.
      console.log(typeof server.stop === 'function' ? 'HAS_STOP' : 'NO_STOP');
      console.log(typeof server.port === 'number' ? 'HAS_PORT' : 'NO_PORT');
      server.stop();
    `], { stdout: 'pipe', stderr: 'pipe' });
    const lines = result.stdout.toString().trim().split('\n');
    expect(lines[0]).toBe('HAS_STOP');
    expect(lines[1]).toBe('HAS_PORT');
    expect(result.exitCode).toBe(0);
  });
});
|
||||||
|
|
@ -197,6 +197,36 @@ describe('resolveServerScript', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('resolveNodeServerScript', () => {
  const { resolveNodeServerScript } = require('../src/cli');
  const fs = require('fs');

  // dist/server-node.mjs is a build artifact and may not exist in a fresh checkout.
  const distDir = path.resolve(__dirname, '../dist');
  const distFile = path.join(distDir, 'server-node.mjs');

  // Tests that need the built bundle are reported as *skipped* when it is
  // missing, instead of running zero assertions and showing up as passed.
  const bundleMissing = !fs.existsSync(distFile);

  test.skipIf(bundleMissing)('finds server-node.mjs in dist from dev mode', () => {
    const srcDir = path.resolve(__dirname, '../src');
    expect(resolveNodeServerScript(srcDir, '')).toBe(distFile);
  });

  test('returns null when server-node.mjs does not exist', () => {
    const result = resolveNodeServerScript('/nonexistent/$bunfs', '/nonexistent/browse');
    expect(result).toBeNull();
  });

  test.skipIf(bundleMissing)('finds server-node.mjs adjacent to compiled binary', () => {
    const result = resolveNodeServerScript('/$bunfs/something', path.join(distDir, 'browse'));
    expect(result).toBe(distFile);
  });
});
|
||||||
|
|
||||||
describe('version mismatch detection', () => {
|
describe('version mismatch detection', () => {
|
||||||
test('detects when versions differ', () => {
|
test('detects when versions differ', () => {
|
||||||
const stateVersion = 'abc123';
|
const stateVersion = 'abc123';
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,37 @@
|
||||||
|
import { describe, test, expect } from 'bun:test';
|
||||||
|
import { TEMP_DIR, isPathWithin, IS_WINDOWS } from '../src/platform';
|
||||||
|
|
||||||
|
describe('platform constants', () => {
  // The /tmp expectation only applies off Windows. On Windows the test is
  // reported as skipped rather than passing without asserting anything.
  test.skipIf(IS_WINDOWS)('TEMP_DIR is /tmp on non-Windows', () => {
    expect(TEMP_DIR).toBe('/tmp');
  });

  test('IS_WINDOWS reflects process.platform', () => {
    expect(IS_WINDOWS).toBe(process.platform === 'win32');
  });
});
|
||||||
|
|
||||||
|
describe('isPathWithin', () => {
  // Each row: [test name, candidate path, containing directory, expected result]
  const cases: Array<[string, string, string, boolean]> = [
    ['path inside directory returns true', '/tmp/foo', '/tmp', true],
    ['path outside directory returns false', '/etc/foo', '/tmp', false],
    ['exact match returns true', '/tmp', '/tmp', true],
    // /tmp-evil shares the "/tmp" string prefix but should NOT match /tmp
    ['partial prefix does not match (path traversal)', '/tmp-evil/foo', '/tmp', false],
    ['nested path returns true', '/tmp/a/b/c', '/tmp', true],
  ];

  for (const [name, candidate, directory, expected] of cases) {
    test(name, () => {
      expect(isPathWithin(candidate, directory)).toBe(expected);
    });
  }
});
|
||||||
14
package.json
14
package.json
|
|
@ -8,17 +8,19 @@
|
||||||
"browse": "./browse/dist/browse"
|
"browse": "./browse/dist/browse"
|
||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"build": "bun run gen:skill-docs && bun run gen:skill-docs --host codex && bun build --compile browse/src/cli.ts --outfile browse/dist/browse && bun build --compile browse/src/find-browse.ts --outfile browse/dist/find-browse && git rev-parse HEAD > browse/dist/.version && rm -f .*.bun-build || true",
|
"build": "bun run gen:skill-docs && bun run gen:skill-docs --host codex && bun build --compile browse/src/cli.ts --outfile browse/dist/browse && bun build --compile browse/src/find-browse.ts --outfile browse/dist/find-browse && bash browse/scripts/build-node-server.sh && git rev-parse HEAD > browse/dist/.version && rm -f .*.bun-build || true",
|
||||||
"gen:skill-docs": "bun run scripts/gen-skill-docs.ts",
|
"gen:skill-docs": "bun run scripts/gen-skill-docs.ts",
|
||||||
"dev": "bun run browse/src/cli.ts",
|
"dev": "bun run browse/src/cli.ts",
|
||||||
"server": "bun run browse/src/server.ts",
|
"server": "bun run browse/src/server.ts",
|
||||||
"test": "bun test browse/test/ test/ --ignore test/skill-e2e.test.ts --ignore test/skill-llm-eval.test.ts --ignore test/skill-routing-e2e.test.ts --ignore test/codex-e2e.test.ts",
|
"test": "bun test browse/test/ test/ --ignore test/skill-e2e.test.ts --ignore test/skill-llm-eval.test.ts --ignore test/skill-routing-e2e.test.ts --ignore test/codex-e2e.test.ts --ignore test/gemini-e2e.test.ts",
|
||||||
"test:evals": "EVALS=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts",
|
"test:evals": "EVALS=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
||||||
"test:evals:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts",
|
"test:evals:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-llm-eval.test.ts test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
||||||
"test:e2e": "EVALS=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts",
|
"test:e2e": "EVALS=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
||||||
"test:e2e:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts",
|
"test:e2e:all": "EVALS=1 EVALS_ALL=1 bun test test/skill-e2e.test.ts test/skill-routing-e2e.test.ts test/codex-e2e.test.ts test/gemini-e2e.test.ts",
|
||||||
"test:codex": "EVALS=1 bun test test/codex-e2e.test.ts",
|
"test:codex": "EVALS=1 bun test test/codex-e2e.test.ts",
|
||||||
"test:codex:all": "EVALS=1 EVALS_ALL=1 bun test test/codex-e2e.test.ts",
|
"test:codex:all": "EVALS=1 EVALS_ALL=1 bun test test/codex-e2e.test.ts",
|
||||||
|
"test:gemini": "EVALS=1 bun test test/gemini-e2e.test.ts",
|
||||||
|
"test:gemini:all": "EVALS=1 EVALS_ALL=1 bun test test/gemini-e2e.test.ts",
|
||||||
"skill:check": "bun run scripts/skill-check.ts",
|
"skill:check": "bun run scripts/skill-check.ts",
|
||||||
"dev:skill": "bun run scripts/dev-skill.ts",
|
"dev:skill": "bun run scripts/dev-skill.ts",
|
||||||
"start": "bun run browse/src/server.ts",
|
"start": "bun run browse/src/server.ts",
|
||||||
|
|
|
||||||
46
setup
46
setup
|
|
@ -12,6 +12,11 @@ GSTACK_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
SKILLS_DIR="$(dirname "$GSTACK_DIR")"
|
SKILLS_DIR="$(dirname "$GSTACK_DIR")"
|
||||||
BROWSE_BIN="$GSTACK_DIR/browse/dist/browse"
|
BROWSE_BIN="$GSTACK_DIR/browse/dist/browse"
|
||||||
|
|
||||||
|
IS_WINDOWS=0
|
||||||
|
case "$(uname -s)" in
|
||||||
|
MINGW*|MSYS*|CYGWIN*|Windows_NT) IS_WINDOWS=1 ;;
|
||||||
|
esac
|
||||||
|
|
||||||
# ─── Parse --host flag ─────────────────────────────────────────
|
# ─── Parse --host flag ─────────────────────────────────────────
|
||||||
HOST="claude"
|
HOST="claude"
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
|
|
@ -44,10 +49,19 @@ elif [ "$HOST" = "codex" ]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
ensure_playwright_browser() {
|
ensure_playwright_browser() {
|
||||||
(
|
if [ "$IS_WINDOWS" -eq 1 ]; then
|
||||||
cd "$GSTACK_DIR"
|
# On Windows, Bun can't launch Chromium due to broken pipe handling
|
||||||
bun --eval 'import { chromium } from "playwright"; const browser = await chromium.launch(); await browser.close();'
|
# (oven-sh/bun#4253). Use Node.js to verify Chromium works instead.
|
||||||
) >/dev/null 2>&1
|
(
|
||||||
|
cd "$GSTACK_DIR"
|
||||||
|
node -e "const { chromium } = require('playwright'); (async () => { const b = await chromium.launch(); await b.close(); })()" 2>/dev/null
|
||||||
|
)
|
||||||
|
else
|
||||||
|
(
|
||||||
|
cd "$GSTACK_DIR"
|
||||||
|
bun --eval 'import { chromium } from "playwright"; const browser = await chromium.launch(); await browser.close();'
|
||||||
|
) >/dev/null 2>&1
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# 1. Build browse binary if needed (smart rebuild: stale sources, package.json, lock)
|
# 1. Build browse binary if needed (smart rebuild: stale sources, package.json, lock)
|
||||||
|
|
@ -87,10 +101,32 @@ if ! ensure_playwright_browser; then
|
||||||
cd "$GSTACK_DIR"
|
cd "$GSTACK_DIR"
|
||||||
bunx playwright install chromium
|
bunx playwright install chromium
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if [ "$IS_WINDOWS" -eq 1 ]; then
|
||||||
|
# On Windows, Node.js launches Chromium (not Bun — see oven-sh/bun#4253).
|
||||||
|
# Ensure playwright is importable by Node from the gstack directory.
|
||||||
|
if ! command -v node >/dev/null 2>&1; then
|
||||||
|
echo "gstack setup failed: Node.js is required on Windows (Bun cannot launch Chromium due to a pipe bug)" >&2
|
||||||
|
echo " Install Node.js: https://nodejs.org/" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Windows detected — verifying Node.js can load Playwright..."
|
||||||
|
(
|
||||||
|
cd "$GSTACK_DIR"
|
||||||
|
# Bun's node_modules already has playwright; verify Node can require it
|
||||||
|
node -e "require('playwright')" 2>/dev/null || npm install --no-save playwright
|
||||||
|
)
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if ! ensure_playwright_browser; then
|
if ! ensure_playwright_browser; then
|
||||||
echo "gstack setup failed: Playwright Chromium could not be launched" >&2
|
if [ "$IS_WINDOWS" -eq 1 ]; then
|
||||||
|
echo "gstack setup failed: Playwright Chromium could not be launched via Node.js" >&2
|
||||||
|
echo " This is a known issue with Bun on Windows (oven-sh/bun#4253)." >&2
|
||||||
|
echo " Ensure Node.js is installed and 'node -e \"require('playwright')\"' works." >&2
|
||||||
|
else
|
||||||
|
echo "gstack setup failed: Playwright Chromium could not be launched" >&2
|
||||||
|
fi
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,173 @@
|
||||||
|
/**
|
||||||
|
* Gemini CLI E2E tests — verify skills work when invoked by Gemini CLI.
|
||||||
|
*
|
||||||
|
* Spawns `gemini -p` with stream-json output in the repo root (where
|
||||||
|
* .agents/skills/ already exists), parses JSONL events, and validates
|
||||||
|
* structured results. Follows the same pattern as codex-e2e.test.ts.
|
||||||
|
*
|
||||||
|
* Prerequisites:
|
||||||
|
* - `gemini` binary installed (npm install -g @google/gemini-cli)
|
||||||
|
* - Gemini authenticated via ~/.gemini/ config or GEMINI_API_KEY env var
|
||||||
|
* - EVALS=1 env var set (same gate as Claude E2E tests)
|
||||||
|
*
|
||||||
|
* Skips gracefully when prerequisites are not met.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, test, expect, afterAll } from 'bun:test';
|
||||||
|
import { runGeminiSkill } from './helpers/gemini-session-runner';
|
||||||
|
import type { GeminiResult } from './helpers/gemini-session-runner';
|
||||||
|
import { EvalCollector } from './helpers/eval-store';
|
||||||
|
import { selectTests, detectBaseBranch, getChangedFiles, GLOBAL_TOUCHFILES } from './helpers/touchfiles';
|
||||||
|
import * as path from 'path';
|
||||||
|
|
||||||
|
const ROOT = path.resolve(import.meta.dir, '..');
|
||||||
|
|
||||||
|
// --- Prerequisites check ---
|
||||||
|
|
||||||
|
/**
 * Probe for the `gemini` executable by running `which gemini`.
 * Returns true only when that command exits with code 0; a non-zero exit
 * or any exception raised while spawning counts as "not available".
 */
function detectGeminiBinary(): boolean {
  try {
    const { exitCode } = Bun.spawnSync(['which', 'gemini']);
    return exitCode === 0;
  } catch {
    return false;
  }
}

// Evaluated once at module load.
const GEMINI_AVAILABLE = detectGeminiBinary();
|
||||||
|
|
||||||
|
// E2E evals are opt-in: any non-empty EVALS value enables them.
const evalsEnabled = Boolean(process.env.EVALS);

// The suite runs only when evals are enabled AND the gemini binary was found.
const SKIP = !(evalsEnabled && GEMINI_AVAILABLE);

const describeGemini = SKIP ? describe.skip : describe;

// Explain the skip (helpful for debugging CI) only when evals were requested
// but the binary is missing. With EVALS unset we stay silent — same as the
// Claude E2E tests, EVALS=1 is required.
if (evalsEnabled && !GEMINI_AVAILABLE) {
  process.stderr.write('\nGemini E2E: SKIPPED — gemini binary not found (install: npm i -g @google/gemini-cli)\n');
}
|
||||||
|
|
||||||
|
// --- Diff-based test selection ---
|
||||||
|
|
||||||
|
// Touchfile globs per Gemini E2E test, keyed by test name (same pattern as Codex E2E).
const GEMINI_E2E_TOUCHFILES: Record<string, string[]> = {
  'gemini-discover-skill': ['.agents/skills/**', 'test/helpers/gemini-session-runner.ts'],
  'gemini-review-findings': ['review/**', '.agents/skills/gstack-review/**', 'test/helpers/gemini-session-runner.ts'],
};

// Names of the tests chosen by diff-based selection; null means "run all".
let selectedTests: string[] | null = null;

// Selection applies only when evals are on and EVALS_ALL is not set.
if (evalsEnabled && !process.env.EVALS_ALL) {
  const baseBranch = process.env.EVALS_BASE || detectBaseBranch(ROOT) || 'main';
  const changedFiles = getChangedFiles(baseBranch, ROOT);

  // An empty change list (e.g., on main branch) leaves selectedTests null -> run all.
  if (changedFiles.length > 0) {
    const { selected, skipped, reason } = selectTests(changedFiles, GEMINI_E2E_TOUCHFILES, GLOBAL_TOUCHFILES);
    selectedTests = selected;

    const totalTests = Object.keys(GEMINI_E2E_TOUCHFILES).length;
    process.stderr.write(`\nGemini E2E selection (${reason}): ${selected.length}/${totalTests} tests\n`);
    if (skipped.length > 0) {
      process.stderr.write(` Skipped: ${skipped.join(', ')}\n`);
    }
    process.stderr.write('\n');
  }
}
|
||||||
|
|
||||||
|
/**
 * Register `fn` under `testName`, as a skipped test when diff-based selection
 * produced a list that does not contain `testName`. A null selection means
 * every test runs.
 */
function testIfSelected(testName: string, fn: () => Promise<void>, timeout: number) {
  const excluded = selectedTests !== null && !selectedTests.includes(testName);
  const register = excluded ? test.skip : test;
  register(testName, fn, timeout);
}
|
||||||
|
|
||||||
|
// --- Eval result collector ---
|
||||||
|
|
||||||
|
// Collector exists only when the suite will actually run; otherwise null.
const evalCollector = !SKIP && evalsEnabled ? new EvalCollector('e2e-gemini') : null;

/**
 * Record one Gemini E2E outcome in the eval collector.
 * Does nothing when no collector was created.
 */
function recordGeminiE2E(name: string, result: GeminiResult, passed: boolean) {
  if (!evalCollector) return;

  const exitReason = result.exitCode === 0 ? 'success' : `exit_code_${result.exitCode}`;

  evalCollector.addTest({
    name,
    suite: 'gemini-e2e',
    tier: 'e2e',
    passed,
    duration_ms: result.durationMs,
    cost_usd: 0, // Gemini doesn't report cost in USD; tokens are tracked
    output: result.output?.slice(0, 2000), // first 2000 characters only
    turns_used: result.toolCalls.length, // approximate: tool calls as turns
    exit_reason: exitReason,
  });
}
|
||||||
|
|
||||||
|
/**
 * Log a one-line usage summary for a Gemini run: total tokens, number of
 * tool calls, and wall-clock duration rounded to whole seconds.
 */
function logGeminiCost(label: string, result: GeminiResult) {
  const { tokens, toolCalls, durationMs } = result;
  const seconds = Math.round(durationMs / 1000);
  console.log(`${label}: ${tokens} tokens, ${toolCalls.length} tool calls, ${seconds}s`);
}
|
||||||
|
|
||||||
|
// After all tests: finalize the eval collector, if one was created.
afterAll(async () => {
  if (!evalCollector) return;
  await evalCollector.finalize();
});
|
||||||
|
|
||||||
|
// --- Tests ---
|
||||||
|
|
||||||
|
describeGemini('Gemini E2E', () => {
  // In both tests every pass criterion is computed BEFORE the result is
  // recorded, so the `passed` flag sent to the eval collector matches the
  // expectations asserted afterwards.

  testIfSelected('gemini-discover-skill', async () => {
    // Run Gemini in the repo root where .agents/skills/ exists
    const result = await runGeminiSkill({
      prompt: 'List any skills or instructions you have available. Just list the names.',
      timeoutMs: 60_000,
      cwd: ROOT,
    });

    logGeminiCost('gemini-discover-skill', result);

    // The output should reference skills in some form
    const outputLower = result.output.toLowerCase();
    const mentionsSkills = ['review', 'gstack', 'skill'].some((keyword) => outputLower.includes(keyword));

    const passed = result.exitCode === 0 && result.output.length > 0 && mentionsSkills;
    recordGeminiE2E('gemini-discover-skill', result, passed);

    expect(result.exitCode).toBe(0);
    expect(result.output.length).toBeGreaterThan(0);
    expect(mentionsSkills).toBe(true);
  }, 120_000);

  testIfSelected('gemini-review-findings', async () => {
    // Run gstack-review skill via Gemini on this repo
    const result = await runGeminiSkill({
      prompt: 'Run the gstack-review skill on this repository. Review the current branch diff and report your findings.',
      timeoutMs: 540_000,
      cwd: ROOT,
    });

    logGeminiCost('gemini-review-findings', result);

    // Review output should contain some review-like content.
    // ('issue' also matches the phrase "no issues".)
    const output = result.output;
    const outputLower = output.toLowerCase();
    const reviewKeywords = ['finding', 'issue', 'review', 'change', 'diff', 'clean', 'p1', 'p2'];
    const hasReviewContent = reviewKeywords.some((keyword) => outputLower.includes(keyword));

    const passed = result.exitCode === 0 && output.length > 50 && hasReviewContent;
    recordGeminiE2E('gemini-review-findings', result, passed);

    expect(result.exitCode).toBe(0);
    expect(output.length).toBeGreaterThan(50);
    expect(hasReviewContent).toBe(true);
  }, 600_000);
});
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
import { describe, test, expect } from 'bun:test';
|
||||||
|
import { parseGeminiJSONL } from './gemini-session-runner';
|
||||||
|
|
||||||
|
// Fixture: actual Gemini CLI stream-json output with tool use
|
||||||
|
const FIXTURE_LINES = [
|
||||||
|
'{"type":"init","timestamp":"2026-03-20T15:14:46.455Z","session_id":"test-session-123","model":"auto-gemini-3"}',
|
||||||
|
'{"type":"message","timestamp":"2026-03-20T15:14:46.456Z","role":"user","content":"list the files"}',
|
||||||
|
'{"type":"message","timestamp":"2026-03-20T15:14:49.650Z","role":"assistant","content":"I will list the files.","delta":true}',
|
||||||
|
'{"type":"tool_use","timestamp":"2026-03-20T15:14:49.690Z","tool_name":"run_shell_command","tool_id":"cmd_1","parameters":{"command":"ls"}}',
|
||||||
|
'{"type":"tool_result","timestamp":"2026-03-20T15:14:49.931Z","tool_id":"cmd_1","status":"success","output":"file1.ts\\nfile2.ts"}',
|
||||||
|
'{"type":"message","timestamp":"2026-03-20T15:14:51.945Z","role":"assistant","content":"Here are the files.","delta":true}',
|
||||||
|
'{"type":"result","timestamp":"2026-03-20T15:14:52.030Z","status":"success","stats":{"total_tokens":27147,"input_tokens":26928,"output_tokens":87,"cached":0,"duration_ms":5575,"tool_calls":1}}',
|
||||||
|
];
|
||||||
|
|
||||||
|
describe('parseGeminiJSONL', () => {
  // --- Cases driven by the shared FIXTURE_LINES stream ---

  test('extracts session ID from init event', () => {
    const { sessionId } = parseGeminiJSONL(FIXTURE_LINES);
    expect(sessionId).toBe('test-session-123');
  });

  test('concatenates assistant message deltas into output', () => {
    const { output } = parseGeminiJSONL(FIXTURE_LINES);
    expect(output).toBe('I will list the files.Here are the files.');
  });

  test('ignores user messages', () => {
    const userLine = '{"type":"message","role":"user","content":"this should be ignored"}';
    const assistantLine = '{"type":"message","role":"assistant","content":"this should be kept","delta":true}';
    const { output } = parseGeminiJSONL([userLine, assistantLine]);
    expect(output).toBe('this should be kept');
  });

  test('extracts tool names from tool_use events', () => {
    const { toolCalls } = parseGeminiJSONL(FIXTURE_LINES);
    expect(toolCalls).toHaveLength(1);
    expect(toolCalls[0]).toBe('run_shell_command');
  });

  test('extracts total tokens from result stats', () => {
    const { tokens } = parseGeminiJSONL(FIXTURE_LINES);
    expect(tokens).toBe(27147);
  });

  // --- Tolerance for bad or partial input ---

  test('skips malformed lines without throwing', () => {
    const { sessionId, output, tokens } = parseGeminiJSONL([
      '{"type":"init","session_id":"ok"}',
      'this is not json',
      '{"type":"message","role":"assistant","content":"hello","delta":true}',
      '{incomplete json',
      '{"type":"result","status":"success","stats":{"total_tokens":100}}',
    ]);
    expect(sessionId).toBe('ok');
    expect(output).toBe('hello');
    expect(tokens).toBe(100);
  });

  test('skips empty and whitespace-only lines', () => {
    const { sessionId, tokens } = parseGeminiJSONL([
      '',
      ' ',
      '{"type":"init","session_id":"s1"}',
      '\t',
      '{"type":"result","status":"success","stats":{"total_tokens":50}}',
    ]);
    expect(sessionId).toBe('s1');
    expect(tokens).toBe(50);
  });

  test('handles empty input', () => {
    const { output, toolCalls, tokens, sessionId } = parseGeminiJSONL([]);
    expect(output).toBe('');
    expect(toolCalls).toHaveLength(0);
    expect(tokens).toBe(0);
    expect(sessionId).toBeNull();
  });

  test('handles missing fields gracefully', () => {
    const { sessionId, output, toolCalls, tokens } = parseGeminiJSONL([
      '{"type":"init"}', // no session_id
      '{"type":"message","role":"assistant"}', // no content
      '{"type":"tool_use"}', // no tool_name
      '{"type":"result","status":"success"}', // no stats
    ]);
    expect(sessionId).toBeNull();
    expect(output).toBe('');
    expect(toolCalls).toHaveLength(0);
    expect(tokens).toBe(0);
  });

  test('handles multiple tool_use events', () => {
    const { toolCalls } = parseGeminiJSONL([
      '{"type":"tool_use","tool_name":"run_shell_command","tool_id":"cmd_1","parameters":{"command":"ls"}}',
      '{"type":"tool_use","tool_name":"read_file","tool_id":"cmd_2","parameters":{"path":"foo.ts"}}',
      '{"type":"tool_use","tool_name":"run_shell_command","tool_id":"cmd_3","parameters":{"command":"cat bar.ts"}}',
    ]);
    // Order and duplicates are preserved.
    expect(toolCalls).toEqual(['run_shell_command', 'read_file', 'run_shell_command']);
  });
});
|
||||||
|
|
@ -0,0 +1,201 @@
|
||||||
|
/**
|
||||||
|
* Gemini CLI subprocess runner for skill E2E testing.
|
||||||
|
*
|
||||||
|
* Spawns `gemini -p` as an independent process, parses its stream-json
|
||||||
|
* output, and returns structured results. Follows the same pattern as
|
||||||
|
* codex-session-runner.ts but adapted for the Gemini CLI.
|
||||||
|
*
|
||||||
|
* Key differences from Codex session-runner:
|
||||||
|
* - Uses `gemini -p` instead of `codex exec`
|
||||||
|
* - Output is NDJSON with event types: init, message, tool_use, tool_result, result
|
||||||
|
* - Uses `--output-format stream-json --yolo` instead of `--json -s read-only`
|
||||||
|
* - No temp HOME needed — Gemini discovers skills from `.agents/skills/` in cwd
|
||||||
|
* - Message events are streamed with `delta: true` — must concatenate
|
||||||
|
*/
|
||||||
|
|
||||||
|
import * as path from 'path';
|
||||||
|
|
||||||
|
// --- Interfaces ---
|
||||||
|
|
||||||
|
export interface GeminiResult {
|
||||||
|
output: string; // Full assistant message text (concatenated deltas)
|
||||||
|
toolCalls: string[]; // Tool names from tool_use events
|
||||||
|
tokens: number; // Total tokens used
|
||||||
|
exitCode: number; // Process exit code
|
||||||
|
durationMs: number; // Wall clock time
|
||||||
|
sessionId: string | null; // Session ID from init event
|
||||||
|
rawLines: string[]; // Raw JSONL lines for debugging
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- JSONL parser ---
|
||||||
|
|
||||||
|
export interface ParsedGeminiJSONL {
  /** Every assistant message `content` chunk, concatenated in arrival order. */
  output: string;
  /** `tool_name` of each `tool_use` event, in arrival order (duplicates kept). */
  toolCalls: string[];
  /** `stats.total_tokens` of the last `result` event; 0 when absent. */
  tokens: number;
  /** `session_id` of the last `init` event that carried one; null when none did. */
  sessionId: string | null;
}

/**
 * Parse an array of JSONL lines from `gemini -p --output-format stream-json`.
 * Pure function — no I/O, no side effects.
 *
 * Handles these Gemini event types:
 * - init → extract session_id
 * - message (role=assistant) → append content to output (the `delta` flag is not inspected)
 * - tool_use → extract tool_name
 * - result → extract total_tokens from stats
 * Every other event type (e.g. tool_result) is ignored.
 *
 * Blank lines, lines that are not valid JSON, and JSON values that are not
 * objects are skipped. A field whose JSON type does not match what the result
 * declares (string for session_id/content/tool_name, number for total_tokens)
 * is treated as missing, so the returned values always match their types.
 *
 * @param lines Raw stdout lines, one JSON document per line.
 * @returns The aggregated output text, tool names, token total and session ID.
 */
export function parseGeminiJSONL(lines: string[]): ParsedGeminiJSONL {
  const outputParts: string[] = [];
  const toolCalls: string[] = [];
  let tokens = 0;
  let sessionId: string | null = null;

  for (const line of lines) {
    if (!line.trim()) continue;

    let decoded: unknown;
    try {
      decoded = JSON.parse(line);
    } catch {
      continue; // malformed line — skip it
    }
    // `null`, numbers, strings, etc. are valid JSON but not events.
    if (typeof decoded !== 'object' || decoded === null) continue;
    const event = decoded as Record<string, unknown>;

    switch (event.type) {
      case 'init': {
        const sid = event.session_id;
        if (typeof sid === 'string' && sid) sessionId = sid;
        break;
      }
      case 'message': {
        const content = event.content;
        if (event.role === 'assistant' && typeof content === 'string') {
          outputParts.push(content);
        }
        break;
      }
      case 'tool_use': {
        const name = event.tool_name;
        if (typeof name === 'string' && name) toolCalls.push(name);
        break;
      }
      case 'result': {
        // A result event always resets the total, falling back to 0 when
        // stats or total_tokens is missing or not a number.
        const stats = event.stats;
        const total =
          typeof stats === 'object' && stats !== null
            ? (stats as Record<string, unknown>).total_tokens
            : undefined;
        tokens = typeof total === 'number' && Number.isFinite(total) ? total : 0;
        break;
      }
    }
  }

  return {
    output: outputParts.join(''),
    toolCalls,
    tokens,
    sessionId,
  };
}
|
||||||
|
|
||||||
|
// --- Main runner ---
|
||||||
|
|
||||||
|
/**
 * Run a prompt via `gemini -p` and return structured results.
 *
 * Spawns `gemini -p <prompt> --output-format stream-json --yolo`, collects the
 * non-blank stdout lines, parses them with parseGeminiJSONL, and returns a
 * GeminiResult. While the child runs, tool calls and the first 100 characters
 * of each assistant message are echoed to this process's stderr; afterwards
 * the first 200 characters of the child's stderr are echoed too.
 *
 * Special exit codes in the returned result:
 * - `-1`: `gemini` could not be located with `which` (or `which` itself could
 *   not be run). Nothing is spawned and `output` carries a SKIP notice.
 * - `124`: the run exceeded `timeoutMs` and the child was killed.
 *
 * @param opts.prompt    What to ask Gemini.
 * @param opts.timeoutMs Kill the child after this many milliseconds (default 300000, i.e. 5 min).
 * @param opts.cwd       Working directory for the child (where .agents/skills/ lives);
 *                       defaults to this process's cwd.
 * @returns Parsed output, tool names, tokens and session ID, plus exit code,
 *          wall-clock duration and the raw collected lines.
 */
export async function runGeminiSkill(opts: {
  prompt: string; // What to ask Gemini
  timeoutMs?: number; // Default 300000 (5 min)
  cwd?: string; // Working directory (where .agents/skills/ lives)
}): Promise<GeminiResult> {
  const { prompt, timeoutMs = 300_000, cwd } = opts;

  const startTime = Date.now();

  // Check if gemini binary exists. The probe is guarded so that a failure to
  // spawn `which` yields the SKIP result below instead of an exception.
  let geminiFound = false;
  try {
    geminiFound = Bun.spawnSync(['which', 'gemini']).exitCode === 0;
  } catch {
    geminiFound = false;
  }
  if (!geminiFound) {
    return {
      output: 'SKIP: gemini binary not found',
      toolCalls: [],
      tokens: 0,
      exitCode: -1,
      durationMs: Date.now() - startTime,
      sessionId: null,
      rawLines: [],
    };
  }

  // Build gemini command
  const args = ['-p', prompt, '--output-format', 'stream-json', '--yolo'];

  // Spawn gemini — uses real HOME for auth, cwd for skill discovery
  const proc = Bun.spawn(['gemini', ...args], {
    cwd: cwd || process.cwd(),
    stdout: 'pipe',
    stderr: 'pipe',
  });

  // Race against timeout
  let timedOut = false;
  const timeoutId = setTimeout(() => {
    timedOut = true;
    proc.kill();
  }, timeoutMs);

  try {
    // Stream and collect JSONL from stdout; drain stderr concurrently.
    const collectedLines: string[] = [];
    const stderrPromise = new Response(proc.stderr).text();

    const reader = proc.stdout.getReader();
    const decoder = new TextDecoder();
    let buf = '';

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buf += decoder.decode(value, { stream: true });

        // Everything before the last newline is complete; keep the tail buffered.
        const lines = buf.split('\n');
        buf = lines.pop() ?? '';

        for (const line of lines) {
          if (!line.trim()) continue;
          collectedLines.push(line);

          // Real-time progress to stderr
          try {
            const event = JSON.parse(line);
            if (event.type === 'tool_use' && event.tool_name) {
              const elapsed = Math.round((Date.now() - startTime) / 1000);
              process.stderr.write(` [gemini ${elapsed}s] tool: ${event.tool_name}\n`);
            } else if (event.type === 'message' && event.role === 'assistant' && event.content) {
              const elapsed = Math.round((Date.now() - startTime) / 1000);
              process.stderr.write(` [gemini ${elapsed}s] message: ${event.content.slice(0, 100)}\n`);
            }
          } catch { /* skip — parseGeminiJSONL will handle it later */ }
        }
      }
      // Flush any bytes the streaming decoder is still holding.
      buf += decoder.decode();
    } catch { /* stream read error — fall through to exit code handling */ }

    // Flush remaining buffer (a final line without a trailing newline)
    if (buf.trim()) {
      collectedLines.push(buf);
    }

    const stderr = await stderrPromise;
    const exitCode = await proc.exited;
    const durationMs = Date.now() - startTime;

    // Parse all collected JSONL lines
    const parsed = parseGeminiJSONL(collectedLines);

    // Log stderr if non-empty (may contain auth errors, etc.)
    if (stderr.trim()) {
      process.stderr.write(` [gemini stderr] ${stderr.trim().slice(0, 200)}\n`);
    }

    return {
      output: parsed.output,
      toolCalls: parsed.toolCalls,
      tokens: parsed.tokens,
      exitCode: timedOut ? 124 : exitCode,
      durationMs,
      sessionId: parsed.sessionId,
      rawLines: collectedLines,
    };
  } finally {
    // Always disarm the kill timer, including when awaiting stderr or the
    // exit code rejects, so no pending timer outlives this call.
    clearTimeout(timeoutId);
  }
}
|
||||||
|
|
@ -84,6 +84,10 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||||
'codex-discover-skill': ['codex/**', '.agents/skills/**', 'test/helpers/codex-session-runner.ts'],
|
'codex-discover-skill': ['codex/**', '.agents/skills/**', 'test/helpers/codex-session-runner.ts'],
|
||||||
'codex-review-findings': ['review/**', '.agents/skills/gstack-review/**', 'codex/**', 'test/helpers/codex-session-runner.ts'],
|
'codex-review-findings': ['review/**', '.agents/skills/gstack-review/**', 'codex/**', 'test/helpers/codex-session-runner.ts'],
|
||||||
|
|
||||||
|
// Gemini E2E (tests skills via Gemini CLI)
|
||||||
|
'gemini-discover-skill': ['.agents/skills/**', 'test/helpers/gemini-session-runner.ts'],
|
||||||
|
'gemini-review-findings': ['review/**', '.agents/skills/gstack-review/**', 'test/helpers/gemini-session-runner.ts'],
|
||||||
|
|
||||||
// QA bootstrap
|
// QA bootstrap
|
||||||
'qa-bootstrap': ['qa/**', 'browse/src/**', 'ship/**'],
|
'qa-bootstrap': ['qa/**', 'browse/src/**', 'ship/**'],
|
||||||
|
|
||||||
|
|
@ -160,6 +164,7 @@ export const LLM_JUDGE_TOUCHFILES: Record<string, string[]> = {
|
||||||
export const GLOBAL_TOUCHFILES = [
|
export const GLOBAL_TOUCHFILES = [
|
||||||
'test/helpers/session-runner.ts',
|
'test/helpers/session-runner.ts',
|
||||||
'test/helpers/codex-session-runner.ts',
|
'test/helpers/codex-session-runner.ts',
|
||||||
|
'test/helpers/gemini-session-runner.ts',
|
||||||
'test/helpers/eval-store.ts',
|
'test/helpers/eval-store.ts',
|
||||||
'test/helpers/llm-judge.ts',
|
'test/helpers/llm-judge.ts',
|
||||||
'scripts/gen-skill-docs.ts',
|
'scripts/gen-skill-docs.ts',
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue