mirror of https://github.com/garrytan/gstack.git
test: sidebar CSS interaction E2E — HN comment highlight round-trip
New E2E test (periodic tier, ~$2/run) that exercises the full sidebar agent pipeline with CSS interaction. The agent: (1) navigates to Hacker News; (2) clicks into the top story's comments; (3) reads the comments and identifies the most insightful one; (4) highlights it with a 4px solid orange outline via style injection. Tests: navigation, snapshot, text reading, LLM judgment, CSS modification. Requires real browser + real Claude (ANTHROPIC_API_KEY). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c77f064122
commit
0ccb9c1f01
|
|
@ -152,6 +152,7 @@ export const E2E_TOUCHFILES: Record<string, string[]> = {
|
||||||
// Sidebar agent
|
// Sidebar agent
|
||||||
'sidebar-navigate': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/**'],
|
'sidebar-navigate': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/**'],
|
||||||
'sidebar-url-accuracy': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/background.js'],
|
'sidebar-url-accuracy': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/sidebar-utils.ts', 'extension/background.js'],
|
||||||
|
'sidebar-css-interaction': ['browse/src/server.ts', 'browse/src/sidebar-agent.ts', 'browse/src/write-commands.ts', 'browse/src/read-commands.ts', 'browse/src/cdp-inspector.ts', 'extension/**'],
|
||||||
|
|
||||||
// Autoplan
|
// Autoplan
|
||||||
'autoplan-core': ['autoplan/**', 'plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**'],
|
'autoplan-core': ['autoplan/**', 'plan-ceo-review/**', 'plan-eng-review/**', 'plan-design-review/**'],
|
||||||
|
|
@ -282,6 +283,7 @@ export const E2E_TIERS: Record<string, 'gate' | 'periodic'> = {
|
||||||
// Sidebar agent
|
// Sidebar agent
|
||||||
'sidebar-navigate': 'periodic',
|
'sidebar-navigate': 'periodic',
|
||||||
'sidebar-url-accuracy': 'periodic',
|
'sidebar-url-accuracy': 'periodic',
|
||||||
|
'sidebar-css-interaction': 'periodic',
|
||||||
|
|
||||||
// Autoplan — periodic (not yet implemented)
|
// Autoplan — periodic (not yet implemented)
|
||||||
'autoplan-core': 'periodic',
|
'autoplan-core': 'periodic',
|
||||||
|
|
|
||||||
|
|
@ -149,6 +149,155 @@ describeIfSelected('Sidebar URL accuracy E2E', ['sidebar-url-accuracy'], () => {
|
||||||
}, 30_000);
|
}, 30_000);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// --- Sidebar CSS Interaction E2E (real Claude + real browser) ---
|
||||||
|
// Goes to HN, reads comments, identifies the most insightful one, highlights it.
|
||||||
|
// Exercises: navigation, snapshot, text reading, LLM judgment, CSS style injection.
|
||||||
|
|
||||||
|
describeIfSelected('Sidebar CSS interaction E2E', ['sidebar-css-interaction'], () => {
|
||||||
|
// Child processes spawned by beforeAll; null until they have been started.
let serverProc: Subprocess | null = null;
let agentProc: Subprocess | null = null;

// Connection details read back from the server's state file.
let serverPort = 0;
let authToken = '';

// Per-suite temp directory and the files placed inside it.
let tmpDir = '';
let stateFile = '';
let queueFile = '';
|
||||||
|
|
||||||
|
/**
 * Sends a request to the browse server started for this suite.
 *
 * `Content-Type` defaults to `application/json`. When the caller supplies no
 * (non-empty) `Authorization` header and `authToken` is set, a
 * `Bearer <authToken>` header is attached. Caller-supplied headers win.
 *
 * @param pathname - Path (plus optional query string) appended to
 *   `http://127.0.0.1:<serverPort>`.
 * @param opts - Standard fetch options; `headers` may be any `HeadersInit`
 *   form (plain object, tuple list, or `Headers` instance).
 * @returns The fetch response promise.
 */
async function api(pathname: string, opts: RequestInit = {}): Promise<Response> {
  // Headers normalizes every HeadersInit shape and matches names
  // case-insensitively, so a caller's lowercase `authorization` is honored
  // instead of having a second bearer token added next to it.
  const headers = new Headers(opts.headers);
  if (!headers.has('Content-Type')) {
    headers.set('Content-Type', 'application/json');
  }
  if (!headers.get('Authorization') && authToken) {
    headers.set('Authorization', `Bearer ${authToken}`);
  }
  return fetch(`http://127.0.0.1:${serverPort}${pathname}`, { ...opts, headers });
}
|
||||||
|
|
||||||
|
// Suite setup: verifies the browse binary exists, starts the browse server
// with a real browser, waits for it to publish its port/token, then starts
// the sidebar agent pointed at that server.
beforeAll(async () => {
  // This suite needs the built browse binary. Falling back to `echo` would
  // only print the agent's commands back, which could let the later
  // "ycombinator" substring check match without any page being loaded.
  const browseBin = path.resolve(ROOT, 'browse', 'dist', 'browse');
  if (!fs.existsSync(browseBin)) {
    throw new Error(
      `browse binary not found at ${browseBin}; build it before running sidebar-css-interaction`,
    );
  }

  // mkdtempSync creates the directory, so no separate mkdir is needed for
  // the files placed directly inside it.
  tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'sidebar-e2e-css-'));
  stateFile = path.join(tmpDir, 'browse.json');
  queueFile = path.join(tmpDir, 'sidebar-queue.jsonl');

  // Start server WITH a real browser (no HEADLESS_SKIP) for CSS interaction
  const serverScript = path.resolve(ROOT, 'browse', 'src', 'server.ts');
  serverProc = spawn(['bun', 'run', serverScript], {
    env: {
      ...process.env,
      BROWSE_STATE_FILE: stateFile,
      BROWSE_PORT: '0',
      SIDEBAR_QUEUE_PATH: queueFile,
      BROWSE_IDLE_TIMEOUT: '300',
    },
    // NOTE(review): stdout/stderr are piped but never read here — confirm a
    // chatty child cannot stall on a full pipe during the 2-minute run.
    stdio: ['ignore', 'pipe', 'pipe'],
  });

  // Poll the state file (every 200 ms, up to 30 s) until it holds both a
  // port and a token.
  const startDeadline = Date.now() + 30000;
  while (Date.now() < startDeadline) {
    if (fs.existsSync(stateFile)) {
      try {
        const state = JSON.parse(fs.readFileSync(stateFile, 'utf-8'));
        if (state.port && state.token) {
          serverPort = state.port;
          authToken = state.token;
          break;
        }
      } catch {
        // Unreadable or unparsable state file (presumably caught mid-write);
        // retry on the next tick.
      }
    }
    await new Promise(r => setTimeout(r, 200));
  }
  if (!serverPort) throw new Error('Server did not start in time');

  // Start sidebar-agent with the real browse binary
  const agentScript = path.resolve(ROOT, 'browse', 'src', 'sidebar-agent.ts');
  agentProc = spawn(['bun', 'run', agentScript], {
    env: {
      ...process.env,
      BROWSE_SERVER_PORT: String(serverPort),
      BROWSE_STATE_FILE: stateFile,
      SIDEBAR_QUEUE_PATH: queueFile,
      SIDEBAR_AGENT_TIMEOUT: '120000',
      BROWSE_BIN: browseBin,
    },
    stdio: ['ignore', 'pipe', 'pipe'],
  });

  // Fixed 2 s pause after spawning the agent, before any test runs.
  await new Promise(r => setTimeout(r, 2000));
}, 35000);
|
||||||
|
|
||||||
|
// Suite teardown: stops both child processes, passes the eval collector to
// finalizeEvalCollector, and removes the temp directory. Every step that can
// throw is best-effort so one failure does not block the rest.
afterAll(() => {
  // Stop the agent before the server.
  for (const child of [agentProc, serverProc]) {
    if (!child) continue;
    try {
      child.kill();
    } catch {}
  }

  finalizeEvalCollector(evalCollector);

  try {
    fs.rmSync(tmpDir, { recursive: true, force: true });
  } catch {}
});
|
||||||
|
|
||||||
|
// End-to-end check of the sidebar agent: asks it to open Hacker News, go to
// the top story's comments, pick the most insightful comment and outline it
// in orange, then inspects the chat log for evidence of each step.
testIfSelected('sidebar-css-interaction', async () => {
  // Only the fields of a chat entry that this test reads.
  type ChatEntry = { type?: string; role?: string; text?: string };

  await api('/sidebar-session/new', { method: 'POST' });
  fs.writeFileSync(queueFile, '');
  const startTime = Date.now();

  // Ask the agent to go to HN, find the most insightful comment, and highlight it
  const resp = await api('/sidebar-command', {
    method: 'POST',
    body: JSON.stringify({
      message: 'Go to https://news.ycombinator.com. Find the top story. Click into its comments. Read the comments and find the most insightful one. Highlight that comment with a 4px solid orange outline.',
      activeTabUrl: 'about:blank',
    }),
  });
  expect(resp.status).toBe(200);

  // Poll for agent_done (2 min timeout — this is a multi-step task)
  const deadline = Date.now() + 120000;
  let entries: ChatEntry[] = [];
  while (Date.now() < deadline) {
    const chatResp = await api('/sidebar-chat?after=0');
    const data = (await chatResp.json()) as { entries?: ChatEntry[] } | null;
    // A response without `entries` is treated as "nothing yet" rather than
    // crashing the poll loop on `undefined.some`.
    entries = data?.entries ?? [];
    if (entries.some(e => e.type === 'agent_done')) break;
    await new Promise(r => setTimeout(r, 3000));
  }

  const duration = Date.now() - startTime;
  const doneEntry = entries.find(e => e.type === 'agent_done');

  // Agent should have run browse commands (look for tool_use entries)
  const toolUses = entries.filter(e => e.type === 'tool_use');

  // Should have navigated to HN (look for tool output mentioning ycombinator)
  const toolOutputs = entries
    .filter(e => e.type === 'tool_result')
    .map(e => e.text ?? '')
    .join(' ');
  const navigatedToHN = toolOutputs.includes('ycombinator') || toolOutputs.includes('Hacker News');

  // Should have applied a style (look for orange/outline in the chat log).
  // The prompt itself contains "orange outline", so user-authored entries are
  // left out to keep this check from passing on the echoed request alone.
  // NOTE(review): assumes an echoed prompt is tagged role 'user' — confirm
  // against the server's chat entry schema.
  const allText = entries
    .filter(e => e.role !== 'user')
    .map(e => e.text ?? '')
    .join(' ');
  const appliedStyle = allText.includes('outline') || allText.includes('orange') || allText.includes('style');

  // Record the eval result BEFORE asserting, so failed and timed-out runs are
  // reported too instead of being skipped when an expect throws.
  evalCollector?.addTest({
    name: 'sidebar-css-interaction', suite: 'Sidebar CSS interaction E2E', tier: 'e2e',
    passed: !!doneEntry && navigatedToHN && appliedStyle,
    duration_ms: duration,
    cost_usd: 0,
    exit_reason: doneEntry ? 'success' : 'timeout',
  });

  // Agent should have completed
  expect(doneEntry).toBeDefined();
  expect(toolUses.length).toBeGreaterThanOrEqual(2); // At minimum: goto + one more
  expect(navigatedToHN).toBe(true);
  expect(appliedStyle).toBe(true);
}, 150_000);
|
||||||
|
});
|
||||||
|
|
||||||
// --- Sidebar Navigate (real Claude, requires ANTHROPIC_API_KEY) ---
|
// --- Sidebar Navigate (real Claude, requires ANTHROPIC_API_KEY) ---
|
||||||
|
|
||||||
describeIfSelected('Sidebar navigate E2E', ['sidebar-navigate'], () => {
|
describeIfSelected('Sidebar navigate E2E', ['sidebar-navigate'], () => {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue