mirror of https://github.com/garrytan/gstack.git
fix(browse): guard full-page screenshots against Anthropic vision API >2000px brick (#1214)
Full-page screenshots of tall pages routinely exceeded 2000px on the longest dimension, silently bricking the agent's session: the resulting base64 reached the Anthropic vision API, which rejected the oversized image, leaving the agent burning turns on a useless blob with no stderr trace from the browse side.

Adds browse/src/screenshot-size-guard.ts as a shared helper:
- guardScreenshotBuffer(buf) → downscales in-memory if max(w,h) > 2000
- guardScreenshotPath(path) → file-mode variant that rewrites in place
- Aspect ratio preserved via sharp's resize fit:inside
- Stderr diagnostic on any downscale so callers can see when it fired
- Lazy sharp import so non-screenshot paths pay no startup cost

Wires the guard into all three full-page callsites codex review flagged:
- browse/src/snapshot.ts: annotated + heatmap fullPage captures
- browse/src/meta-commands.ts: screenshot command (path + base64 fullPage modes) plus the responsive 3-viewport sweep
- browse/src/write-commands.ts: prettyscreenshot fullPage path

Covers six unit cases (pass-through, downscale of tall and wide images with aspect ratio preserved, exactly-2000px edge, file-mode rewrite, file-mode pass-through) plus a static invariant test that fails the build if any of the three callsites stops importing the guard.

Closes #1214.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
16fca84d04
commit
dd84bdb7d9
|
|
@ -11,6 +11,7 @@ import { handleSkillCommand } from './browser-skill-commands';
|
|||
import { validateNavigationUrl } from './url-validation';
|
||||
import { checkScope, type TokenInfo } from './token-registry';
|
||||
import { validateOutputPath, validateReadPath, SAFE_DIRECTORIES, escapeRegExp } from './path-security';
|
||||
import { guardScreenshotBuffer, guardScreenshotPath } from './screenshot-size-guard';
|
||||
// Re-export for backward compatibility (tests import from meta-commands)
|
||||
export { validateOutputPath, escapeRegExp } from './path-security';
|
||||
import * as Diff from 'diff';
|
||||
|
|
@ -497,6 +498,10 @@ export async function handleMetaCommand(
|
|||
buffer = await page.screenshot({ clip: clipRect });
|
||||
} else {
|
||||
buffer = await page.screenshot({ fullPage: !viewportOnly });
|
||||
// Guard the most common API-bricking case (fullPage). Element /
|
||||
// clip captures usually stay within the cap; we still guard the
|
||||
// path-mode below for fullPage writes.
|
||||
({ buffer } = await guardScreenshotBuffer(buffer));
|
||||
}
|
||||
if (buffer.length > 10 * 1024 * 1024) {
|
||||
throw new Error('Screenshot too large for --base64 (>10MB). Use disk path instead.');
|
||||
|
|
@ -517,6 +522,7 @@ export async function handleMetaCommand(
|
|||
}
|
||||
|
||||
await page.screenshot({ path: outputPath, fullPage: !viewportOnly });
|
||||
if (!viewportOnly) await guardScreenshotPath(outputPath);
|
||||
return `Screenshot saved${viewportOnly ? ' (viewport)' : ''}: ${outputPath}`;
|
||||
}
|
||||
|
||||
|
|
@ -567,6 +573,7 @@ export async function handleMetaCommand(
|
|||
const screenshotPath = `${prefix}-${vp.name}.png`;
|
||||
validateOutputPath(screenshotPath);
|
||||
await page.screenshot({ path: screenshotPath, fullPage: true });
|
||||
await guardScreenshotPath(screenshotPath);
|
||||
results.push(`${vp.name} (${vp.width}x${vp.height}): ${screenshotPath}`);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,106 @@
|
|||
/**
|
||||
* Screenshot size guard — keep full-page screenshots ≤ 2000px max-dim.
|
||||
*
|
||||
* The Anthropic vision API rejects images whose longest dimension exceeds
|
||||
* 2000 image-pixels (post deviceScaleFactor). Full-page screenshots of long
|
||||
* pages routinely exceed that, silently bricking the session: the agent
|
||||
* burns turns on a base64 blob that errors model-side with no useful
|
||||
* stderr surfacing on the browse side.
|
||||
*
|
||||
* This module centralizes the "after page.screenshot, check dimensions and
|
||||
* downscale if too big" path so every full-page caller in browse/src can
|
||||
* share the same enforcement. The cap is image-pixels, not CSS pixels,
|
||||
* matching the Anthropic API's own threshold.
|
||||
*
|
||||
* Used by: snapshot.ts (annotated, heatmap), meta-commands.ts (screenshot),
|
||||
* write-commands.ts (prettyscreenshot). See test/snapshot-meta-write-guard.test.ts.
|
||||
*
|
||||
* Closes #1214.
|
||||
*/
|
||||
|
||||
import { writeFileSync, readFileSync } from "fs";
|
||||
|
||||
const MAX_DIMENSION_PX = 2000;
|
||||
|
||||
/**
 * Diagnostic record describing what the size guard did to a single image.
 */
export interface SizeGuardResult {
  /** Width in pixels of the input image, before any downscale. */
  originalWidth: number;
  /** Height in pixels of the input image, before any downscale. */
  originalHeight: number;
  /** Width in pixels of the image as written/returned. */
  width: number;
  /** Height in pixels of the image as written/returned. */
  height: number;
  /** True when the input exceeded MAX_DIMENSION_PX and was downscaled. */
  resized: boolean;
}
|
||||
|
||||
/**
 * Inspect an image buffer and downscale it when its longest side exceeds
 * MAX_DIMENSION_PX (the 2000px Anthropic vision API cap).
 *
 * Images already within the cap are returned untouched: the very same
 * `input` buffer comes back, with no re-encode. Oversized images are scaled
 * by the same factor on both axes so the longest side lands on the cap, then
 * re-encoded as PNG, and a one-line diagnostic is written to stderr.
 *
 * sharp is imported lazily so only screenshot paths pay for loading its
 * native binding.
 *
 * NOTE(review): the downscaled output is always PNG regardless of the input
 * format. If a caller captured to a non-PNG path (e.g. `.jpg`) the file-mode
 * variant would write PNG bytes back under that name — confirm whether such
 * paths are reachable.
 *
 * @param input Encoded image bytes to inspect.
 * @returns The buffer to use (original or downscaled) plus a diagnostic
 *   record whose `width`/`height` match the returned buffer exactly.
 */
export async function guardScreenshotBuffer(input: Buffer): Promise<{ buffer: Buffer; result: SizeGuardResult }> {
  const sharpModule = await import("sharp");
  const sharp = sharpModule.default ?? sharpModule;

  const image = sharp(input);
  const metadata = await image.metadata();
  // Missing dimensions are treated as 0, which falls through the in-cap path.
  const width = metadata.width ?? 0;
  const height = metadata.height ?? 0;
  const longest = Math.max(width, height);

  if (longest <= MAX_DIMENSION_PX) {
    return {
      buffer: input,
      result: {
        resized: false,
        width,
        height,
        originalWidth: width,
        originalHeight: height,
      },
    };
  }

  const scale = MAX_DIMENSION_PX / longest;
  // Clamp to 1px: for extreme aspect ratios the short side can round to 0,
  // and sharp rejects non-positive resize dimensions.
  const newWidth = Math.max(1, Math.round(width * scale));
  const newHeight = Math.max(1, Math.round(height * scale));

  // The target box already carries the input's aspect ratio (uniform scale),
  // so resize to exactly that box. With fit "inside" sharp re-derives one side
  // from the other and can land a pixel or two short of the rounded values
  // (e.g. 1201x5000 → 480x1998), making the reported dimensions and the
  // stderr diagnostic disagree with the actual output.
  const resized = await image
    .resize(newWidth, newHeight, { fit: "fill" })
    .png()
    .toBuffer();

  process.stderr.write(
    `[screenshot-size-guard] image ${width}x${height} exceeded ${MAX_DIMENSION_PX}px max-dim; ` +
      `downscaled to ${newWidth}x${newHeight} to fit Anthropic vision API\n`,
  );

  return {
    buffer: resized,
    result: {
      resized: true,
      width: newWidth,
      height: newHeight,
      originalWidth: width,
      originalHeight: height,
    },
  };
}
|
||||
|
||||
/**
 * Path-based counterpart of guardScreenshotBuffer, meant to run right after
 * `await page.screenshot({ path, ... })`.
 *
 * Loads the image stored at `filePath`, runs it through the buffer guard, and
 * overwrites the same file only when a downscale actually happened; an image
 * already within the cap leaves the file on disk untouched.
 *
 * @param filePath Location of the screenshot to check (and possibly rewrite).
 * @returns The diagnostic record produced by the buffer guard.
 */
export async function guardScreenshotPath(filePath: string): Promise<SizeGuardResult> {
  const original = readFileSync(filePath);
  const guarded = await guardScreenshotBuffer(original);

  if (!guarded.result.resized) {
    // Nothing changed — skip the write so the on-disk bytes stay identical.
    return guarded.result;
  }

  writeFileSync(filePath, guarded.buffer);
  return guarded.result;
}
|
||||
|
||||
export const SCREENSHOT_MAX_DIMENSION_PX = MAX_DIMENSION_PX;
|
||||
|
|
@ -23,6 +23,7 @@ import * as Diff from 'diff';
|
|||
import { TEMP_DIR, isPathWithin } from './platform';
|
||||
import { escapeEnvelopeSentinels } from './content-security';
|
||||
import { stripLoneSurrogates } from './sanitize';
|
||||
import { guardScreenshotPath } from './screenshot-size-guard';
|
||||
|
||||
// Roles considered "interactive" for the -i flag
|
||||
const INTERACTIVE_ROLES = new Set([
|
||||
|
|
@ -418,6 +419,7 @@ export async function handleSnapshot(
|
|||
}, boxes);
|
||||
|
||||
await page.screenshot({ path: screenshotPath, fullPage: true });
|
||||
await guardScreenshotPath(screenshotPath);
|
||||
|
||||
// Always remove overlays
|
||||
await page.evaluate(() => {
|
||||
|
|
@ -538,6 +540,7 @@ export async function handleSnapshot(
|
|||
}, boxes);
|
||||
|
||||
await page.screenshot({ path: heatmapPath, fullPage: true });
|
||||
await guardScreenshotPath(heatmapPath);
|
||||
|
||||
// Remove heatmap overlays
|
||||
await page.evaluate(() => {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import { findInstalledBrowsers, importCookies, importCookiesViaCdp, hasV20Cookie
|
|||
import { generatePickerCode } from './cookie-picker-routes';
|
||||
import { validateNavigationUrl } from './url-validation';
|
||||
import { validateOutputPath, validateReadPath } from './path-security';
|
||||
import { guardScreenshotPath } from './screenshot-size-guard';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import type { SetContentWaitUntil } from './tab-session';
|
||||
|
|
@ -1123,6 +1124,10 @@ export async function handleWriteCommand(
|
|||
|
||||
// Take screenshot
|
||||
await page.screenshot({ path: outputPath, fullPage: !scrollTo });
|
||||
// Guard against Anthropic vision API >2000px brick (#1214). Only
|
||||
// applies to fullPage captures; scrollTo viewport-bound shots are
|
||||
// already capped by the viewport size.
|
||||
if (!scrollTo) await guardScreenshotPath(outputPath);
|
||||
|
||||
// Restore viewport
|
||||
if (viewportWidth && originalViewport) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,118 @@
|
|||
/**
|
||||
* Unit tests for the screenshot size guard (#1214).
|
||||
*
|
||||
* Verifies that images exceeding 2000px on the longest dimension get
|
||||
* downscaled to fit the Anthropic vision API cap, while images already
|
||||
* inside the cap pass through untouched.
|
||||
*
|
||||
* Integration with the three callsites (snapshot.ts, meta-commands.ts,
|
||||
* write-commands.ts) is exercised by the existing browse E2E suite — we
|
||||
* don't need to spin up Chromium just to verify the helper. The static
|
||||
* invariant test below pins that all three callsites import the guard.
|
||||
*/
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
||||
import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs';
|
||||
import { tmpdir } from 'os';
|
||||
import { join } from 'path';
|
||||
import sharp from 'sharp';
|
||||
import {
|
||||
SCREENSHOT_MAX_DIMENSION_PX,
|
||||
guardScreenshotBuffer,
|
||||
guardScreenshotPath,
|
||||
} from '../src/screenshot-size-guard';
|
||||
|
||||
let tmp: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tmp = mkdtempSync(join(tmpdir(), 'screenshot-guard-'));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
/**
 * Fixture helper: encode a solid-color, 3-channel PNG of the requested size.
 */
async function makePng(width: number, height: number): Promise<Buffer> {
  const background = { r: 200, g: 50, b: 50 };
  const canvas = sharp({ create: { width, height, channels: 3, background } });
  return canvas.png().toBuffer();
}
|
||||
|
||||
describe('guardScreenshotBuffer', () => {
  test('passes through images already within the cap', async () => {
    const source = await makePng(1500, 1800);
    const outcome = await guardScreenshotBuffer(source);

    expect(outcome.result.resized).toBe(false);
    expect(outcome.result.width).toBe(1500);
    expect(outcome.result.height).toBe(1800);
    // Same object identity: an in-cap image must not be re-encoded.
    expect(outcome.buffer).toBe(source);
  });

  test('downscales a 5000px-tall image to fit the cap', async () => {
    const source = await makePng(1200, 5000);
    const outcome = await guardScreenshotBuffer(source);
    const { width, height } = outcome.result;

    expect(outcome.result.resized).toBe(true);
    expect(outcome.result.originalHeight).toBe(5000);
    expect(Math.max(width, height)).toBeLessThanOrEqual(SCREENSHOT_MAX_DIMENSION_PX);
    // Height-to-width ratio must survive the downscale.
    expect(height / width).toBeCloseTo(5000 / 1200, 1);
    // The re-encoded PNG is expected to be smaller than the input.
    expect(outcome.buffer.length).toBeLessThan(source.length);
  });

  test('downscales a 6000px-wide image', async () => {
    const source = await makePng(6000, 1200);
    const outcome = await guardScreenshotBuffer(source);
    const { width, height } = outcome.result;

    expect(outcome.result.resized).toBe(true);
    expect(outcome.result.originalWidth).toBe(6000);
    expect(Math.max(width, height)).toBeLessThanOrEqual(SCREENSHOT_MAX_DIMENSION_PX);
    expect(outcome.buffer.length).toBeGreaterThan(0);
  });

  test('treats exactly-2000px images as in-bounds (no resize)', async () => {
    // Boundary case: the cap itself is allowed, only strictly larger resizes.
    const source = await makePng(2000, 1000);
    const outcome = await guardScreenshotBuffer(source);

    expect(outcome.result.resized).toBe(false);
  });
});
|
||||
|
||||
describe('guardScreenshotPath', () => {
  test('rewrites the file in place when downscale is needed', async () => {
    const target = join(tmp, 'tall.png');
    const oversized = await makePng(1200, 5000);
    writeFileSync(target, oversized);

    const outcome = await guardScreenshotPath(target);
    expect(outcome.resized).toBe(true);

    // Re-read from disk: the file itself must now be within the cap.
    const onDisk = await sharp(readFileSync(target)).metadata();
    const longestSide = Math.max(onDisk.width ?? 0, onDisk.height ?? 0);
    expect(longestSide).toBeLessThanOrEqual(SCREENSHOT_MAX_DIMENSION_PX);
  });

  test('leaves the file untouched when already within cap', async () => {
    const target = join(tmp, 'short.png');
    const small = await makePng(800, 600);
    writeFileSync(target, small);

    const outcome = await guardScreenshotPath(target);
    expect(outcome.resized).toBe(false);

    // Byte-for-byte equality proves no rewrite happened.
    const onDisk = readFileSync(target);
    expect(onDisk.equals(small)).toBe(true);
  });
});
|
||||
|
||||
describe('static invariant: all three full-page callsites import the guard', () => {
  test('snapshot.ts, meta-commands.ts, and write-commands.ts wire the size guard', () => {
    const browseSrc = join(import.meta.dir, '..', 'src');
    // An import alone does not protect anything: a callsite could keep the
    // import line and drop the call. Require an actual call expression too.
    // The import specifier (`{ guardScreenshotPath }`) is not followed by `(`,
    // so it cannot satisfy this pattern on its own.
    const guardCall = /\bguardScreenshot(?:Buffer|Path)\s*\(/;

    for (const rel of ['snapshot.ts', 'meta-commands.ts', 'write-commands.ts']) {
      const content = readFileSync(join(browseSrc, rel), 'utf-8');
      expect(content).toContain('screenshot-size-guard');
      expect(content).toMatch(guardCall);
    }
  });
});
|
||||
Loading…
Reference in New Issue