test(make-pdf): diagram pre-pass unit suite + e2e render gates

34 unit tests (fence extraction incl. nested/tilde/unclosed/render=false,
info-string parsing, slot substitution, diagnostic/figure escaping + SVG
script strip, byte-level dimension probing across 5 formats, content-box
math, image inlining incl. strict/remote/missing/data-URI paths). E2E gate
proves through the compiled binary: both fences render as vector text
(id-collision check), raw mermaid ships only via render=false, broken fence
yields the diagnostic block, and the relative fixture image rasterizes to
colored pixels (CRITICAL regression for the about:blank image fix).
--strict exits non-zero on a missing image.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Garry Tan 2026-06-11 23:59:40 -07:00
parent 67e87fe421
commit e7c0c1bf51
No known key found for this signature in database
GPG Key ID: C1F69E85C74EFE1D
4 changed files with 492 additions and 0 deletions

View File

@ -0,0 +1,300 @@
/**
* Unit tests for the diagram pre-pass: fence extraction, info-string parsing,
* slot substitution, diagnostic blocks, image inlining policy, and the
* byte-level image dimension prober. No browse daemon is required: the tab
* factory returns null, so downscale paths are exercised as no-ops.
*/
import { afterAll, describe, expect, test } from "bun:test";
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import zlib from "node:zlib";
import {
  StrictModeError,
  buildDiagnosticBlock,
  buildDiagramFigure,
  contentWidthInches,
  dimToInches,
  extractDiagramFences,
  inlineLocalImages,
  parseInfoString,
  substituteSlots,
} from "../src/diagram-prepass";
import { imageDims } from "../src/image-size";
// ─── fence extraction ─────────────────────────────────────────────────
describe("extractDiagramFences", () => {
  // A renderable fence is lifted out of the markdown; only its token remains.
  test("extracts a mermaid fence and replaces it with a token paragraph", () => {
    const input = "# T\n\n```mermaid\ngraph LR\n A --> B\n```\n\ntail";
    const result = extractDiagramFences(input);
    expect(result.fences).toHaveLength(1);
    const only = result.fences[0];
    expect(only.lang).toBe("mermaid");
    expect(only.source).toBe("graph LR\n A --> B");
    expect(result.markdown).toContain(only.token);
    expect(result.markdown).not.toContain("```mermaid");
  });

  test("extracts excalidraw fences", () => {
    const input = '```excalidraw\n{"type":"excalidraw","elements":[]}\n```';
    const result = extractDiagramFences(input);
    expect(result.fences).toHaveLength(1);
    expect(result.fences[0].lang).toBe("excalidraw");
  });

  // Opting out of rendering leaves a plain code fence with the flag removed.
  test("render=false keeps the fence as code and strips the flag", () => {
    const input = "```mermaid render=false\ngraph LR\n X --> Y\n```";
    const result = extractDiagramFences(input);
    expect(result.fences).toHaveLength(0);
    expect(result.markdown).toContain("```mermaid\ngraph LR");
    expect(result.markdown).not.toContain("render=false");
  });

  test("title is captured from the info string", () => {
    const input = '```mermaid title="Auth flow"\ngraph LR\n A --> B\n```';
    const result = extractDiagramFences(input);
    expect(result.fences[0].title).toBe("Auth flow");
  });

  test("non-diagram fences pass through untouched", () => {
    const input = "```js\nconst a = 1;\n```";
    const result = extractDiagramFences(input);
    expect(result.fences).toHaveLength(0);
    expect(result.markdown).toBe(input);
  });

  // A four-backtick outer fence wraps a three-backtick mermaid example.
  test("a mermaid example inside a plain fence is never extracted", () => {
    const input = "````\n```mermaid\ngraph LR\n```\n````";
    const result = extractDiagramFences(input);
    expect(result.fences).toHaveLength(0);
    expect(result.markdown).toBe(input);
  });

  test("tilde fences work", () => {
    const input = "~~~mermaid\ngraph TD\n A --> B\n~~~";
    const result = extractDiagramFences(input);
    expect(result.fences).toHaveLength(1);
  });

  // No closing fence: the input must come back byte-for-byte.
  test("unclosed fence at EOF replays verbatim", () => {
    const input = "```mermaid\ngraph LR\n A --> B";
    const result = extractDiagramFences(input);
    expect(result.fences).toHaveLength(0);
    expect(result.markdown).toBe(input);
  });

  test("multiple fences get distinct ordinals and tokens", () => {
    const input = "```mermaid\nA\n```\n\nmiddle\n\n```mermaid\nB\n```";
    const found = extractDiagramFences(input).fences;
    expect(found).toHaveLength(2);
    const [firstFence, secondFence] = found;
    expect(firstFence.ordinal).toBe(1);
    expect(secondFence.ordinal).toBe(2);
    expect(firstFence.token).not.toBe(secondFence.token);
  });
});
describe("parseInfoString", () => {
  // A bare language name parses to render=true with no title.
  test("plain language", () => {
    const parsed = parseInfoString("mermaid");
    expect(parsed).toEqual({ lang: "mermaid", render: true, title: undefined });
  });

  test("render=false", () => {
    const parsed = parseInfoString("mermaid render=false");
    expect(parsed.render).toBe(false);
  });

  test("single-quoted title", () => {
    const parsed = parseInfoString("mermaid title='Hi there'");
    expect(parsed.title).toBe("Hi there");
  });
});
// ─── slots ────────────────────────────────────────────────────────────
describe("substituteSlots", () => {
  // The token's wrapping <p> must disappear along with the token itself.
  test("replaces the <p>-wrapped token with slot HTML", () => {
    const slotHtml = new Map([["gstack-diagram-slot-ab-1", "<figure>X</figure>"]]);
    const page = "<h1>T</h1>\n<p>gstack-diagram-slot-ab-1</p>\n<p>tail</p>";
    const rendered = substituteSlots(page, slotHtml);
    expect(rendered).toContain("<figure>X</figure>");
    expect(rendered).not.toContain("gstack-diagram-slot");
    expect(rendered).not.toContain("<p><figure>");
  });
});
describe("diagnostic + figure blocks", () => {
  // Shared stub: an untitled mermaid fence carrying ordinal 3.
  const sampleFence = {
    lang: "mermaid",
    source: "graph LR\n A --> B",
    render: true,
    token: "t",
    ordinal: 3,
    title: undefined,
  };

  test("diagnostic block escapes error content and names the lang", () => {
    const html = buildDiagnosticBlock(sampleFence, 'Parse <error> "quoted"');
    expect(html).toContain("diagram-error");
    expect(html).toContain("Diagram failed to render (mermaid)");
    expect(html).toContain("Parse &lt;error&gt;");
    expect(html).not.toContain("<error>");
  });

  // With no title, the aria-label is derived from the fence ordinal.
  test("figure carries role=img and ordinal-based aria-label fallback", () => {
    const html = buildDiagramFigure(sampleFence, "<svg></svg>");
    expect(html).toContain('role="img"');
    expect(html).toContain('aria-label="diagram 3"');
    expect(html).toContain("<svg></svg>");
  });

  test("figure strips scripts from SVG (sanitizer second layer)", () => {
    const html = buildDiagramFigure(sampleFence, "<svg><script>alert(1)</script><g/></svg>");
    expect(html).not.toContain("<script>");
  });

  test("title becomes aria-label and caption", () => {
    const titledFence = { ...sampleFence, title: "Auth flow" };
    const html = buildDiagramFigure(titledFence, "<svg></svg>");
    expect(html).toContain('aria-label="Auth flow"');
    expect(html).toContain("diagram-caption");
  });
});
// ─── image dimension probing ──────────────────────────────────────────
/**
 * Builds a small in-memory PNG of the requested size for the tests below.
 *
 * The result is the 8-byte PNG signature followed by IHDR, IDAT and IEND
 * chunks. IHDR declares bit depth 8 and colour type 2; every scanline is a
 * zero filter byte followed by `w * 3` bytes of 0x80.
 *
 * @param w image width in pixels, written to IHDR bytes 0-3
 * @param h image height in pixels, written to IHDR bytes 4-7
 * @returns the encoded PNG bytes
 */
function tinyPng(w: number, h: number): Buffer {
  // One chunk = big-endian payload length, 4-char type, payload, CRC over type+payload.
  const makeChunk = (type: string, payload: Buffer): Buffer => {
    const typedPayload = Buffer.concat([Buffer.from(type, "ascii"), payload]);
    const lengthField = Buffer.alloc(4);
    lengthField.writeUInt32BE(payload.length);
    const crcField = Buffer.alloc(4);
    // Runtimes whose zlib has no crc32 get a zero CRC instead.
    crcField.writeUInt32BE(zlib.crc32 ? zlib.crc32(typedPayload) : 0);
    return Buffer.concat([lengthField, typedPayload, crcField]);
  };

  const header = Buffer.alloc(13);
  header.writeUInt32BE(w, 0);
  header.writeUInt32BE(h, 4);
  header[8] = 8;
  header[9] = 2;

  // Every row is identical, so build it once and repeat it h times.
  const scanline = Buffer.concat([Buffer.from([0]), Buffer.alloc(w * 3, 0x80)]);
  const pixelData = Buffer.concat(Array.from({ length: h }, () => scanline));

  const signature = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
  return Buffer.concat([
    signature,
    makeChunk("IHDR", header),
    makeChunk("IDAT", zlib.deflateSync(pixelData)),
    makeChunk("IEND", Buffer.alloc(0)),
  ]);
}
describe("imageDims", () => {
  test("PNG", () => {
    const probed = imageDims(tinyPng(640, 480));
    expect(probed).toEqual({ width: 640, height: 480, mime: "image/png" });
  });

  // 13-byte GIF prefix: "GIF89a" followed by little-endian width and height.
  test("GIF", () => {
    const gifBytes = Buffer.alloc(13);
    gifBytes.write("GIF89a", 0, "ascii");
    gifBytes.writeUInt16LE(320, 6);
    gifBytes.writeUInt16LE(200, 8);
    const probed = imageDims(gifBytes);
    expect(probed).toEqual({ width: 320, height: 200, mime: "image/gif" });
  });

  test("JPEG (SOF0)", () => {
    const jpegBytes = Buffer.from([
      0xff, 0xd8, // SOI
      0xff, 0xe0, 0x00, 0x04, 0x00, 0x00, // APP0 len 4
      0xff, 0xc0, 0x00, 0x0b, 0x08, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00, 0x00, // SOF0 h=256 w=512
    ]);
    const probed = imageDims(jpegBytes);
    expect(probed).toEqual({ width: 512, height: 256, mime: "image/jpeg" });
  });

  test("SVG via width/height attrs", () => {
    const svgBytes = Buffer.from('<svg xmlns="x" width="800" height="400"></svg>');
    const probed = imageDims(svgBytes);
    expect(probed).toEqual({ width: 800, height: 400, mime: "image/svg+xml" });
  });

  // No width/height attributes: dimensions come from the viewBox.
  test("SVG via viewBox", () => {
    const svgBytes = Buffer.from('<svg viewBox="0 0 1200 600"></svg>');
    const probed = imageDims(svgBytes);
    expect(probed).toEqual({ width: 1200, height: 600, mime: "image/svg+xml" });
  });

  test("unknown bytes → null", () => {
    const junk = Buffer.from("definitely not an image, sorry");
    expect(imageDims(junk)).toBeNull();
  });
});
// ─── content-box math ─────────────────────────────────────────────────
describe("content width", () => {
  test("letter with 1in margins = 6.5in", () => {
    expect(contentWidthInches({})).toBeCloseTo(6.5);
  });

  test("a4 with 25mm margins", () => {
    expect(contentWidthInches({ pageSize: "a4", margins: "25mm" })).toBeCloseTo(8.27 - 50 / 25.4, 2);
  });

  test("dimToInches parses pt/cm/mm/px", () => {
    // Every input below equals exactly one inch. The fallback must therefore
    // NOT be 1, or a parser that gives up and returns the fallback would still
    // satisfy toBeCloseTo(1) and the unit conversions would go untested.
    const unusedFallback = 99;
    expect(dimToInches("72pt", unusedFallback)).toBeCloseTo(1);
    expect(dimToInches("2.54cm", unusedFallback)).toBeCloseTo(1);
    expect(dimToInches("25.4mm", unusedFallback)).toBeCloseTo(1);
    expect(dimToInches("96px", unusedFallback)).toBeCloseTo(1);
    // Unparseable input yields the fallback unchanged.
    expect(dimToInches("garbage", 1.5)).toBe(1.5);
  });
});
// ─── image inlining ───────────────────────────────────────────────────
describe("inlineLocalImages", () => {
  // Scratch directory holding the fixture images for this suite.
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), "prepass-img-"));
  fs.writeFileSync(path.join(dir, "ok.png"), tinyPng(40, 20));

  // Remove the scratch directory once the suite finishes so repeated runs do
  // not leave prepass-img-* directories behind in the OS temp dir.
  afterAll(() => {
    fs.rmSync(dir, { recursive: true, force: true });
  });

  // Options shared by every case; each test adds its own `warn` sink and
  // overrides `strict` / `allowNetwork` as needed. getTab returns null, so no
  // browser tab is available to these tests.
  const base = {
    inputDir: dir,
    strict: false,
    allowNetwork: false,
    contentWidthIn: 6.5,
    getTab: () => null,
  };

  test("local image becomes a data URI with probed dimensions", () => {
    const warnings: string[] = [];
    const out = inlineLocalImages(`<img src="ok.png" alt="x">`, { ...base, warn: (m) => warnings.push(m) });
    expect(out).toContain("data:image/png;base64,");
    expect(out).toContain('data-gstack-px-width="40"');
    expect(out).toContain('data-gstack-px-height="20"');
    expect(warnings).toHaveLength(0);
  });

  test("missing image → visible placeholder + warning", () => {
    const warnings: string[] = [];
    const out = inlineLocalImages(`<img src="nope.png">`, { ...base, warn: (m) => warnings.push(m) });
    expect(out).toContain("image-missing");
    expect(out).toContain("nope.png");
    expect(warnings.length).toBe(1);
  });

  test("missing image + --strict → StrictModeError", () => {
    expect(() =>
      inlineLocalImages(`<img src="nope.png">`, { ...base, strict: true, warn: () => {} }),
    ).toThrow(StrictModeError);
  });

  test("remote image warns and is left untouched (offline posture)", () => {
    const warnings: string[] = [];
    const tag = `<img src="https://example.com/x.png">`;
    const out = inlineLocalImages(tag, { ...base, warn: (m) => warnings.push(m) });
    expect(out).toBe(tag);
    expect(warnings[0]).toContain("offline");
  });

  test("remote image + --allow-network passes silently", () => {
    const warnings: string[] = [];
    const tag = `<img src="https://example.com/x.png">`;
    const out = inlineLocalImages(tag, { ...base, allowNetwork: true, warn: (m) => warnings.push(m) });
    expect(out).toBe(tag);
    expect(warnings).toHaveLength(0);
  });

  test("remote image + --strict → StrictModeError", () => {
    expect(() =>
      inlineLocalImages(`<img src="https://example.com/x.png">`, { ...base, strict: true, warn: () => {} }),
    ).toThrow(StrictModeError);
  });

  test("existing data URI gets dimension annotations only", () => {
    const uri = `data:image/png;base64,${tinyPng(33, 44).toString("base64")}`;
    const out = inlineLocalImages(`<img src="${uri}">`, { ...base, warn: () => {} });
    expect(out).toContain('data-gstack-px-width="33"');
    expect(out).toContain('data-gstack-px-height="44"');
  });

  test("oversized raster without a tab inlines at full size with no downscale", () => {
    // 6000px-wide PNG (the file must exist on disk to be inlined).
    fs.writeFileSync(path.join(dir, "wide.png"), tinyPng(6000, 100));
    // Warnings are not part of this case's contract, so they are discarded.
    const out = inlineLocalImages(`<img src="wide.png">`, { ...base, warn: () => {} });
    expect(out).toContain('data-gstack-px-width="6000"');
  });
});

View File

@ -0,0 +1,155 @@
/**
* Diagram render gate: proves the diagram pre-pass works end-to-end through
* the compiled binary: mermaid fences render as vector SVG (not raw code),
* multiple fences coexist (id-collision check), render=false keeps source,
* a broken fence yields a visible diagnostic block, and a relative local
* image actually renders (CRITICAL regression: pre-pass D1 fixed the
* setContent/about:blank path where relative images silently 404'd).
*
* Oracles (per the emoji-gate lessons: text extraction alone lies):
* 1. pdftotext: node labels from BOTH diagrams present (vector text made it
* into the PDF), diagnostic title present, raw mermaid only where
* render=false kept it.
* 2. pdftoppm + saturated-pixel count: the red fixture image rasterizes to
* colored pixels; text extraction can't fake that.
*
* Free-tier deterministic gate: runs under plain `bun test` when the compiled
* binaries + poppler are available; hard-fails in CI when missing.
*/
import { describe, expect, test } from "bun:test";
import { execFileSync } from "node:child_process";
import * as fs from "node:fs";
import * as path from "node:path";
import { resolvePopplerTool } from "../../src/pdftotext";
// Markdown fixture rendered by the gate, resolved relative to this test file's directory.
const FIXTURE = path.resolve(__dirname, "../fixtures/diagram-gate.md");
// Three directories above this test file; the build artifacts below are joined onto it.
const ROOT = path.resolve(__dirname, "../../..");
// Compiled make-pdf executable that the gate invokes.
const PDF_BIN = path.join(ROOT, "make-pdf/dist/pdf");
// Compiled browse executable; passed to the make-pdf child via the BROWSE_BIN env var.
const BROWSE_BIN = path.join(ROOT, "browse/dist/browse");
// Built diagram-render HTML bundle; this file only checks that it exists.
const BUNDLE = path.join(ROOT, "lib/diagram-render/dist/diagram-render.html");
// Timeout, in milliseconds, applied to every child process the gate spawns.
const CHILD_TIMEOUT_MS = 60_000;
// The 80x40 red fixture image at 100dpi occupies ~80x40 px of strong red.
// Floor sits well below that but far above AA noise.
const SATURATED_PIXEL_FLOOR = 500;
// A pixel counts as saturated when its max-minus-min channel spread exceeds this.
const SATURATION_DELTA = 60;
/**
 * Checks that everything the gate needs is present: the make-pdf and browse
 * binaries, the diagram-render bundle, the markdown fixture, and the
 * pdftotext / pdftoppm tools.
 *
 * Checks run in that order and stop at the first failure.
 *
 * @returns `{ ok: true }` when nothing is missing, otherwise `{ ok: false }`
 *   with a human-readable `reason` naming the first missing prerequisite.
 */
function prerequisitesAvailable(): { ok: true } | { ok: false; reason: string } {
  const missing = (reason: string): { ok: false; reason: string } => ({ ok: false, reason });

  if (!fs.existsSync(PDF_BIN)) {
    return missing(`make-pdf binary missing (${PDF_BIN}). Run bun run build.`);
  }
  if (!fs.existsSync(BROWSE_BIN)) {
    return missing(`browse binary missing (${BROWSE_BIN}).`);
  }
  if (!fs.existsSync(BUNDLE)) {
    return missing(`diagram-render bundle missing (${BUNDLE}). Run bun run build:diagram-render.`);
  }
  if (!fs.existsSync(FIXTURE)) {
    return missing(`fixture missing (${FIXTURE}).`);
  }
  if (!resolvePopplerTool("pdftotext")) {
    return missing("pdftotext not found (install poppler-utils).");
  }
  if (!resolvePopplerTool("pdftoppm")) {
    return missing("pdftoppm not found (install poppler-utils).");
  }
  return { ok: true };
}
/**
 * Counts the "saturated" pixels in a binary (P6) PPM file.
 *
 * A pixel is saturated when the spread between its largest and smallest
 * RGB channel value is strictly greater than `delta`; grey pixels (equal
 * channels) therefore never count.
 *
 * @param ppmPath path of the PPM file to read
 * @param delta minimum max-minus-min channel spread (exclusive) for a pixel to count
 * @returns the number of saturated pixels
 * @throws Error when the file is not a P6 PPM, its dimensions are not
 *   non-negative integers, its maxval is not 255, or the raster holds fewer
 *   bytes than the header promises (previously such files were silently
 *   undercounted instead of being reported)
 */
function countSaturatedPixels(ppmPath: string, delta: number): number {
  const bytes = fs.readFileSync(ppmPath);
  let pos = 0;

  const isSpace = (c: number | undefined): boolean =>
    c === 0x20 || c === 0x0a || c === 0x09 || c === 0x0d;

  // Reads the next whitespace-delimited header token, skipping `#` comments
  // (each runs to the end of its line).
  const nextToken = (): string => {
    for (;;) {
      while (pos < bytes.length && isSpace(bytes[pos])) pos++;
      if (bytes[pos] !== 0x23) break;
      while (pos < bytes.length && bytes[pos] !== 0x0a) pos++;
    }
    const start = pos;
    while (pos < bytes.length && !isSpace(bytes[pos])) pos++;
    return bytes.subarray(start, pos).toString("ascii");
  };

  if (nextToken() !== "P6") throw new Error("expected P6 PPM");
  const width = Number(nextToken());
  const height = Number(nextToken());
  if (Number(nextToken()) !== 255) throw new Error("expected 8-bit PPM");
  if (!Number.isSafeInteger(width) || !Number.isSafeInteger(height) || width < 0 || height < 0) {
    throw new Error(`invalid PPM dimensions: ${width}x${height}`);
  }

  // Exactly one whitespace byte separates the maxval from the raster.
  pos++;

  const rasterBytes = width * height * 3;
  const available = Math.max(0, bytes.length - pos);
  if (available < rasterBytes) {
    throw new Error(`truncated PPM: expected ${rasterBytes} pixel bytes, found ${available}`);
  }

  let saturated = 0;
  const end = pos + rasterBytes;
  for (let offset = pos; offset < end; offset += 3) {
    const r = bytes[offset];
    const g = bytes[offset + 1];
    const b = bytes[offset + 2];
    if (Math.max(r, g, b) - Math.min(r, g, b) > delta) saturated++;
  }
  return saturated;
}
// End-to-end gate. Both real tests are skipped when any prerequisite is
// missing; in that case the third test below is registered instead and fails
// only when the CI environment variable is set.
describe("diagram render gate", () => {
// Prerequisites are probed once, when the suite is defined.
const avail = prerequisitesAvailable();
test.skipIf(!avail.ok)("mermaid fences render as vector diagrams; images and diagnostics behave", () => {
if (!avail.ok) return;
// Scratch directory for the PDF and the rasterized page; removed in `finally`.
const workDir = fs.mkdtempSync("/tmp/make-pdf-diagram-gate-");
const outputPdf = path.join(workDir, "out.pdf");
const ppmPrefix = path.join(workDir, "page");
try {
// Render the fixture through the compiled binary, passing the browse binary path via env.
execFileSync(PDF_BIN, ["generate", FIXTURE, outputPdf, "--quiet"], {
encoding: "utf8",
env: { ...process.env, BROWSE_BIN },
stdio: ["ignore", "pipe", "pipe"],
timeout: CHILD_TIMEOUT_MS,
});
expect(fs.existsSync(outputPdf)).toBe(true);
const pdftotext = resolvePopplerTool("pdftotext")!;
// "-" as the output argument makes pdftotext write the extracted text to stdout.
const text = execFileSync(pdftotext, [outputPdf, "-"], { encoding: "utf8", timeout: CHILD_TIMEOUT_MS });
// 1. Vector text from BOTH diagrams (multi-fence + id-collision check —
// a collided render drops the second diagram's content).
for (const label of ["gatealphanode", "gatebetanode", "gategammanode", "gatedeltanode", "gateepsilonnode"]) {
expect(text).toContain(label);
}
// 2. Rendered fences must NOT ship raw mermaid; render=false must.
expect(text).not.toContain("GATEALPHA[");
expect(text).toContain("RAWKEPT");
expect(text).toContain("ASCODE");
// 3. The broken fence produced a visible diagnostic, not silence.
expect(text).toContain("Diagram failed to render (mermaid)");
// 4. CRITICAL regression: the relative image rasterizes to color.
const pdftoppm = resolvePopplerTool("pdftoppm")!;
// Rasterize page 1 only at 100 dpi; the result is read back from `${ppmPrefix}.ppm`.
execFileSync(pdftoppm, ["-r", "100", "-f", "1", "-l", "1", "-singlefile", outputPdf, ppmPrefix], {
stdio: ["ignore", "pipe", "pipe"],
timeout: CHILD_TIMEOUT_MS,
});
const saturated = countSaturatedPixels(`${ppmPrefix}.ppm`, SATURATION_DELTA);
// Print the measured count before the assertion so a failure shows the actual number.
if (saturated < SATURATED_PIXEL_FLOOR) {
process.stderr.write(`\n[diagram-gate] saturated pixels: ${saturated} (floor ${SATURATED_PIXEL_FLOOR})\n`);
}
expect(saturated).toBeGreaterThanOrEqual(SATURATED_PIXEL_FLOOR);
} finally {
// Best-effort cleanup; a failure to delete must not mask the test result.
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch { /* ignore */ }
}
}, 120000);
test.skipIf(!avail.ok)("--strict fails on a missing image with a non-zero exit", () => {
if (!avail.ok) return;
const workDir = fs.mkdtempSync("/tmp/make-pdf-diagram-strict-");
const md = path.join(workDir, "doc.md");
// Minimal document whose only image points at a file that does not exist.
fs.writeFileSync(md, "# T\n\n![gone](./does-not-exist.png)\n");
try {
// execFileSync throws when the child exits non-zero; `failed` records that it did.
let failed = false;
try {
execFileSync(PDF_BIN, ["generate", md, path.join(workDir, "out.pdf"), "--quiet", "--strict"], {
encoding: "utf8",
env: { ...process.env, BROWSE_BIN },
stdio: ["ignore", "pipe", "pipe"],
timeout: CHILD_TIMEOUT_MS,
});
} catch (err: any) {
failed = true;
// The captured stderr must name the cause, not just signal a failure.
const stderr = err.stderr?.toString() ?? "";
expect(stderr).toContain("image not found");
}
expect(failed).toBe(true);
} finally {
// Best-effort cleanup; a failure to delete must not mask the test result.
try { fs.rmSync(workDir, { recursive: true, force: true }); } catch { /* ignore */ }
}
}, 120000);
// Registered only when prerequisites are missing: throws when the CI env var
// is set, otherwise logs the reason and passes.
if (!avail.ok) {
test("diagram gate prerequisites are present (hard-required in CI)", () => {
if (process.env.CI) {
throw new Error(`diagram gate prerequisites missing in CI: ${avail.reason}`);
}
console.warn(`[skip] ${avail.reason}`);
});
}
});

Binary file not shown.

After

Width:  |  Height:  |  Size: 131 B

37
make-pdf/test/fixtures/diagram-gate.md vendored Normal file
View File

@ -0,0 +1,37 @@
# Diagram Gate
A relative local image (CRITICAL regression: must render, not 404):
![a red box](./diagram-assets/red-box.png)
## First diagram
```mermaid title="Gate pipeline"
graph LR
GATEALPHA[gatealphanode] --> GATEBETA{gatebetanode}
GATEBETA -->|yes| GATEGAMMA[gategammanode]
```
## Second diagram (id-collision check)
```mermaid
graph TD
GATEDELTA[gatedeltanode] --> GATEEPSILON[gateepsilonnode]
```
## Kept as source
```mermaid render=false
graph LR
RAWKEPT --> ASCODE
```
## Deliberately broken
```mermaid
graph LR
A -->
(((
```
Done.