feat(make-pdf): width directives + conservative auto-landscape via CSS named pages

`![a](x.png){width=full|<pct>|<dim>}` and `{page=landscape|portrait}`
suffixes translate to data-gstack-* attrs in render() (before the sanitizer,
which keeps data- attributes; unrecognized brace groups stay visible text).
Default width rule needs no code: intrinsic CSS-px capped at the content box,
never upscaled — figure img max-width owns it.

Auto-landscape promotes a block to `@page wide { size: <pagesize> landscape }`
only when aspect >= 1.8 AND intrinsic width > 2.5x the content box (~1600px on
letter) AND diagram provenance (rendered fences) or a whole-word alt token
(diagram|architecture|flowchart|chart|graph) for plain images. {page=...}
forces or vetoes; fence info strings accept page=... too. preferCSSPageSize
is passed to Chromium only when a promotion exists, so every other document
prints exactly as before. False negatives are cheap; false positives feel
broken (eng-review P4, Codex challenge accepted).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Garry Tan 2026-06-12 00:06:45 -07:00
parent e7c0c1bf51
commit 89c35352cc
No known key found for this signature in database
GPG Key ID: C1F69E85C74EFE1D
5 changed files with 268 additions and 6 deletions

View File

@ -42,6 +42,8 @@ export interface DiagramFence {
source: string;
/** Optional title="..." from the fence info string (a11y label, D6.4). */
title?: string;
/** Optional page=landscape|portrait fence directive (image-policy override). */
page?: "landscape" | "portrait";
/** render=false → leave as a plain code block (escape hatch, D6.3). */
render: boolean;
/** Placeholder token substituted into the markdown. */
@ -119,6 +121,7 @@ export function extractDiagramFences(markdown: string): FenceExtraction {
lang: info.lang,
source: openFence.body.join("\n"),
title: info.title,
page: info.page,
render: true,
token,
ordinal,
@ -181,13 +184,18 @@ function matchFenceLine(line: string): { char: string; len: number; info: string
return { char: m[1][0], len: m[1].length, info: m[2].trim() };
}
/**
 * Parse a fence info string: `mermaid`, `mermaid render=false`,
 * `mermaid title="Auth flow"`, `mermaid page=landscape`.
 *
 * - `lang`   first whitespace-delimited word, lower-cased ("" when absent).
 * - `render` false only when a bare `render=false` directive is present.
 * - `title`  contents of a double- or single-quoted `title=` value.
 * - `page`   `landscape` / `portrait` (case-insensitive), else undefined.
 *
 * The `render` and `page` directives are looked up with quoted values blanked
 * out, so caption text such as `title="see page=landscape"` cannot be
 * misread as a directive.
 */
export function parseInfoString(info: string): {
  lang: string; render: boolean; title?: string; page?: "landscape" | "portrait";
} {
  const lang = (info.match(/^\S+/)?.[0] ?? "").toLowerCase();
  const title = info.match(/\btitle\s*=\s*"([^"]*)"/i)?.[1]
    ?? info.match(/\btitle\s*=\s*'([^']*)'/i)?.[1];

  // Only `key="…"` / `key='…'` values are blanked; stray apostrophes elsewhere
  // in the info string are left alone.
  const bare = info.replace(/=\s*(?:"[^"]*"|'[^']*')/g, '=""');

  const render = !/\brender\s*=\s*false\b/i.test(bare);
  const pageRaw = bare.match(/\bpage\s*=\s*(landscape|portrait)\b/i)?.[1]?.toLowerCase();
  const page = pageRaw === "landscape" || pageRaw === "portrait" ? pageRaw : undefined;
  return { lang, render, title, page };
}
// ─── Slot substitution (pure) ─────────────────────────────────────────
@ -233,8 +241,9 @@ export function buildDiagramFigure(fence: DiagramFence, svg: string): string {
const captioned = fence.title
? `\n<figcaption class="diagram-caption">${escapeHtml(fence.title)}</figcaption>`
: "";
const pageAttr = fence.page ? ` data-gstack-page="${fence.page}"` : "";
return [
`<figure class="diagram" role="img" aria-label="${escapeAttr(label)}">`,
`<figure class="diagram" role="img" aria-label="${escapeAttr(label)}"${pageAttr}>`,
`<!-- gstack-diagram-source lang=${escapeAttr(fence.lang)}`,
escapeHtmlComment(fence.source),
`-->`,

View File

@ -0,0 +1,219 @@
/**
* Image width policy + conservative auto-landscape (eng-review P4, D4 spec).
*
* Two pure passes over rendered HTML:
*
* 1. applyImageDirectives runs inside render() right after marked, before
* the sanitizer. Translates the markdown-adjacent directive suffix
* `![alt](x.png){width=50%}` / `{page=landscape}` into data-gstack-*
* attributes (the sanitizer keeps data- attributes; the brace text is
* consumed so it never reaches smartypants or the page).
*
* 2. applyImagePolicy runs in the orchestrator after image inlining (which
* annotates data-gstack-px-width/-height from real bytes). Applies the
* width rule and decides landscape promotion:
*
* WIDTH RULE: render at intrinsic CSS-px width, capped at the content box,
* never upscaled — that is exactly `figure img { max-width: 100% }` doing
* its job, so the default needs no inline style. Directives opt into more:
* width=full stretches to the content box; <pct>/<dim> set explicit width.
*
* LANDSCAPE (conservative, false negatives are cheap):
* promote only when ALL hold:
* aspect ratio ≥ 1.8
* AND intrinsic CSS-px width > SHRINK_LIMIT × content box
* (content shrunk below ~40% of natural size = unreadable)
* AND diagram provenance (rendered fence) or an alt-text token from
* ALT_HINT_TOKENS (plain images)
* `{page=landscape}` forces, `{page=portrait}` vetoes — both skip the
* heuristics entirely.
*
* Promotion wraps the block in <div class="page-wide"> whose CSS named
* page (`@page wide { size: <size> landscape }`, print-css.ts) rotates
* just that page. Chromium only honors CSS page sizes when the print call
* passes preferCSSPageSize — the orchestrator sets it when hasLandscape.
*/
/** Inputs to applyImagePolicy. */
export interface ImagePolicyOptions {
/**
* Physical content-box width in inches (page width minus margins).
* Multiplied by 96 to get the content box in CSS px.
*/
contentWidthIn: number;
/**
* Sink for human-readable notices. applyImagePolicy calls it once for each
* image or diagram it promotes to a landscape page.
*/
warn: (msg: string) => void;
}
/** Output of applyImagePolicy. */
export interface ImagePolicyResult {
/**
* The input HTML with width directives turned into inline styles and every
* promoted block wrapped in `<div class="page-wide">`.
*/
html: string;
/** True when at least one block was promoted to the landscape named page. */
hasLandscape: boolean;
}
/**
* Aspect ratio floor for auto-promotion (width ÷ height). Blocks with a
* smaller ratio are never auto-promoted.
*/
const MIN_ASPECT = 1.8;
/**
* Auto-promote only when the intrinsic CSS-px width exceeds this multiple of
* the content box (in CSS px @96dpi). 2.5 ≈ the plan's ~1600px threshold on a
* 6.5in letter box (6.5in × 96 × 2.5 = 1560px); calibrated against fixtures
* (design doc Open Question 4).
*/
const SHRINK_LIMIT = 2.5;
/**
* Alt-text tokens that mark a plain image as diagram-like (case-insensitive).
* Each token is matched as a whole word against the lower-cased alt text.
*/
const ALT_HINT_TOKENS = ["diagram", "architecture", "flowchart", "chart", "graph"];
// ─── Pass 1: directive suffixes ───────────────────────────────────────
/**
 * An `<img …>` tag, optional whitespace, then one `{…}` group of 1–120
 * characters containing no braces, angle brackets or newlines.
 */
const IMG_WITH_SUFFIX_RE = /(<img\b[^>]*>)\s*\{([^{}<>\n]{1,120})\}/gi;

/**
 * Pass 1: fold `{width=…}` / `{page=…}` suffixes that directly follow an
 * `<img>` tag into `data-gstack-width` / `data-gstack-page` attributes on
 * that tag, dropping the brace text (and any whitespace before it).
 *
 * A brace group that parseDirectives rejects is returned exactly as found,
 * so literal prose in braces stays visible.
 *
 * @param html HTML to scan.
 * @returns The HTML with recognized directive suffixes consumed.
 */
export function applyImageDirectives(html: string): string {
  const rewrite = (whole: string, img: string, braceBody: string): string => {
    const directives = parseDirectives(braceBody);
    if (directives === null) return whole;

    const withWidth = directives.width
      ? addAttr(img, "data-gstack-width", directives.width)
      : img;
    return directives.page
      ? addAttr(withWidth, "data-gstack-page", directives.page)
      : withWidth;
  };
  return html.replace(IMG_WITH_SUFFIX_RE, rewrite);
}
/**
 * Parse the inside of a `{…}` directive group.
 *
 * The body is split on whitespace; every part must be `width=<value>` or
 * `page=<value>` (keys and values are case-insensitive, values are returned
 * lower-cased). When a key repeats, the last occurrence wins.
 *
 * Accepted values:
 * - `width`: `full`, an integer percentage of 1–3 digits (`50%`), or a CSS
 *   number followed by `in|cm|mm|pt|px` (`3in`, `2.5cm`, `.5in`).
 * - `page`: `landscape` or `portrait`.
 *
 * @param body Text between the braces.
 * @returns The parsed directives, or `null` when any part is unknown or has
 *   a malformed value, so the caller leaves the brace group visible.
 */
export function parseDirectives(body: string): { width?: string; page?: string } | null {
  // A CSS number is digits with an optional fraction, or a bare fraction.
  // The previous `[0-9.]+` also accepted `...px`, `.px` and `1.2.3px`, which
  // were consumed and then emitted as an invalid `width:` declaration.
  const widthValue = /^(?:full|\d{1,3}%|(?:\d+(?:\.\d+)?|\.\d+)(?:in|cm|mm|pt|px))$/;
  const pageValue = /^(?:landscape|portrait)$/;

  let width: string | undefined;
  let page: string | undefined;

  for (const part of body.trim().split(/\s+/)) {
    const m = part.match(/^(width|page)=(.+)$/i);
    if (!m) return null; // any unknown token ⇒ not a directive group

    const key = m[1].toLowerCase();
    const value = m[2].toLowerCase();
    if (key === "width" && widthValue.test(value)) {
      width = value;
    } else if (key === "page" && pageValue.test(value)) {
      page = value;
    } else {
      return null; // recognized key, malformed value ⇒ leave visible, not silent
    }
  }

  // split() always yields at least one part and every part either set a
  // directive or returned null above, so something was recognized here.
  return { width, page };
}
/**
 * Insert `name="value"` as the first attribute of an `<img>` tag.
 *
 * The value is inserted literally: `"` is written as `&quot;` so it cannot
 * terminate the attribute, and `$` sequences are not treated as replacement
 * patterns. The tag name is normalized to lower-case `<img`, as before.
 *
 * @param imgTag A tag string beginning with `<img`; anything else is
 *   returned unchanged.
 * @param name Attribute name.
 * @param value Attribute value.
 */
function addAttr(imgTag: string, name: string, value: string): string {
  const attr = `${name}="${value.replace(/"/g, "&quot;")}"`;
  // A replacer function keeps `$&`, `$1`, … in the value from being expanded.
  return imgTag.replace(/^<img\b/i, () => `<img ${attr}`);
}
// ─── Pass 2: width styles + landscape promotion ───────────────────────
/**
 * Pass 2: turn width directives into inline styles, then wrap blocks that
 * qualify for a landscape page in `<div class="page-wide">`.
 *
 * Three sequential rewrites:
 *  a. every `<img>` carrying `data-gstack-width` gets
 *     `width: <value>; height: auto;` merged into its style (`full` → `100%`);
 *  b. every `<p>` holding exactly one `<img>` is replaced by the wide wrapper
 *     when decideImagePromotion approves;
 *  c. every `<figure class="diagram…">` is wrapped when
 *     decideDiagramPromotion approves; figures containing the text
 *     `diagram-error` are left alone.
 *
 * @param html HTML after image inlining.
 * @param opts Content-box width and the `warn` sink, which receives one
 *   message per promoted block.
 * @returns The rewritten HTML and whether anything was promoted.
 */
export function applyImagePolicy(html: string, opts: ImagePolicyOptions): ImagePolicyResult {
  // Content box in CSS px (96 per inch), scaled to the auto-promotion threshold.
  const widthThresholdPx = opts.contentWidthIn * 96 * SHRINK_LIMIT;
  let hasLandscape = false;

  const wrapWide = (block: string): string => {
    hasLandscape = true;
    return `<div class="page-wide">${block}</div>`;
  };

  // a. width directives → inline styles on the img.
  const styleWidth = (img: string): string => {
    const directive = attrValue(img, "data-gstack-width");
    if (!directive) return img;
    const cssWidth = directive === "full" ? "100%" : directive;
    return mergeStyle(img, `width: ${cssWidth}; height: auto;`);
  };

  // b. standalone images: markdown renders them as <p><img …></p>, so the
  //    paragraph itself is swapped for the wide wrapper.
  const promoteImage = (paragraph: string, img: string): string => {
    const { promote, reason } = decideImagePromotion(img, widthThresholdPx);
    if (!promote) return paragraph;
    opts.warn(`promoting image to a landscape page (${reason})`);
    return wrapWide(img);
  };

  // c. rendered diagram figures: provenance is implicit, dimensions come
  //    from the SVG itself.
  const promoteDiagram = (figure: string): string => {
    if (figure.includes("diagram-error")) return figure;
    const { promote, reason } = decideDiagramPromotion(figure, widthThresholdPx);
    if (!promote) return figure;
    opts.warn(`promoting diagram to a landscape page (${reason})`);
    return wrapWide(figure);
  };

  const out = html
    .replace(/<img\b[^>]*>/gi, styleWidth)
    .replace(/<p>\s*(<img\b[^>]*>)\s*<\/p>/gi, promoteImage)
    .replace(/<figure class="diagram[^"]*"[^>]*>[\s\S]*?<\/figure>/gi, promoteDiagram);

  return { html: out, hasLandscape };
}
/** Outcome of a landscape-promotion check for one image or diagram. */
interface PromotionDecision {
/** True when the block should be moved onto the landscape named page. */
promote: boolean;
/**
* Short explanation of the outcome. For promotions, applyImagePolicy embeds
* it in the message passed to `warn`.
*/
reason: string;
}
/**
 * Decide whether a plain `<img>` belongs on a landscape page.
 *
 * An explicit `data-gstack-page` wins outright (`portrait` vetoes,
 * `landscape` forces). Otherwise the image is promoted only when all hold:
 * both `data-gstack-px-width` / `-height` are positive numbers, the aspect
 * ratio is at least MIN_ASPECT, the width exceeds `widthThresholdPx`, and the
 * lower-cased alt text contains one of ALT_HINT_TOKENS as a whole word.
 *
 * @param tag The `<img …>` tag text.
 * @param widthThresholdPx Minimum intrinsic CSS-px width for auto-promotion.
 */
function decideImagePromotion(tag: string, widthThresholdPx: number): PromotionDecision {
  const skip = (reason: string): PromotionDecision => ({ promote: false, reason });

  switch (attrValue(tag, "data-gstack-page")) {
    case "portrait":
      return skip("page=portrait veto");
    case "landscape":
      return { promote: true, reason: "page=landscape directive" };
  }

  const w = num(attrValue(tag, "data-gstack-px-width"));
  const h = num(attrValue(tag, "data-gstack-px-height"));
  if (!w || !h) return skip("no intrinsic dimensions");
  if (w / h < MIN_ASPECT) return skip("aspect below floor");
  if (w <= widthThresholdPx) return skip("fits portrait readably");

  const alt = (attrValue(tag, "alt") ?? "").toLowerCase();
  let hinted = false;
  for (const token of ALT_HINT_TOKENS) {
    if (new RegExp(`\\b${token}\\b`).test(alt)) {
      hinted = true;
      break;
    }
  }
  if (!hinted) return skip("no diagram hint in alt text");

  return { promote: true, reason: `wide diagram-like image (${Math.round(w)}px, alt hint)` };
}
/**
 * Decide whether a rendered diagram `<figure>` belongs on a landscape page.
 *
 * An explicit `data-gstack-page` wins outright (`portrait` vetoes,
 * `landscape` forces). Otherwise the figure is promoted only when svgCssDims
 * can measure it, the aspect ratio is at least MIN_ASPECT, and the width
 * exceeds `widthThresholdPx`. No alt-text hint is needed for diagrams.
 *
 * NOTE(review): the `data-gstack-page` lookup runs over the entire figure
 * string (embedded source comment and SVG included), not only the opening
 * `<figure>` tag.
 *
 * @param figure Full `<figure …>…</figure>` markup.
 * @param widthThresholdPx Minimum CSS-px width for auto-promotion.
 */
function decideDiagramPromotion(figure: string, widthThresholdPx: number): PromotionDecision {
  const skip = (reason: string): PromotionDecision => ({ promote: false, reason });

  const override = attrValue(figure, "data-gstack-page");
  if (override === "portrait") return skip("page=portrait veto");
  if (override === "landscape") {
    return { promote: true, reason: "page=landscape fence directive" };
  }

  const size = svgCssDims(figure);
  if (!size) return skip("no measurable SVG dimensions");

  const { width, height } = size;
  if (width / height < MIN_ASPECT) return skip("aspect below floor");
  if (width <= widthThresholdPx) return skip("fits portrait readably");

  return { promote: true, reason: `wide diagram (${Math.round(width)}px)` };
}
/**
 * Best-effort CSS-px dimensions of the first `<svg>` in a figure.
 *
 * Explicit `width` and `height` attributes are used when both are plain
 * numbers (unitless or `px`); otherwise the third and fourth `viewBox`
 * numbers are used. Percentages and other units on `width`/`height` are
 * ignored, which sends the lookup to `viewBox`.
 *
 * Attribute names are matched whole, so `stroke-width="2"` is not read as
 * `width`. Both dimensions must be finite and greater than zero; a zero or
 * unparsable value yields `null` rather than an infinite or NaN aspect ratio
 * for the caller.
 *
 * @param figure Markup containing the `<svg …>` opening tag.
 * @returns `{ width, height }`, or `null` when nothing usable is found.
 */
function svgCssDims(figure: string): { width: number; height: number } | null {
  const svgTag = figure.match(/<svg\b[^>]*>/i)?.[0];
  if (!svgTag) return null;

  // Strict numeric parse: Number("1.2.3") and Number(".") are NaN.
  const positive = (raw: string | undefined): number | null => {
    if (raw === undefined) return null;
    const n = Number(raw);
    return Number.isFinite(n) && n > 0 ? n : null;
  };

  // `(?<![\w:-])` rejects matches that are the tail of a longer attribute
  // name such as `stroke-width`.
  const attrNum = (name: string): number | null => {
    const m = svgTag.match(
      new RegExp(`(?<![\\w:-])${name}\\s*=\\s*["']\\s*([0-9.]+)(?:px)?\\s*["']`, "i"),
    );
    return positive(m?.[1]);
  };

  const w = attrNum("width");
  const h = attrNum("height");
  if (w !== null && h !== null) return { width: w, height: h };

  const vb = svgTag.match(
    /(?<![\w:-])viewBox\s*=\s*["']\s*[-0-9.]+[\s,]+[-0-9.]+[\s,]+([0-9.]+)[\s,]+([0-9.]+)\s*["']/i,
  );
  const vbW = positive(vb?.[1]);
  const vbH = positive(vb?.[2]);
  if (vbW !== null && vbH !== null) return { width: vbW, height: vbH };

  return null;
}
/**
 * Read the raw value of the first quoted `name="…"` / `name='…'` attribute
 * in a tag string.
 *
 * The name is matched case-insensitively and as a whole attribute name:
 * `alt` does not match `data-alt`, and `style` does not match `data-style`.
 * The first occurrence in the text wins, whichever quote style it uses.
 * Values are returned as written (HTML entities are not decoded).
 *
 * @param tag Tag text, or a larger HTML fragment, to search.
 * @param name Attribute name to look for.
 * @returns The attribute value (possibly `""`), or `null` when absent.
 */
function attrValue(tag: string, name: string): string | null {
  // Treat the name literally even if it contains regex metacharacters.
  const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const m = tag.match(
    new RegExp(`(?<![\\w:-])${escaped}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, "i"),
  );
  if (!m) return null;
  // Exactly one of the two capture groups participates in a match.
  return m[1] ?? m[2] ?? null;
}
/**
 * Parse a strictly positive, finite number from an attribute value.
 *
 * Uses `parseFloat`, so trailing non-numeric text is ignored (`"12px"` → 12).
 *
 * @param s Raw attribute value, or `null` when the attribute was absent.
 * @returns The number, or `null` for missing, unparsable, zero, negative or
 *   infinite input.
 */
function num(s: string | null): number | null {
  const parsed = s === null ? NaN : parseFloat(s);
  // NaN fails the `> 0` comparison; Infinity fails the finiteness check.
  return parsed > 0 && Number.isFinite(parsed) ? parsed : null;
}
/**
 * Append CSS declarations to an `<img>` tag's inline style, creating the
 * `style` attribute when the tag has none.
 *
 * When a style exists:
 * - only a real `style` attribute is touched (`data-style` is not);
 * - the existing text is kept verbatim apart from trailing `;`/whitespace
 *   being trimmed, so the join never produces `;;`;
 * - double quotes inside a single-quoted style become single quotes, so the
 *   value stays intact inside the double-quoted attribute that is written;
 * - `$` sequences in the existing style are preserved literally.
 *
 * @param tag A tag string beginning with `<img`.
 * @param css Declarations to append, e.g. `width: 50%; height: auto;`.
 * @returns The tag with the merged or newly added `style` attribute.
 */
function mergeStyle(tag: string, css: string): string {
  const m = /(?<![\w:-])style\s*=\s*(?:"([^"]*)"|'([^']*)')/i.exec(tag);
  if (m) {
    const existing = (m[1] ?? m[2] ?? "")
      .replace(/"/g, "'")
      .replace(/[;\s]+$/, "");
    const merged = existing ? `${existing}; ${css}` : css;
    // Splice by index rather than String.replace so nothing in `existing`
    // is interpreted as a replacement pattern.
    return tag.slice(0, m.index) + `style="${merged}"` + tag.slice(m.index + m[0].length);
  }
  return tag.replace(/^<img\b/i, () => `<img style="${css}"`);
}

View File

@ -32,6 +32,7 @@ import {
renderFenceSlots,
substituteSlots,
} from "./diagram-prepass";
import { applyImagePolicy } from "./image-policy";
class ProgressReporter {
private readonly quiet: boolean;
@ -119,6 +120,7 @@ export async function generate(opts: GenerateOptions): Promise<string> {
if (!opts.quiet) process.stderr.write(`\r\x1b[K[make-pdf] warning: ${msg}\n`);
};
let renderTab: RenderTab | null = null;
let hasLandscape = false;
const getRenderTab = (): RenderTab | null => {
if (renderTab) return renderTab;
try {
@ -153,15 +155,21 @@ export async function generate(opts: GenerateOptions): Promise<string> {
}
progress.begin("Inlining images");
const contentWidthIn = contentWidthInches(opts);
finalHtml = inlineLocalImages(finalHtml, {
inputDir: path.dirname(input),
strict: opts.strict === true,
allowNetwork: opts.allowNetwork === true,
contentWidthIn: contentWidthInches(opts),
contentWidthIn,
warn,
getTab: getRenderTab,
});
progress.end("Inlining images");
// Width directives + conservative auto-landscape (image-policy).
const policy = applyImagePolicy(finalHtml, { contentWidthIn, warn });
finalHtml = policy.html;
hasLandscape = policy.hasLandscape;
} finally {
renderTab?.close();
}
@ -212,6 +220,10 @@ export async function generate(opts: GenerateOptions): Promise<string> {
tagged: opts.tagged !== false,
outline: opts.outline !== false,
printBackground: !!opts.watermark,
// Named landscape pages only take effect when Chromium honors CSS page
// sizes. Flip it ONLY when a promotion exists — minimal behavior change
// for every other document.
preferCSSPageSize: hasLandscape ? true : undefined,
toc: opts.toc,
});
progress.end("Generating PDF");

View File

@ -118,6 +118,22 @@ function pageRules(size: string, margin: string, opts: PrintCssOptions): string
` @bottom-center { content: none; }`,
` @bottom-right { content: none; }`,
`}`,
``,
// Landscape named page for promoted wide diagrams/images (image-policy).
// Chromium-only — exactly the engine this pipeline always prints with.
// Honored only when the print call passes preferCSSPageSize (orchestrator
// sets it when a promotion exists).
`@page wide {`,
` size: ${size} landscape;`,
` margin: ${margin};`,
`}`,
`.page-wide {`,
` page: wide;`,
` break-before: page;`,
` break-after: page;`,
`}`,
`.page-wide img, .page-wide svg { width: 100%; height: auto; max-width: none; }`,
`.page-wide figure.diagram > svg { max-width: none; }`,
].filter(line => line !== "").join("\n");
}

View File

@ -14,6 +14,7 @@
import { marked } from "marked";
import { smartypants } from "./smartypants";
import { printCss, type PrintCssOptions } from "./print-css";
import { applyImageDirectives } from "./image-policy";
export interface RenderOptions {
markdown: string;
@ -60,8 +61,13 @@ export function render(opts: RenderOptions): RenderResult {
// 1. Markdown → HTML
const rawHtml = marked.parse(opts.markdown, { async: false }) as string;
// 1.5. Image directive suffixes: `![a](x.png){width=50%}` → data-gstack-*
// attributes. Before the sanitizer (which keeps data- attrs) so the brace
// text never reaches smartypants or the final page.
const directedHtml = applyImageDirectives(rawHtml);
// 2. Sanitize
const cleanHtml = sanitizeUntrustedHtml(rawHtml);
const cleanHtml = sanitizeUntrustedHtml(directedHtml);
// 3. Decode common entities so smartypants can match raw " and '.
// marked HTML-encodes quotes in text ("hello" → &quot;hello&quot;);