mirror of https://github.com/garrytan/gstack.git
feat(make-pdf): width directives + conservative auto-landscape via CSS named pages
`{width=full|<pct>|<dim>}` and `{page=landscape|portrait}`
suffixes translate to data-gstack-* attrs in render() (before the sanitizer,
which keeps data- attributes; unrecognized brace groups stay visible text).
Default width rule needs no code: intrinsic CSS-px capped at the content box,
never upscaled — figure img max-width owns it.
Auto-landscape promotes a block to `@page wide { size: <pagesize> landscape }`
only when aspect >= 1.8 AND intrinsic width > 2.5x the content box (~1600px on
letter) AND diagram provenance (rendered fences) or a whole-word alt token
(diagram|architecture|flowchart|chart|graph) for plain images. {page=...}
forces or vetoes; fence info strings accept page=... too. preferCSSPageSize
is passed to Chromium only when a promotion exists, so every other document
prints exactly as before. False negatives are cheap; false positives feel
broken (eng-review P4, Codex challenge accepted).
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
e7c0c1bf51
commit
89c35352cc
|
|
@ -42,6 +42,8 @@ export interface DiagramFence {
|
|||
source: string;
|
||||
/** Optional title="..." from the fence info string (a11y label, D6.4). */
|
||||
title?: string;
|
||||
/** Optional page=landscape|portrait fence directive (image-policy override). */
|
||||
page?: "landscape" | "portrait";
|
||||
/** render=false → leave as a plain code block (escape hatch, D6.3). */
|
||||
render: boolean;
|
||||
/** Placeholder token substituted into the markdown. */
|
||||
|
|
@ -119,6 +121,7 @@ export function extractDiagramFences(markdown: string): FenceExtraction {
|
|||
lang: info.lang,
|
||||
source: openFence.body.join("\n"),
|
||||
title: info.title,
|
||||
page: info.page,
|
||||
render: true,
|
||||
token,
|
||||
ordinal,
|
||||
|
|
@ -181,13 +184,18 @@ function matchFenceLine(line: string): { char: string; len: number; info: string
|
|||
return { char: m[1][0], len: m[1].length, info: m[2].trim() };
|
||||
}
|
||||
|
||||
/**
 * Parse a fence info string such as `mermaid`, `mermaid render=false`,
 * `mermaid title="Auth flow"` or `mermaid page=landscape`.
 *
 * - `lang`   — first whitespace-delimited token, lowercased ("" when absent).
 * - `render` — true unless a `render=false` flag is present.
 * - `title`  — value of `title="..."` (double quotes preferred) or `title='...'`.
 * - `page`   — `landscape` | `portrait` from a `page=...` flag, lowercased;
 *              undefined when absent or any other value.
 *
 * The quoted title is removed before the `render` / `page` flags are read, so
 * title text like `title="when page=landscape helps"` is never mistaken for a
 * directive.
 */
export function parseInfoString(info: string): {
  lang: string; render: boolean; title?: string; page?: "landscape" | "portrait";
} {
  const lang = (info.match(/^\S+/)?.[0] ?? "").toLowerCase();

  const titleMatch = info.match(/\btitle\s*=\s*"([^"]*)"/i)
    ?? info.match(/\btitle\s*=\s*'([^']*)'/i);
  const title = titleMatch?.[1];

  // Flags are looked up outside the title's quoted value only. The matched
  // text is the leftmost occurrence, which is exactly what a string-pattern
  // replace removes.
  const flags = titleMatch ? info.replace(titleMatch[0], " ") : info;

  const render = !/\brender\s*=\s*false\b/i.test(flags);
  const pageRaw = flags.match(/\bpage\s*=\s*(landscape|portrait)\b/i)?.[1]?.toLowerCase();
  const page = pageRaw === "landscape" || pageRaw === "portrait" ? pageRaw : undefined;

  return { lang, render, title, page };
}
|
||||
|
||||
// ─── Slot substitution (pure) ─────────────────────────────────────────
|
||||
|
|
@ -233,8 +241,9 @@ export function buildDiagramFigure(fence: DiagramFence, svg: string): string {
|
|||
const captioned = fence.title
|
||||
? `\n<figcaption class="diagram-caption">${escapeHtml(fence.title)}</figcaption>`
|
||||
: "";
|
||||
const pageAttr = fence.page ? ` data-gstack-page="${fence.page}"` : "";
|
||||
return [
|
||||
`<figure class="diagram" role="img" aria-label="${escapeAttr(label)}">`,
|
||||
`<figure class="diagram" role="img" aria-label="${escapeAttr(label)}"${pageAttr}>`,
|
||||
`<!-- gstack-diagram-source lang=${escapeAttr(fence.lang)}`,
|
||||
escapeHtmlComment(fence.source),
|
||||
`-->`,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,219 @@
|
|||
/**
|
||||
* Image width policy + conservative auto-landscape (eng-review P4, D4 spec).
|
||||
*
|
||||
* Two pure passes over rendered HTML:
|
||||
*
|
||||
* 1. applyImageDirectives — runs inside render() right after marked, before
|
||||
* the sanitizer. Translates the markdown-adjacent directive suffix
|
||||
* `{width=50%}` / `{page=landscape}` into data-gstack-*
|
||||
* attributes (the sanitizer keeps data- attributes; the brace text is
|
||||
* consumed so it never reaches smartypants or the page).
|
||||
*
|
||||
* 2. applyImagePolicy — runs in the orchestrator after image inlining (which
|
||||
* annotates data-gstack-px-width/-height from real bytes). Applies the
|
||||
* width rule and decides landscape promotion:
|
||||
*
|
||||
* WIDTH RULE: render at intrinsic CSS-px width, capped at the content box,
|
||||
* never upscaled — that is exactly `figure img { max-width: 100% }` doing
|
||||
* its job, so the default needs no inline style. Directives opt into more:
|
||||
* width=full stretches to the content box; <pct>/<dim> set explicit width.
|
||||
*
|
||||
* LANDSCAPE (conservative, false negatives are cheap):
|
||||
* promote only when ALL hold —
|
||||
* aspect ratio ≥ 1.8
|
||||
* AND intrinsic CSS-px width > SHRINK_LIMIT × content box
|
||||
* (content shrunk below ~40% of natural size = unreadable)
|
||||
* AND diagram provenance (rendered fence) or an alt-text token from
|
||||
* ALT_HINT_TOKENS (plain images)
|
||||
* `{page=landscape}` forces, `{page=portrait}` vetoes — both skip the
|
||||
* heuristics entirely.
|
||||
*
|
||||
* Promotion wraps the block in <div class="page-wide"> whose CSS named
|
||||
* page (`@page wide { size: <size> landscape }`, print-css.ts) rotates
|
||||
* just that page. Chromium only honors CSS page sizes when the print call
|
||||
* passes preferCSSPageSize — the orchestrator sets it when hasLandscape.
|
||||
*/
|
||||
|
||||
export interface ImagePolicyOptions {
|
||||
/** Physical content-box width in inches (page width minus margins). */
|
||||
contentWidthIn: number;
|
||||
warn: (msg: string) => void;
|
||||
}
|
||||
|
||||
export interface ImagePolicyResult {
|
||||
html: string;
|
||||
/** True when at least one block was promoted to the landscape named page. */
|
||||
hasLandscape: boolean;
|
||||
}
|
||||
|
||||
/** Aspect ratio floor for auto-promotion. */
|
||||
const MIN_ASPECT = 1.8;
|
||||
/**
|
||||
* Auto-promote only when the intrinsic CSS-px width exceeds this multiple of
|
||||
* the content box (in CSS px @96dpi). 2.5 ≈ the plan's ~1600px threshold on a
|
||||
* 6.5in letter box; calibrated against fixtures (design doc Open Question 4).
|
||||
*/
|
||||
const SHRINK_LIMIT = 2.5;
|
||||
/** Alt-text tokens that mark a plain image as diagram-like (case-insensitive). */
|
||||
const ALT_HINT_TOKENS = ["diagram", "architecture", "flowchart", "chart", "graph"];
|
||||
|
||||
// ─── Pass 1: directive suffixes ───────────────────────────────────────
|
||||
|
||||
const IMG_WITH_SUFFIX_RE = /(<img\b[^>]*>)\s*\{([^{}<>\n]{1,120})\}/gi;
|
||||
|
||||
/**
 * Fold `{...}` directive suffixes that directly follow an <img> tag into
 * data-gstack-width / data-gstack-page attributes on that tag, dropping the
 * brace text. A brace group that does not parse as directives is returned
 * unchanged, braces and all (it is treated as the author's literal prose).
 */
export function applyImageDirectives(html: string): string {
  const rewrite = (whole: string, imgTag: string, body: string): string => {
    const directives = parseDirectives(body);
    if (!directives) return whole;

    // Width first, then page — each attribute is inserted at the front of
    // the tag, so page ends up ahead of width when both are present.
    const withWidth = directives.width
      ? addAttr(imgTag, "data-gstack-width", directives.width)
      : imgTag;
    return directives.page
      ? addAttr(withWidth, "data-gstack-page", directives.page)
      : withWidth;
  };

  return html.replace(IMG_WITH_SUFFIX_RE, rewrite);
}
|
||||
|
||||
/**
 * Parse the text between the braces of a `{...}` suffix into image directives.
 *
 * The body is split on whitespace and every part must be `width=<value>` or
 * `page=<value>` with a valid value. Any unknown token or malformed value
 * rejects the whole group (null), so the caller leaves the brace text visible
 * instead of silently dropping it. Keys and values are matched
 * case-insensitively and returned lowercased; when a key repeats, the last
 * occurrence wins.
 *
 * Accepted widths: `full`, a 1–3 digit integer percentage, or a well-formed
 * decimal number followed by in|cm|mm|pt|px (`6.5in`, `.5in`). Strings such as
 * `..px` or `1.2.3in` are rejected — the value is later written verbatim into
 * an inline `width:` style and must be valid CSS.
 *
 * @param body text between the braces, without the braces themselves
 * @returns the parsed directives, or null when the group is not a pure
 *          directive group
 */
export function parseDirectives(body: string): { width?: string; page?: string } | null {
  const widthValue = /^(?:full|\d{1,3}%|(?:\d+(?:\.\d+)?|\.\d+)(?:in|cm|mm|pt|px))$/;

  let width: string | undefined;
  let page: string | undefined;

  for (const part of body.trim().split(/\s+/)) {
    const m = /^(width|page)=(.+)$/i.exec(part);
    if (!m) return null; // any unknown token ⇒ not a directive group

    const key = m[1].toLowerCase();
    const value = m[2].toLowerCase();

    if (key === "width" && widthValue.test(value)) {
      width = value;
    } else if (key === "page" && (value === "landscape" || value === "portrait")) {
      page = value;
    } else {
      return null; // recognized key, malformed value ⇒ leave visible, not silent
    }
  }

  // An empty body splits to [""] and is rejected above, so reaching here
  // means at least one directive was set.
  return width !== undefined || page !== undefined ? { width, page } : null;
}
|
||||
|
||||
/**
 * Insert `name="value"` as the first attribute of an `<img …>` tag.
 *
 * The value is attribute-escaped (`&`, `"`, `<`, `>`) so it cannot terminate
 * the attribute or the tag. Input that does not start with `<img` is returned
 * unchanged.
 *
 * @param imgTag a complete `<img …>` tag
 * @param name   attribute name, written as-is
 * @param value  attribute value, written inside double quotes
 */
function addAttr(imgTag: string, name: string, value: string): string {
  const escaped = value
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
  // A function replacer keeps `$&`, `$'`, `$1`… in the value literal; a string
  // replacement would expand them as replacement patterns.
  return imgTag.replace(/^<img\b/i, () => `<img ${name}="${escaped}"`);
}
|
||||
|
||||
// ─── Pass 2: width styles + landscape promotion ───────────────────────
|
||||
|
||||
/**
 * Second pass over rendered HTML: apply width directives and decide which
 * blocks move onto the landscape named page.
 *
 * Three sequential rewrites, each feeding the next:
 *   1. every <img> carrying data-gstack-width gets an inline
 *      `width: …; height: auto;` style (`full` maps to `100%`);
 *   2. a paragraph that holds nothing but one <img> is swapped for a
 *      `<div class="page-wide">` wrapper when decideImagePromotion says so;
 *   3. a `<figure class="diagram…">` is wrapped the same way when
 *      decideDiagramPromotion says so (figures containing "diagram-error"
 *      are left alone).
 *
 * Each promotion is reported through `opts.warn`.
 *
 * @returns the rewritten HTML plus whether any block was promoted
 */
export function applyImagePolicy(html: string, opts: ImagePolicyOptions): ImagePolicyResult {
  // Content box in CSS px (96 per inch), scaled by the shrink limit.
  const widthThresholdPx = opts.contentWidthIn * 96 * SHRINK_LIMIT;
  let hasLandscape = false;

  // Step 1: width directives → inline styles on the img.
  const styled = html.replace(/<img\b[^>]*>/gi, (imgTag) => {
    const directive = attrValue(imgTag, "data-gstack-width");
    if (!directive) return imgTag;
    const cssWidth = directive === "full" ? "100%" : directive;
    return mergeStyle(imgTag, `width: ${cssWidth}; height: auto;`);
  });

  // Step 2: standalone images. Markdown images render as <p><img …></p>, so
  // promotion replaces the paragraph with the wide wrapper.
  const imagesPromoted = styled.replace(
    /<p>\s*(<img\b[^>]*>)\s*<\/p>/gi,
    (paragraph, imgTag: string) => {
      const verdict = decideImagePromotion(imgTag, widthThresholdPx);
      if (verdict.promote) {
        hasLandscape = true;
        opts.warn(`promoting image to a landscape page (${verdict.reason})`);
        return `<div class="page-wide">${imgTag}</div>`;
      }
      return paragraph;
    },
  );

  // Step 3: rendered diagram figures. Provenance is implied by the figure
  // class; dimensions come from the SVG's width/height or viewBox.
  const diagramsPromoted = imagesPromoted.replace(
    /<figure class="diagram[^"]*"[^>]*>[\s\S]*?<\/figure>/gi,
    (block) => {
      if (block.includes("diagram-error")) return block;
      const verdict = decideDiagramPromotion(block, widthThresholdPx);
      if (verdict.promote) {
        hasLandscape = true;
        opts.warn(`promoting diagram to a landscape page (${verdict.reason})`);
        return `<div class="page-wide">${block}</div>`;
      }
      return block;
    },
  );

  return { html: diagramsPromoted, hasLandscape };
}
|
||||
|
||||
interface PromotionDecision {
|
||||
promote: boolean;
|
||||
reason: string;
|
||||
}
|
||||
|
||||
/**
 * Decide whether a standalone <img> tag is promoted to a landscape page.
 *
 * An explicit data-gstack-page attribute settles it outright (`portrait`
 * vetoes, `landscape` forces). Otherwise all heuristics must pass, checked in
 * this order: intrinsic dimensions present, aspect ratio at or above
 * MIN_ASPECT, width above `widthThresholdPx`, and a whole-word
 * ALT_HINT_TOKENS match in the lowercased alt text.
 *
 * @param tag              the `<img …>` tag text
 * @param widthThresholdPx intrinsic width (CSS px) that must be exceeded
 * @returns the decision together with a short human-readable reason
 */
function decideImagePromotion(tag: string, widthThresholdPx: number): PromotionDecision {
  const reject = (reason: string): PromotionDecision => ({ promote: false, reason });

  const override = attrValue(tag, "data-gstack-page");
  if (override === "portrait") return reject("page=portrait veto");
  if (override === "landscape") return { promote: true, reason: "page=landscape directive" };

  const pxWidth = num(attrValue(tag, "data-gstack-px-width"));
  const pxHeight = num(attrValue(tag, "data-gstack-px-height"));
  if (!(pxWidth && pxHeight)) return reject("no intrinsic dimensions");
  if (pxWidth / pxHeight < MIN_ASPECT) return reject("aspect below floor");
  if (pxWidth <= widthThresholdPx) return reject("fits portrait readably");

  // Whole-word match only: "graph" must not fire on "photograph".
  const altText = (attrValue(tag, "alt") ?? "").toLowerCase();
  let hinted = false;
  for (const token of ALT_HINT_TOKENS) {
    if (new RegExp(`\\b${token}\\b`).test(altText)) {
      hinted = true;
      break;
    }
  }
  if (!hinted) return reject("no diagram hint in alt text");

  return { promote: true, reason: `wide diagram-like image (${Math.round(pxWidth)}px, alt hint)` };
}
|
||||
|
||||
/**
 * Decide whether a rendered diagram figure is promoted to a landscape page.
 *
 * A data-gstack-page attribute on the opening <figure> tag settles it
 * outright (`portrait` vetoes, `landscape` forces). Otherwise the SVG must
 * have measurable dimensions, an aspect ratio at or above MIN_ASPECT, and a
 * width above `widthThresholdPx`.
 *
 * The override is read from the opening tag only. The figure body embeds the
 * diagram source and the SVG markup, so searching the whole string would let
 * text inside the diagram force or veto promotion.
 *
 * @param figure           full `<figure …>…</figure>` markup
 * @param widthThresholdPx SVG width (CSS px) that must be exceeded
 * @returns the decision together with a short human-readable reason
 */
function decideDiagramPromotion(figure: string, widthThresholdPx: number): PromotionDecision {
  // Quote-aware so a `>` inside an attribute value does not end the tag
  // early. Falls back to the whole string if no opening tag is recognised.
  const openTag = figure.match(/^\s*<figure\b(?:"[^"]*"|'[^']*'|[^>"'])*>/i)?.[0] ?? figure;

  const page = attrValue(openTag, "data-gstack-page");
  if (page === "portrait") return { promote: false, reason: "page=portrait veto" };
  if (page === "landscape") return { promote: true, reason: "page=landscape fence directive" };

  const dims = svgCssDims(figure);
  if (!dims) return { promote: false, reason: "no measurable SVG dimensions" };
  if (dims.width / dims.height < MIN_ASPECT) return { promote: false, reason: "aspect below floor" };
  if (dims.width <= widthThresholdPx) return { promote: false, reason: "fits portrait readably" };
  return { promote: true, reason: `wide diagram (${Math.round(dims.width)}px)` };
}
|
||||
|
||||
/**
 * Best-effort CSS-px dimensions of the first <svg> element in a figure.
 *
 * Explicit `width` / `height` attributes (unitless or `px`) are used when both
 * are present and positive; otherwise the third and fourth `viewBox` numbers.
 * Percentages and other units are not measurable and fall through to viewBox.
 *
 * HTML comments are removed before searching, so an `<svg` that appears in
 * commented-out text ahead of the real element is not measured. Attribute
 * names are matched whole: `stroke-width="2"` is not read as `width`.
 *
 * @param figure figure markup expected to contain an <svg> element
 * @returns positive finite dimensions, or null when none can be determined
 */
function svgCssDims(figure: string): { width: number; height: number } | null {
  const markup = figure.replace(/<!--[\s\S]*?-->/g, "");
  const tag = markup.match(/<svg\b[^>]*>/i)?.[0];
  if (!tag) return null;

  // Zero or non-finite sizes cannot yield a meaningful aspect ratio.
  const positive = (raw: string | undefined): number | null => {
    if (raw === undefined) return null;
    const n = parseFloat(raw);
    return Number.isFinite(n) && n > 0 ? n : null;
  };

  // `(?<![\w:.-])` requires the name to start the attribute, which excludes
  // hyphenated or namespaced attributes that merely end in it.
  const attrNum = (name: string): number | null => {
    const m = tag.match(
      new RegExp(`(?<![\\w:.-])${name}\\s*=\\s*["']\\s*([0-9.]+)(?:px)?\\s*["']`, "i"),
    );
    return positive(m?.[1]);
  };

  const w = attrNum("width");
  const h = attrNum("height");
  if (w !== null && h !== null) return { width: w, height: h };

  const vb = tag.match(
    /(?<![\w:.-])viewBox\s*=\s*["']\s*[-0-9.]+[\s,]+[-0-9.]+[\s,]+([0-9.]+)[\s,]+([0-9.]+)\s*["']/i,
  );
  const vbWidth = positive(vb?.[1]);
  const vbHeight = positive(vb?.[2]);
  return vbWidth !== null && vbHeight !== null ? { width: vbWidth, height: vbHeight } : null;
}
|
||||
|
||||
/**
 * Read the value of attribute `name` from a tag's text.
 *
 * A double-quoted occurrence is preferred over a single-quoted one. The name
 * is matched case-insensitively and must be a whole attribute name: looking up
 * `alt` does not match `data-alt`, and `style` does not match `data-style`.
 *
 * @param tag  tag text to search
 * @param name attribute name (treated literally, not as a pattern)
 * @returns the raw value (possibly ""), or null when no quoted occurrence of
 *          the attribute is found
 */
function attrValue(tag: string, name: string): string | null {
  const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Lookbehind rejects a preceding word character, `-`, `:` or `.`, i.e. the
  // tail of a longer attribute name.
  const head = `(?<![\\w:.-])${literalName}\\s*=\\s*`;
  const m = tag.match(new RegExp(`${head}"([^"]*)"`, "i"))
    ?? tag.match(new RegExp(`${head}'([^']*)'`, "i"));
  return m ? m[1] : null;
}
|
||||
|
||||
/**
 * Parse an attribute value as a strictly positive, finite number.
 *
 * @param s raw attribute text, or null when the attribute was absent
 * @returns the parsed number, or null for null input, unparseable text,
 *          zero, negatives and non-finite values
 */
function num(s: string | null): number | null {
  // NaN fails the `> 0` comparison, so null input and junk both end as null.
  const parsed = s === null ? NaN : parseFloat(s);
  return parsed > 0 && Number.isFinite(parsed) ? parsed : null;
}
|
||||
|
||||
/**
 * Append CSS declarations to an <img> tag's inline style, adding a `style`
 * attribute when the tag has none.
 *
 * - Only a real `style` attribute is touched; `data-style="…"` is left alone.
 * - Existing declarations are kept first. Double quotes inside them are
 *   dropped (the result is always written double-quoted) and a trailing `;`
 *   is trimmed so the join never yields `;;` or a leading `; `.
 * - The existing style is spliced in literally; `$&`-like sequences in it are
 *   not expanded as replacement patterns.
 *
 * @param tag a complete `<img …>` tag
 * @param css declarations to append, e.g. `width: 50%; height: auto;`
 */
function mergeStyle(tag: string, css: string): string {
  const styleAttr = /(?<![\w:.-])style\s*=\s*(?:"([^"]*)"|'([^']*)')/i.exec(tag);

  if (styleAttr) {
    const existing = (styleAttr[1] ?? styleAttr[2] ?? "")
      .replace(/"/g, "")
      .trim()
      .replace(/(?:\s*;)+$/, "");
    const merged = existing ? `${existing}; ${css}` : css;
    // Splice by index rather than String.replace so neither side is ever
    // interpreted as a replacement pattern.
    return (
      tag.slice(0, styleAttr.index) +
      `style="${merged}"` +
      tag.slice(styleAttr.index + styleAttr[0].length)
    );
  }

  return tag.replace(/^<img\b/i, () => `<img style="${css}"`);
}
|
||||
|
|
@ -32,6 +32,7 @@ import {
|
|||
renderFenceSlots,
|
||||
substituteSlots,
|
||||
} from "./diagram-prepass";
|
||||
import { applyImagePolicy } from "./image-policy";
|
||||
|
||||
class ProgressReporter {
|
||||
private readonly quiet: boolean;
|
||||
|
|
@ -119,6 +120,7 @@ export async function generate(opts: GenerateOptions): Promise<string> {
|
|||
if (!opts.quiet) process.stderr.write(`\r\x1b[K[make-pdf] warning: ${msg}\n`);
|
||||
};
|
||||
let renderTab: RenderTab | null = null;
|
||||
let hasLandscape = false;
|
||||
const getRenderTab = (): RenderTab | null => {
|
||||
if (renderTab) return renderTab;
|
||||
try {
|
||||
|
|
@ -153,15 +155,21 @@ export async function generate(opts: GenerateOptions): Promise<string> {
|
|||
}
|
||||
|
||||
progress.begin("Inlining images");
|
||||
const contentWidthIn = contentWidthInches(opts);
|
||||
finalHtml = inlineLocalImages(finalHtml, {
|
||||
inputDir: path.dirname(input),
|
||||
strict: opts.strict === true,
|
||||
allowNetwork: opts.allowNetwork === true,
|
||||
contentWidthIn: contentWidthInches(opts),
|
||||
contentWidthIn,
|
||||
warn,
|
||||
getTab: getRenderTab,
|
||||
});
|
||||
progress.end("Inlining images");
|
||||
|
||||
// Width directives + conservative auto-landscape (image-policy).
|
||||
const policy = applyImagePolicy(finalHtml, { contentWidthIn, warn });
|
||||
finalHtml = policy.html;
|
||||
hasLandscape = policy.hasLandscape;
|
||||
} finally {
|
||||
renderTab?.close();
|
||||
}
|
||||
|
|
@ -212,6 +220,10 @@ export async function generate(opts: GenerateOptions): Promise<string> {
|
|||
tagged: opts.tagged !== false,
|
||||
outline: opts.outline !== false,
|
||||
printBackground: !!opts.watermark,
|
||||
// Named landscape pages only take effect when Chromium honors CSS page
|
||||
// sizes. Flip it ONLY when a promotion exists — minimal behavior change
|
||||
// for every other document.
|
||||
preferCSSPageSize: hasLandscape ? true : undefined,
|
||||
toc: opts.toc,
|
||||
});
|
||||
progress.end("Generating PDF");
|
||||
|
|
|
|||
|
|
@ -118,6 +118,22 @@ function pageRules(size: string, margin: string, opts: PrintCssOptions): string
|
|||
` @bottom-center { content: none; }`,
|
||||
` @bottom-right { content: none; }`,
|
||||
`}`,
|
||||
``,
|
||||
// Landscape named page for promoted wide diagrams/images (image-policy).
|
||||
// Chromium-only — exactly the engine this pipeline always prints with.
|
||||
// Honored only when the print call passes preferCSSPageSize (orchestrator
|
||||
// sets it when a promotion exists).
|
||||
`@page wide {`,
|
||||
` size: ${size} landscape;`,
|
||||
` margin: ${margin};`,
|
||||
`}`,
|
||||
`.page-wide {`,
|
||||
` page: wide;`,
|
||||
` break-before: page;`,
|
||||
` break-after: page;`,
|
||||
`}`,
|
||||
`.page-wide img, .page-wide svg { width: 100%; height: auto; max-width: none; }`,
|
||||
`.page-wide figure.diagram > svg { max-width: none; }`,
|
||||
].filter(line => line !== "").join("\n");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
import { marked } from "marked";
|
||||
import { smartypants } from "./smartypants";
|
||||
import { printCss, type PrintCssOptions } from "./print-css";
|
||||
import { applyImageDirectives } from "./image-policy";
|
||||
|
||||
export interface RenderOptions {
|
||||
markdown: string;
|
||||
|
|
@ -60,8 +61,13 @@ export function render(opts: RenderOptions): RenderResult {
|
|||
// 1. Markdown → HTML
|
||||
const rawHtml = marked.parse(opts.markdown, { async: false }) as string;
|
||||
|
||||
// 1.5. Image directive suffixes: `{width=50%}` → data-gstack-*
|
||||
// attributes. Before the sanitizer (which keeps data- attrs) so the brace
|
||||
// text never reaches smartypants or the final page.
|
||||
const directedHtml = applyImageDirectives(rawHtml);
|
||||
|
||||
// 2. Sanitize
|
||||
const cleanHtml = sanitizeUntrustedHtml(rawHtml);
|
||||
const cleanHtml = sanitizeUntrustedHtml(directedHtml);
|
||||
|
||||
// 3. Decode common entities so smartypants can match raw " and '.
|
||||
// marked HTML-encodes quotes in text ("hello" → "hello");
|
||||
|
|
|
|||
Loading…
Reference in New Issue