/**
* Diagram + image pre-pass. Runs between "read markdown" and render() in the
* orchestrator, and owns everything that needs the diagram-render bundle.
*
*   markdown ─▶ extractDiagramFences() ─────────────────▶ render() (marked+sanitize+smarty)
*               │ fences → placeholder tokens                │
*               │                                            ▼
*               └─▶ renderFenceSlots() ───────────▶ substituteSlots(html, slots)
*                     one browse render tab/run              │
*                     error ⇒ diagnostic block + page reload ▼
*                                                   inlineLocalImages(html)
*                                                     data URIs, probe dims from bytes,
*                                                     downscale >2x content box @300dpi,
*                                                     remote warn / missing placeholder /
*                                                     --strict hard-fail
*
* Placeholders survive marked, the sanitizer, and smartypants because they are
* plain hyphenated lowercase tokens with no quotes or HTML. Slot HTML is run
* through the same sanitizer as user content before substitution (the bundle
* renders with securityLevel strict — the sanitizer is the second layer).
*
* Reset contract (eng-review D6.2): each fence renders with a fresh
* mermaid.render id; after ANY render error the bundle page is reloaded before
* the next fence so a poisoned global can't corrupt diagram N+1.
*/
import * as fs from "node:fs";
import * as os from "node:os";
import * as path from "node:path";
import * as crypto from "node:crypto";
import * as browseClient from "./browseClient";
import { sanitizeUntrustedHtml } from "./render";
import { imageDims } from "./image-size";
// ─── Types ────────────────────────────────────────────────────────────
/**
 * One diagram fence lifted out of the markdown by extractDiagramFences().
 * Only fences that will actually be rendered are recorded, so every instance
 * produced by that function has `render: true`.
 */
export interface DiagramFence {
/** Lower-cased fence language: "mermaid" | "excalidraw". */
lang: string;
/** Fence body (the diagram source): the lines between opener and closer, joined with "\n". */
source: string;
/** Optional title="..." from the fence info string (a11y label, D6.4). */
title?: string;
/** Optional page=landscape|portrait fence directive (image-policy override). */
page?: "landscape" | "portrait";
/** render=false → leave as a plain code block (escape hatch, D6.3). */
render: boolean;
/** Placeholder token substituted into the markdown in place of the fence. */
token: string;
/** 1-based ordinal among rendered fences (unique ids, aria fallback). */
ordinal: number;
}
/** Result of extractDiagramFences(). */
export interface FenceExtraction {
/** The input markdown with every rendered diagram fence replaced by its placeholder token line. */
markdown: string;
/** The extracted fences, in document order (ordinal 1..n). */
fences: DiagramFence[];
}
/**
 * Carrier for a warning callback.
 * NOTE(review): the consumers of this interface are outside this view.
 */
export interface PrepassWarnings {
/** Receives one human-readable warning message per call. */
warn: (msg: string) => void;
}
/**
 * Options for the image part of the pre-pass.
 * NOTE(review): presumably consumed by inlineLocalImages() named in the file
 * header — the consumer is outside this view; confirm.
 */
export interface PrepassImageOptions {
/** Directory of the source markdown — relative image paths resolve here. */
inputDir: string;
/** Hard-fail on missing/remote images instead of warn (D6.1). */
strict: boolean;
/** Remote images are left untouched when network is explicitly allowed. */
allowNetwork: boolean;
/** Physical content-box width in inches (page width minus margins). */
contentWidthIn: number;
/** Receives one human-readable warning message per call. */
warn: (msg: string) => void;
/**
 * Lazily provides a ready bundle tab (only opened when needed).
 * May return null. NOTE(review): RenderTab is not declared or imported in the
 * visible part of the file — confirm where it comes from.
 */
getTab: () => RenderTab | null;
}
// Print-resolution policy (eng-review D4): downscale rasters wider than
// DOWNSCALE_FACTOR × contentWidth × PRINT_DPI down to contentWidth × PRINT_DPI.
// NOTE(review): the code that applies this policy is outside this view.
/** Target print resolution, in dots per inch. */
const PRINT_DPI = 300;
/** Oversize multiple of the content-box pixel width beyond which a raster is downscaled. */
const DOWNSCALE_FACTOR = 2;
/**
 * Error type with `name` set to "StrictModeError" so callers can tell it apart
 * from generic errors.
 * NOTE(review): presumably thrown for the `strict` hard-fail path described on
 * PrepassImageOptions — the throw sites are outside this view; confirm.
 */
export class StrictModeError extends Error {
constructor(msg: string) {
super(msg);
// Override the inherited "Error" name so logs and checks on `name` identify this type.
this.name = "StrictModeError";
}
}
// ─── Fence extraction (pure) ──────────────────────────────────────────
/** Fence languages that are extracted as diagrams; matched against the lower-cased first word of the info string. */
const DIAGRAM_LANGS = new Set(["mermaid", "excalidraw"]);
/**
 * Extract top-level ```mermaid / ```excalidraw fences, replacing each with a
 * unique placeholder token paragraph. Backtick and tilde fences, any length
 * >= 3; closers must use the same character, be at least as long as the
 * opener, and carry no info string (CommonMark).
 *
 * Everything that is not extracted is passed through:
 *  - non-diagram fences are replayed with their original opener line, so the
 *    opener's indentation (which CommonMark strips from the body lines) is
 *    preserved;
 *  - fences with `render=false` in the info string are replayed with the info
 *    string reduced to the language, so the flag never reaches the renderer;
 *  - anonymous fences are copied through to their closer, so a ```mermaid
 *    example inside a plain code block is never extracted;
 *  - a fence still open at EOF is replayed as code (CommonMark treats it as
 *    code to EOF) under the same opener rules as above.
 *
 * @param markdown Raw markdown source.
 * @returns The rewritten markdown plus one DiagramFence per extracted fence,
 *          in document order. Tokens embed a random per-call id and the
 *          fence's 1-based ordinal.
 */
export function extractDiagramFences(markdown: string): FenceExtraction {
  const lines = markdown.split("\n");
  const out: string[] = [];
  const fences: DiagramFence[] = [];
  // Random per-call id so tokens cannot collide with text already in the document.
  const runId = crypto.randomBytes(4).toString("hex");
  let i = 0;
  let openFence: { raw: string; char: string; len: number; info: string; body: string[] } | null = null;
  let ordinal = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (openFence) {
      // Inside a named fence: collect body lines until a valid closer shows up.
      const close = matchFenceLine(line);
      if (close && close.char === openFence.char && close.len >= openFence.len && close.info === "") {
        const info = parseInfoString(openFence.info);
        if (DIAGRAM_LANGS.has(info.lang) && info.render) {
          ordinal++;
          const token = `gstack-diagram-slot-${runId}-${ordinal}`;
          fences.push({
            lang: info.lang,
            source: openFence.body.join("\n"),
            title: info.title,
            page: info.page,
            render: true,
            token,
            ordinal,
          });
          // Blank lines on both sides make the token its own paragraph.
          out.push("", token, "");
        } else {
          // Not a diagram fence (or render=false): replay it as a code block.
          out.push(replayFenceOpener(openFence, info));
          out.push(...openFence.body);
          out.push(line);
        }
        openFence = null;
        i++;
        continue;
      }
      openFence.body.push(line);
      i++;
      continue;
    }
    const open = matchFenceLine(line);
    if (open && open.info !== "") {
      // Named fence: buffer it; the decision is made once the closer is seen.
      openFence = { raw: line, char: open.char, len: open.len, info: open.info, body: [] };
      i++;
      continue;
    }
    if (open) {
      // Anonymous fence (plain code block) — copy through to its closer so a
      // ```mermaid example INSIDE a plain fence is never extracted.
      out.push(line);
      i++;
      while (i < lines.length) {
        const l = lines[i];
        const close = matchFenceLine(l);
        out.push(l);
        i++;
        if (close && close.char === open.char && close.len >= open.len && close.info === "") break;
      }
      continue;
    }
    out.push(line);
    i++;
  }
  // Unclosed fence at EOF: replay as code (CommonMark treats it as code to EOF).
  if (openFence) {
    out.push(replayFenceOpener(openFence, parseInfoString(openFence.info)));
    out.push(...openFence.body);
  }
  return { markdown: out.join("\n"), fences };
}

/**
 * Opener line to emit for a fence that is replayed instead of extracted.
 * Normally the original line verbatim; when the info string carries
 * render=false the line is rebuilt as indent + fence + language (keeping any
 * trailing carriage return) so the flag is dropped but the indentation is not.
 */
function replayFenceOpener(
  fence: { raw: string; char: string; len: number },
  info: { lang: string; render: boolean },
): string {
  if (info.render) return fence.raw;
  const indent = /^ */.exec(fence.raw)?.[0] ?? "";
  const eol = fence.raw.endsWith("\r") ? "\r" : "";
  return `${indent}${fence.char.repeat(fence.len)}${info.lang}${eol}`;
}
/**
 * Recognise a CommonMark code-fence line (opener or closer).
 *
 * A fence line is 0–3 spaces of indentation, then a run of at least three
 * backticks or tildes, then an optional info string.
 *
 * @param line One line of markdown, without its "\n". A trailing "\r" (CRLF
 *             input split on "\n") is tolerated.
 * @returns The fence character, the run length and the trimmed info string
 *          ("" when absent), or null when the line is not a fence line.
 */
function matchFenceLine(line: string): { char: string; len: number; info: string } | null {
  // `.` never matches "\r" and `$` only matches at end of input, so a CRLF
  // line with an info string would otherwise fail to match at all.
  const text = line.endsWith("\r") ? line.slice(0, -1) : line;
  const m = /^ {0,3}(`{3,}|~{3,})\s*(.*)$/.exec(text);
  if (!m) return null;
  const marker = m[1];
  const info = m[2].trim();
  // CommonMark: the info string of a backtick fence may not contain backticks
  // (otherwise the line is inline code such as ```x``` and not a fence).
  if (marker.charAt(0) === "`" && info.includes("`")) return null;
  return { char: marker.charAt(0), len: marker.length, info };
}
/**
 * Parse a fence info string: `mermaid`, `mermaid render=false`,
 * `mermaid title="Auth flow"`, `mermaid page=landscape`.
 *
 * @param info Trimmed info string of a fence opener.
 * @returns
 *  - `lang`: first whitespace-delimited word, lower-cased ("" when empty);
 *  - `render`: false only when a `render=false` directive is present;
 *  - `title`: value of `title="…"` (double quotes win) or `title='…'`;
 *  - `page`: lower-cased `page=landscape|portrait` directive, else undefined.
 *
 * Directive keys are matched case-insensitively. Text inside a quoted title
 * is never interpreted as a directive, so `title="why render=false exists"`
 * still renders.
 */
export function parseInfoString(info: string): {
  lang: string; render: boolean; title?: string; page?: "landscape" | "portrait";
} {
  const lang = (info.match(/^\S+/)?.[0] ?? "").toLowerCase();
  const title = info.match(/\btitle\s*=\s*"([^"]*)"/i)?.[1]
    ?? info.match(/\btitle\s*=\s*'([^']*)'/i)?.[1];
  // Blank out quoted title values before looking for directives; the empty
  // quotes keep the word boundaries around the value exactly as they were.
  const directives = info.replace(/(\btitle\s*=\s*)(?:"[^"]*"|'[^']*')/gi, '$1""');
  const render = !/\brender\s*=\s*false\b/i.test(directives);
  const pageRaw = directives.match(/\bpage\s*=\s*(landscape|portrait)\b/i)?.[1]?.toLowerCase();
  const page = pageRaw === "landscape" || pageRaw === "portrait" ? pageRaw : undefined;
  return { lang, render, title, page };
}
// ─── Slot substitution (pure) ─────────────────────────────────────────
/**
 * Replace placeholder tokens in rendered HTML with their final slot HTML.
 * marked wraps the bare token line in <p>…</p>; replace the wrapper too so
 * the figure isn't nested inside a paragraph.
 *
 * For each token: if at least one <p>-wrapped occurrence exists, those
 * wrapped occurrences are replaced (wrapper included); otherwise every bare
 * occurrence is replaced.
 *
 * @param html  Rendered document HTML containing placeholder tokens.
 * @param slots Map from placeholder token to the HTML that replaces it.
 * @returns The HTML with the slots substituted; `html` unchanged when no
 *          token occurs.
 */
export function substituteSlots(html: string, slots: Map<string, string>): string {
  let s = html;
  for (const [token, slotHtml] of slots) {
    // Tokens are plain hyphenated text today, but escape anyway so a token can
    // never be read as regex syntax.
    const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // Function replacer: slot HTML is inserted literally, so "$&" or "$$" in a
    // diagram label is not treated as a replacement pattern.
    const insert = (): string => slotHtml;
    const wrapped = new RegExp(`<p>\\s*${escaped}\\s*</p>`, "g");
    if (s.search(wrapped) !== -1) {
      s = s.replace(wrapped, insert);
    } else {
      // Tokens end in an ordinal, so "…-1" is a prefix of "…-10"; the lookahead
      // keeps slot 1 from being substituted into the tokens of slots 10–19.
      s = s.replace(new RegExp(`${escaped}(?!\\d)`, "g"), insert);
    }
  }
  return s;
}
/**
* Visible diagnostic block for a failed fence render — never silent raw code
* (eng-review: explicit error blocks). Sanitizer-safe: all dynamic content is
* HTML-escaped.
*/
export function buildDiagnosticBlock(fence: DiagramFence, errorMessage: string): string {
const excerpt = fence.source.split("\n").slice(0, 8).join("\n");
const truncated = fence.source.split("\n").length > 8 ? "\n…" : "";
return [
``,
`Diagram failed to render (${escapeHtml(fence.lang)})`,
`