This commit is contained in:
DevNinja 2026-06-03 07:36:45 +02:00 committed by GitHub
commit 3299553a84
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 98 additions and 22 deletions

View File

@ -106,14 +106,22 @@ export function render(opts: RenderOptions): RenderResult {
})
: "";
// Assign stable ids to body headings so the TOC's `#toc-N` anchors and
// `data-toc-target` spans resolve to a real element. Headings that already
// declare an id keep it; the TOC points at whatever id the heading carries.
// Only worth doing when a TOC is requested (the ids exist solely for it).
const { html: bodyHtml, headings: tocHeadings } = opts.toc
? annotateHeadingIds(typographicHtml)
: { html: typographicHtml, headings: [] };
const tocBlock = opts.toc
? buildTocBlock(typographicHtml)
? buildTocBlock(tocHeadings)
: "";
// Wrap body in .chapter sections at H1 boundaries if chapter breaks are on.
const chapterHtml = opts.noChapterBreaks
? `<section class="chapter">${typographicHtml}</section>`
: wrapChaptersByH1(typographicHtml);
? `<section class="chapter">${bodyHtml}</section>`
: wrapChaptersByH1(bodyHtml);
const watermarkBlock = opts.watermark
? `<div class="watermark">${escapeHtml(opts.watermark)}</div>`
@ -251,23 +259,29 @@ function buildCoverBlock(opts: {
].filter(Boolean).join("\n");
}
/**
 * One body heading as the table of contents sees it: enough to render the
 * entry's label and to link it back to the heading element in the body.
 */
interface TocHeading {
/** Heading depth parsed from the tag name: 1 for `h1`, 2 for `h2`, 3 for `h3`. */
level: number;
/** Heading text with inner tags stripped, trimmed, and entities decoded. */
text: string;
/**
 * Id the TOC entry links to: the heading's own id when it declares one,
 * otherwise the id assigned by annotateHeadingIds.
 */
id: string;
}
/**
* Scan HTML for H1/H2/H3 headings and emit a TOC placeholder.
* Page numbers are filled in by Paged.js (when --toc is passed and Paged.js
* polyfill is injected).
* Emit a TOC placeholder from headings that already carry ids (assigned by
* annotateHeadingIds). Each entry's `#id` anchor and `data-toc-target` span
* resolve to the matching body heading. Page numbers are filled in by Paged.js
* (when --toc is passed and the Paged.js polyfill is injected), which needs the
* target heading to exist with the referenced id before it can count pages.
*/
function buildTocBlock(html: string): string {
const headings = extractHeadings(html);
function buildTocBlock(headings: TocHeading[]): string {
if (headings.length === 0) return "";
const items = headings.map((h, i) => {
const items = headings.map((h) => {
const level = h.level >= 2 ? "level-2" : "level-1";
const id = `toc-${i}`;
return [
` <li class="${level}">`,
` <span class="toc-title"><a href="#${id}">${escapeHtml(h.text)}</a></span>`,
` <span class="toc-title"><a href="#${h.id}">${escapeHtml(h.text)}</a></span>`,
` <span class="toc-dots"></span>`,
` <span class="toc-page" data-toc-target="${id}"></span>`,
` <span class="toc-page" data-toc-target="${h.id}"></span>`,
` </li>`,
].join("\n");
}).join("\n");
@ -282,16 +296,36 @@ function buildTocBlock(html: string): string {
].join("\n");
}
function extractHeadings(html: string): Array<{ level: number; text: string }> {
const re = /<(h[1-3])[^>]*>([\s\S]*?)<\/\1>/gi;
const headings: Array<{ level: number; text: string }> = [];
let match;
while ((match = re.exec(html)) !== null) {
const level = parseInt(match[1].slice(1), 10);
const text = decodeTextEntities(stripTags(match[2]).trim());
if (text) headings.push({ level, text });
}
return headings;
/**
 * Walk H1-H3 headings in document order and make sure each non-empty heading
 * carries an id the TOC can link to.
 *
 * - A heading that already declares a non-empty `id` (double-quoted,
 *   single-quoted, or unquoted) keeps it, and the TOC points at that id.
 *   Attributes that merely end in "id" (`data-id`, `aria-rowid`, ...) are not
 *   mistaken for the id.
 * - A heading with no id, or with an empty one, gets `id="toc-N"`, where N is
 *   the heading's index among non-empty H1-H3 headings. If that name is
 *   already used somewhere in the document, a `-1`, `-2`, ... suffix is added
 *   so the anchor stays unambiguous.
 * - Headings whose text is empty after stripping tags are left untouched and
 *   get no TOC entry.
 *
 * @param html Body HTML to scan.
 * @returns The rewritten HTML plus the heading list (level, decoded text,
 *   resolved id) for buildTocBlock to consume, so anchors and targets agree.
 *   Each returned id is safe to place inside a double-quoted attribute.
 */
function annotateHeadingIds(html: string): { html: string; headings: TocHeading[] } {
  // Value forms allowed by HTML attribute syntax: "double", 'single', unquoted.
  const valuePattern = "(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'=<>`]+))";
  // One attribute per match: its name, then an optional value. Tokenizing
  // (rather than searching for `id=`) keeps `data-id="x"` and quoted values
  // that happen to contain `id="..."` from being misread as the id.
  const attrPattern = new RegExp(`([^\\s"'<>/=]+)(?:\\s*=\\s*${valuePattern})?`, "g");
  const anyIdPattern = new RegExp(`\\sid\\s*=\\s*${valuePattern}`, "gi");

  // Ids already present anywhere in the document; minted ids must avoid them.
  const taken = new Set<string>();
  for (const m of html.matchAll(anyIdPattern)) {
    taken.add(m[1] ?? m[2] ?? m[3] ?? "");
  }

  // Locate the `id` attribute (if any) within a start tag's attribute string.
  const findIdAttr = (
    attrs: string,
  ): { value: string; start: number; end: number } | undefined => {
    for (const m of attrs.matchAll(attrPattern)) {
      if ((m[1] ?? "").toLowerCase() !== "id") continue;
      const start = m.index ?? 0;
      return { value: m[2] ?? m[3] ?? m[4] ?? "", start, end: start + m[0].length };
    }
    return undefined;
  };

  // Produce `toc-N`, falling back to `toc-N-1`, `toc-N-2`, ... on collision.
  const mintId = (position: number): string => {
    let candidate = `toc-${position}`;
    for (let n = 1; taken.has(candidate); n++) candidate = `toc-${position}-${n}`;
    taken.add(candidate);
    return candidate;
  };

  const headings: TocHeading[] = [];
  let nextIndex = 0;
  const out = html.replace(
    /<(h[1-3])([^>]*)>([\s\S]*?)<\/\1>/gi,
    (whole: string, tag: string, attrs: string, inner: string) => {
      const text = decodeTextEntities(stripTags(inner).trim());
      // Empty headings carry no TOC entry; leave them untouched.
      if (!text) return whole;
      const level = parseInt(tag.slice(1), 10);
      const position = nextIndex++;

      const found = findIdAttr(attrs);
      if (found && found.value !== "") {
        // The value is raw attribute source; a single-quoted id may contain a
        // double quote, so encode it before it is reused in `href="#..."`.
        headings.push({ level, text, id: found.value.replace(/"/g, "&quot;") });
        return whole;
      }

      const id = mintId(position);
      headings.push({ level, text, id });
      // Drop an empty/valueless id attribute first: with two `id` attributes
      // the HTML parser keeps only the first, which would hide the minted one.
      const kept = found ? attrs.slice(0, found.start) + attrs.slice(found.end) : attrs;
      return `<${tag}${kept} id="${id}">${inner}</${tag}>`;
    },
  );
  return { html: out, headings };
}
/**

View File

@ -227,6 +227,48 @@ describe("render (end-to-end)", () => {
expect(result.html).toContain("Two");
});
// Issue #1689: a TOC entry is only useful if both its link (`href="#..."`)
// and its page-number hook (`data-toc-target="..."`) name an id that some body
// heading really has. Previously the TOC invented ids that were never written
// onto any heading, leaving dead links and nothing for Paged.js to measure.
test("TOC anchors resolve to body heading ids (issue #1689)", () => {
  const { html } = render({
    toc: true,
    markdown: `# One\n\n## Sub\n\nbody\n\n# Two\n\nbody\n`,
  });

  // Collect capture group 1 from every match of `pattern` in the output.
  const captures = (pattern: RegExp) => Array.from(html.matchAll(pattern), (m) => m[1]);

  const linkIds = captures(/href="#([^"]+)"/g);
  const pageTargets = captures(/data-toc-target="([^"]+)"/g);
  const bodyIds = captures(/<h[1-3][^>]*\bid="([^"]+)"/g);

  // Three headings in, three entries out, each link paired with its target.
  expect(linkIds.length).toBe(3);
  expect(pageTargets).toEqual(linkIds);

  // Nothing in the TOC may reference an id that no heading carries.
  linkIds.concat(pageTargets).forEach((ref) => {
    expect(bodyIds).toContain(ref);
  });
});
test("TOC keeps a heading's pre-existing id instead of overwriting it (issue #1689)", () => {
  const { html } = render({
    toc: true,
    markdown: `<h1 id="intro">Intro</h1>\n\n# Two\n`,
  });

  // The author's id survives on the heading, and both the TOC link and the
  // page-number hook reference it.
  for (const fragment of [`id="intro"`, `href="#intro"`, `data-toc-target="intro"`]) {
    expect(html).toContain(fragment);
  }

  // The second heading had no id of its own; whatever id it was given must be
  // the one its TOC entry links to.
  const captures = (pattern: RegExp) => Array.from(html.matchAll(pattern), (m) => m[1]);
  const bodyIds = captures(/<h[1-3][^>]*\bid="([^"]+)"/g);
  captures(/href="#([^"]+)"/g).forEach((ref) => {
    expect(bodyIds).toContain(ref);
  });
});
test("no toc-id injection when toc is off (issue #1689)", () => {
  // Minted ids exist only to serve the TOC, so without one the headings
  // should come through with no `toc-N` id attached.
  const { html } = render({ toc: false, markdown: `# One\n\n## Sub\n` });
  expect(html).not.toContain(`id="toc-`);
});
test("strips dangerous HTML from untrusted markdown", () => {
const result = render({
markdown: `# Safe\n\n<script>alert('xss')</script>\n\nBody.`,