mirror of https://github.com/garrytan/gstack.git
fix(make-pdf): assign body heading ids so --toc anchors resolve
- buildTocBlock minted `toc-N` ids for its `#toc-N` anchors and `data-toc-target` spans, but no body heading ever received those ids, so every TOC link was a dead fragment and Paged.js had no target to count page numbers against - Add annotateHeadingIds: walk H1-H3 in document order, inject `id="toc-N"` on headings that lack one, preserve any heading that already declares an id and point the TOC at it - Share the resolved heading/id list between body and TOC so anchors and targets always agree - 3 regression tests: anchor-resolves-to-heading, existing-id preserved, no id injection when toc is off Fixes #1689.
This commit is contained in:
parent
070722ace3
commit
8dce597eba
|
|
@ -106,14 +106,22 @@ export function render(opts: RenderOptions): RenderResult {
|
|||
})
|
||||
: "";
|
||||
|
||||
// Assign stable ids to body headings so the TOC's `#toc-N` anchors and
|
||||
// `data-toc-target` spans resolve to a real element. Headings that already
|
||||
// declare an id keep it; the TOC points at whatever id the heading carries.
|
||||
// Only worth doing when a TOC is requested (the ids exist solely for it).
|
||||
const { html: bodyHtml, headings: tocHeadings } = opts.toc
|
||||
? annotateHeadingIds(typographicHtml)
|
||||
: { html: typographicHtml, headings: [] };
|
||||
|
||||
const tocBlock = opts.toc
|
||||
? buildTocBlock(typographicHtml)
|
||||
? buildTocBlock(tocHeadings)
|
||||
: "";
|
||||
|
||||
// Wrap body in .chapter sections at H1 boundaries if chapter breaks are on.
|
||||
const chapterHtml = opts.noChapterBreaks
|
||||
? `<section class="chapter">${typographicHtml}</section>`
|
||||
: wrapChaptersByH1(typographicHtml);
|
||||
? `<section class="chapter">${bodyHtml}</section>`
|
||||
: wrapChaptersByH1(bodyHtml);
|
||||
|
||||
const watermarkBlock = opts.watermark
|
||||
? `<div class="watermark">${escapeHtml(opts.watermark)}</div>`
|
||||
|
|
@ -251,23 +259,29 @@ function buildCoverBlock(opts: {
|
|||
].filter(Boolean).join("\n");
|
||||
}
|
||||
|
||||
interface TocHeading {
|
||||
level: number;
|
||||
text: string;
|
||||
id: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan HTML for H1/H2/H3 headings and emit a TOC placeholder.
|
||||
* Page numbers are filled in by Paged.js (when --toc is passed and Paged.js
|
||||
* polyfill is injected).
|
||||
* Emit a TOC placeholder from headings that already carry ids (assigned by
|
||||
* annotateHeadingIds). Each entry's `#id` anchor and `data-toc-target` span
|
||||
* resolve to the matching body heading. Page numbers are filled in by Paged.js
|
||||
* (when --toc is passed and the Paged.js polyfill is injected), which needs the
|
||||
* target heading to exist with the referenced id before it can count pages.
|
||||
*/
|
||||
function buildTocBlock(html: string): string {
|
||||
const headings = extractHeadings(html);
|
||||
function buildTocBlock(headings: TocHeading[]): string {
|
||||
if (headings.length === 0) return "";
|
||||
|
||||
const items = headings.map((h, i) => {
|
||||
const items = headings.map((h) => {
|
||||
const level = h.level >= 2 ? "level-2" : "level-1";
|
||||
const id = `toc-${i}`;
|
||||
return [
|
||||
` <li class="${level}">`,
|
||||
` <span class="toc-title"><a href="#${id}">${escapeHtml(h.text)}</a></span>`,
|
||||
` <span class="toc-title"><a href="#${h.id}">${escapeHtml(h.text)}</a></span>`,
|
||||
` <span class="toc-dots"></span>`,
|
||||
` <span class="toc-page" data-toc-target="${id}"></span>`,
|
||||
` <span class="toc-page" data-toc-target="${h.id}"></span>`,
|
||||
` </li>`,
|
||||
].join("\n");
|
||||
}).join("\n");
|
||||
|
|
@ -282,16 +296,36 @@ function buildTocBlock(html: string): string {
|
|||
].join("\n");
|
||||
}
|
||||
|
||||
function extractHeadings(html: string): Array<{ level: number; text: string }> {
|
||||
const re = /<(h[1-3])[^>]*>([\s\S]*?)<\/\1>/gi;
|
||||
const headings: Array<{ level: number; text: string }> = [];
|
||||
let match;
|
||||
while ((match = re.exec(html)) !== null) {
|
||||
const level = parseInt(match[1].slice(1), 10);
|
||||
const text = decodeTextEntities(stripTags(match[2]).trim());
|
||||
if (text) headings.push({ level, text });
|
||||
/**
|
||||
* Walk H1-H3 headings in document order, assigning each a stable id the TOC can
|
||||
* link to. A heading that already declares an `id` keeps it (the TOC points at
|
||||
* the existing id); a heading with no id gets `id="toc-N"` injected, where N is
|
||||
* its document-order index. Returns the rewritten HTML plus the heading list
|
||||
* (level, decoded text, resolved id) for buildTocBlock to consume, so anchors
|
||||
* and targets are guaranteed to agree.
|
||||
*/
|
||||
function annotateHeadingIds(html: string): { html: string; headings: TocHeading[] } {
|
||||
const headings: TocHeading[] = [];
|
||||
let i = 0;
|
||||
const out = html.replace(
|
||||
/<(h[1-3])([^>]*)>([\s\S]*?)<\/\1>/gi,
|
||||
(whole, tag: string, attrs: string, inner: string) => {
|
||||
const level = parseInt(tag.slice(1), 10);
|
||||
const text = decodeTextEntities(stripTags(inner).trim());
|
||||
// Empty headings carry no TOC entry; leave them untouched.
|
||||
if (!text) return whole;
|
||||
const idx = i++;
|
||||
const existing = attrs.match(/\bid\s*=\s*["']([^"']*)["']/i);
|
||||
if (existing) {
|
||||
headings.push({ level, text, id: existing[1] });
|
||||
return whole;
|
||||
}
|
||||
return headings;
|
||||
const id = `toc-${idx}`;
|
||||
headings.push({ level, text, id });
|
||||
return `<${tag}${attrs} id="${id}">${inner}</${tag}>`;
|
||||
},
|
||||
);
|
||||
return { html: out, headings };
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -227,6 +227,48 @@ describe("render (end-to-end)", () => {
|
|||
expect(result.html).toContain("Two");
|
||||
});
|
||||
|
||||
// Issue #1689: every TOC anchor (`#toc-N`) and page-number target
|
||||
// (`data-toc-target="toc-N"`) must resolve to a body heading that actually
|
||||
// carries that id. Before the fix, the TOC minted ids no heading ever
|
||||
// received, so anchors were dead and Paged.js had no target to count pages
|
||||
// against.
|
||||
test("TOC anchors resolve to body heading ids (issue #1689)", () => {
|
||||
const result = render({
|
||||
markdown: `# One\n\n## Sub\n\nbody\n\n# Two\n\nbody\n`,
|
||||
toc: true,
|
||||
});
|
||||
const hrefs = [...result.html.matchAll(/href="#([^"]+)"/g)].map((m) => m[1]);
|
||||
const targets = [...result.html.matchAll(/data-toc-target="([^"]+)"/g)].map((m) => m[1]);
|
||||
const headingIds = [...result.html.matchAll(/<h[1-3][^>]*\bid="([^"]+)"/g)].map((m) => m[1]);
|
||||
|
||||
expect(hrefs.length).toBe(3);
|
||||
expect(targets).toEqual(hrefs);
|
||||
// Every anchor + target points at a real heading id.
|
||||
for (const ref of [...hrefs, ...targets]) {
|
||||
expect(headingIds).toContain(ref);
|
||||
}
|
||||
});
|
||||
|
||||
test("TOC keeps a heading's pre-existing id instead of overwriting it (issue #1689)", () => {
|
||||
const result = render({
|
||||
markdown: `<h1 id="intro">Intro</h1>\n\n# Two\n`,
|
||||
toc: true,
|
||||
});
|
||||
// The heading's own id is preserved and the TOC links to it.
|
||||
expect(result.html).toContain(`id="intro"`);
|
||||
expect(result.html).toContain(`href="#intro"`);
|
||||
expect(result.html).toContain(`data-toc-target="intro"`);
|
||||
// The id-less second heading still gets a minted id its entry points at.
|
||||
const headingIds = [...result.html.matchAll(/<h[1-3][^>]*\bid="([^"]+)"/g)].map((m) => m[1]);
|
||||
const hrefs = [...result.html.matchAll(/href="#([^"]+)"/g)].map((m) => m[1]);
|
||||
for (const ref of hrefs) expect(headingIds).toContain(ref);
|
||||
});
|
||||
|
||||
test("no toc-id injection when toc is off (issue #1689)", () => {
|
||||
const result = render({ markdown: `# One\n\n## Sub\n`, toc: false });
|
||||
expect(result.html).not.toContain(`id="toc-`);
|
||||
});
|
||||
|
||||
test("strips dangerous HTML from untrusted markdown", () => {
|
||||
const result = render({
|
||||
markdown: `# Safe\n\n<script>alert('xss')</script>\n\nBody.`,
|
||||
|
|
|
|||
Loading…
Reference in New Issue