From a10d1d8162bca209b50a7a71e538f7671e109c3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrea=20S=C3=A1nchez=20Blanco?= Date: Thu, 21 May 2026 10:49:51 +0200 Subject: [PATCH 1/2] feat(docs): split llms.txt into spec-compliant index and full dump --- packages/stacks-docs/.eleventy.js | 4 +++ packages/stacks-docs/llms.11ty.js | 48 +++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 packages/stacks-docs/llms.11ty.js diff --git a/packages/stacks-docs/.eleventy.js b/packages/stacks-docs/.eleventy.js index d9b9f6981b..acdc51237b 100644 --- a/packages/stacks-docs/.eleventy.js +++ b/packages/stacks-docs/.eleventy.js @@ -18,8 +18,12 @@ module.exports = function(eleventyConfig) { eleventyConfig.addPlugin(iconPlugin); eleventyConfig.addPlugin(headerPlugin); eleventyConfig.addPlugin(tipPlugin); + // Produces /llms-full.txt — a content dump for tools that want the full + // markdown of every page in one file. The spec-compliant index lives at + // /llms.txt and is rendered by llms.11ty.js. eleventyConfig.addPlugin(llmsTxtPlugin, { siteUrl: 'https://v2.stackoverflow.design', + outputPath: 'llms-full.txt', collections: ['base', 'components', 'develop', 'foundation'], additionalMetadata: ['description'], normalizeWhitespace: true, diff --git a/packages/stacks-docs/llms.11ty.js b/packages/stacks-docs/llms.11ty.js new file mode 100644 index 0000000000..006a5338c4 --- /dev/null +++ b/packages/stacks-docs/llms.11ty.js @@ -0,0 +1,48 @@ +// Renders the spec-compliant llms.txt index at /llms.txt +// (see https://llmstxt.org). The full content dump lives at /llms-full.txt, +// generated by eleventy-plugin-llms-txt — see .eleventy.js. 
+ +const SECTIONS = [ + { tag: "base", heading: "Base utilities" }, + { tag: "components", heading: "Components" }, + { tag: "develop", heading: "Develop" }, + { tag: "foundation", heading: "Foundation" }, +]; + +function pageLink(page, siteUrl) { + const title = page.data.title; + const description = (page.data.description || "") + .replace(/<[^>]+>/g, "") // descriptions may contain inline HTML for the rendered page + .replace(/\s+/g, " ") + .trim(); + const suffix = description ? `: ${description}` : ""; + return `- [${title}](${siteUrl}${page.url})${suffix}`; +} + +module.exports = class { + data() { + return { + permalink: "/llms.txt", + eleventyExcludeFromCollections: true, + }; + } + + render({ collections, site }) { + const sections = SECTIONS.map(({ tag, heading }) => { + const pages = (collections[tag] || []) + .slice() + .sort((a, b) => + (a.data.title || "").localeCompare(b.data.title || "") + ); + const lines = pages.map((p) => pageLink(p, site.url)); + return `## ${heading}\n\n${lines.join("\n")}`; + }); + + return `# ${site.title} + +> ${site.description} + +${sections.join("\n\n")} +`; + } +}; From 32d39b1aff33ae617871dc84ced391d4b98cb3d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrea=20S=C3=A1nchez=20Blanco?= Date: Thu, 21 May 2026 11:18:10 +0200 Subject: [PATCH 2/2] fix(docs): make llms.txt HTML stripper resilient to unclosed tags CodeQL js/incomplete-multi-character-sanitization flagged the single-pass regex strip in llms.11ty.js because unclosed or nested-looking tags could leave the pattern in the output. Loop the replace until the string is stable, per CodeQL's recommended fix. Output is unchanged for all current inputs (frontmatter descriptions are author-written and well-formed); this is defense in depth. 
--- packages/stacks-docs/llms.11ty.js | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/packages/stacks-docs/llms.11ty.js b/packages/stacks-docs/llms.11ty.js index 006a5338c4..4730200001 100644 --- a/packages/stacks-docs/llms.11ty.js +++ b/packages/stacks-docs/llms.11ty.js @@ -9,10 +9,22 @@ const SECTIONS = [ { tag: "foundation", heading: "Foundation" }, ]; +// Descriptions may contain inline HTML for the rendered page; strip it for +// llms.txt. Loop until stable so unclosed/nested-looking tags can't reintroduce +// the pattern (CodeQL js/incomplete-multi-character-sanitization). +function stripHtml(input) { + let previous; + let output = input; + do { + previous = output; + output = output.replace(/<[^>]+>/g, ""); + } while (output !== previous); + return output; +} + function pageLink(page, siteUrl) { const title = page.data.title; - const description = (page.data.description || "") - .replace(/<[^>]+>/g, "") // descriptions may contain inline HTML for the rendered page + const description = stripHtml(page.data.description || "") .replace(/\s+/g, " ") .trim(); const suffix = description ? `: ${description}` : "";