docs: add generator for llms-full.txt (#11323)

* initial

* improvements

* finished implementation

* transform links to index.html.md links

* fix for resources
This commit is contained in:
Shahed Nasser
2025-02-05 16:34:39 +02:00
committed by GitHub
parent da25980d24
commit 5f7ff7f9f0
17 changed files with 31023 additions and 17 deletions

View File

@@ -24,6 +24,7 @@
"autoprefixer": "10.4.14",
"clsx": "^2.0.0",
"docs-ui": "*",
"docs-utils": "*",
"jsdom": "^22.1.0",
"json-schema": "^0.4.0",
"json-stringify-pretty-compact": "^4.0.0",

View File

@@ -6,6 +6,7 @@ import readSpecDocument from "./read-spec-document"
import getSectionId from "./get-section-id"
import dereference from "./dereference"
import { unstable_cache } from "next/cache"
import { oasFileToPath } from "docs-utils"
async function getPathsOfTag_(
tagName: string,
@@ -25,9 +26,7 @@ async function getPathsOfTag_(
return {
...fileContent,
operationPath: `/${file
.replaceAll(/(?<!\{[^}]*)_(?![^{]*\})/g, "/")
.replace(/\.[A-Za-z]+$/, "")}`,
operationPath: oasFileToPath(file),
}
})
)

View File

@@ -17,4 +17,5 @@ NEXT_PUBLIC_AI_WEBSITE_ID=
NEXT_PUBLIC_AI_API_ASSISTANT_RECAPTCHA_SITE_KEY=
CLOUDINARY_CLOUD_NAME=
NEXT_PUBLIC_BASE_PATH=
NEXT_PUBLIC_GA_ID=
NEXT_PUBLIC_GA_ID=
NEXT_PUBLIC_PROD_BASE_URL=

View File

@@ -67,7 +67,7 @@ export async function GET(req: NextRequest, { params }: Params) {
const getCleanMd_ = unstable_cache(
async (filePath: string, plugins?: { before?: Plugin[]; after?: Plugin[] }) =>
getCleanMd({ filePath, plugins }),
getCleanMd({ file: filePath, plugins }),
["clean-md"],
{
revalidate: 3600,

File diff suppressed because it is too large Load Diff

View File

@@ -1,11 +1,234 @@
import "dotenv/config"
import path from "path"
import { sidebar } from "../sidebar.mjs"
import { generateEditedDates, generateSidebar } from "build-scripts"
import {
generateEditedDates,
generateLlmsFull,
generateSidebar,
} from "build-scripts"
import {
addUrlToRelativeLink,
crossProjectLinksPlugin,
localLinksRehypePlugin,
} from "remark-rehype-plugins"
/**
 * Runs the prepare steps in sequence: generates the edited dates, generates
 * the numbered sidebar, then writes `public/llms-full.txt` from the scan
 * directories listed below.
 *
 * The order of `scanDirs` is the order in which content is passed to
 * `generateLlmsFull`, so each directory must be listed exactly once.
 */
async function main() {
  await generateEditedDates()
  await generateSidebar(sidebar, {
    addNumbering: true,
  })

  // The production URL takes precedence; NEXT_PUBLIC_BASE_URL is the fallback.
  const baseUrl =
    process.env.NEXT_PUBLIC_PROD_BASE_URL || process.env.NEXT_PUBLIC_BASE_URL

  // Sibling projects are resolved relative to the current working directory.
  const resourcesDir = (...segments) =>
    path.join(process.cwd(), "..", "resources", ...segments)
  const apiReferenceDir = (...segments) =>
    path.join(process.cwd(), "..", "api-reference", ...segments)
  const coreFlowsDir = resourcesDir("references", "core_flows")

  await generateLlmsFull({
    outputPath: path.join(process.cwd(), "public", "llms-full.txt"),
    plugins: {
      before: [
        [
          crossProjectLinksPlugin,
          {
            baseUrl,
            projectUrls: {
              resources: {
                url: baseUrl,
              },
              "user-guide": {
                url: baseUrl,
              },
              ui: {
                url: baseUrl,
              },
              api: {
                url: baseUrl,
              },
            },
            useBaseUrl: true,
          },
        ],
        [localLinksRehypePlugin],
      ],
      after: [[addUrlToRelativeLink, { url: baseUrl }]],
    },
    scanDirs: [
      {
        dir: path.join(process.cwd(), "app"),
      },
      {
        dir: resourcesDir("app", "commerce-modules"),
        allowedFilesPatterns: [
          /^(?!.*\/(workflows|js-sdk|extend|events|admin-widget-zones)\/).*$/,
        ],
      },
      {
        dir: coreFlowsDir,
        allowedFilesPatterns: [/Workflows_[^.]+\/functions/],
        generator: {
          name: "workflows",
          options: {
            baseUrl,
          },
        },
      },
      {
        dir: coreFlowsDir,
        allowedFilesPatterns: [/Steps_[^.]+\/functions/],
        generator: {
          name: "steps",
          options: {
            baseUrl,
          },
        },
      },
      // Listed once: a second identical entry would emit the CLI docs twice.
      {
        dir: resourcesDir("app", "medusa-cli"),
      },
      {
        dir: resourcesDir("app", "js-sdk"),
      },
      {
        dir: resourcesDir("references", "js_sdk", "admin"),
        allowedFilesPatterns: [/\/methods\//],
        generator: {
          name: "jsSdk",
          options: {
            baseUrl,
            type: "Admin",
          },
        },
      },
      {
        dir: resourcesDir("references", "js_sdk", "auth"),
        allowedFilesPatterns: [/\/methods\//],
        generator: {
          name: "jsSdk",
          options: {
            baseUrl,
            type: "Auth",
          },
        },
      },
      {
        dir: resourcesDir("references", "js_sdk", "store"),
        allowedFilesPatterns: [/\/properties\//],
        generator: {
          name: "jsSdk",
          options: {
            baseUrl,
            type: "Store",
          },
        },
      },
      {
        dir: resourcesDir(
          "references",
          "medusa_config",
          "interfaces",
          "medusa_config.ConfigModule"
        ),
      },
      {
        dir: resourcesDir("app", "admin-components"),
      },
      {
        dir: resourcesDir("app", "service-factory-reference"),
      },
      {
        dir: apiReferenceDir("markdown"),
      },
      {
        dir: apiReferenceDir("specs", "admin", "paths"),
        ext: "yaml",
        generator: {
          name: "apiRef",
          options: {
            baseUrl: `${baseUrl}/api/admin`,
            type: "Admin",
          },
        },
      },
      {
        dir: apiReferenceDir("specs", "store", "paths"),
        ext: "yaml",
        generator: {
          name: "apiRef",
          options: {
            baseUrl: `${baseUrl}/api/store`,
            type: "Store",
          },
        },
      },
    ],
  })
}
void main()

View File

@@ -24,7 +24,9 @@ export async function GET(req: NextRequest, { params }: Params) {
path.join(process.cwd(), "app")
path.join(process.cwd(), "references")
const filePathFromMap = await getFileFromMaps(`/${slug.join("/")}`)
const filePathFromMap = await getFileFromMaps(
`/${slug.join("/")}`.replace("//", "/")
)
if (!filePathFromMap) {
return notFound()
}
@@ -77,7 +79,7 @@ export async function GET(req: NextRequest, { params }: Params) {
const getCleanMd_ = unstable_cache(
async (filePath: string, plugins?: { before?: Plugin[]; after?: Plugin[] }) =>
getCleanMd({ filePath, plugins }),
getCleanMd({ file: filePath, plugins }),
["clean-md"],
{
revalidate: 3600,

View File

@@ -58,7 +58,7 @@ const getCleanMd_ = unstable_cache(
const iconNames = Object.keys(Icons).filter((name) => name !== "default")
return getCleanMd({
filePath,
file: filePath,
plugins,
parserOptions: {
ComponentExample: {

View File

@@ -68,7 +68,7 @@ export async function GET(req: NextRequest, { params }: Params) {
const getCleanMd_ = unstable_cache(
async (filePath: string, plugins?: { before?: Plugin[]; after?: Plugin[] }) =>
getCleanMd({ filePath, plugins }),
getCleanMd({ file: filePath, plugins }),
["clean-md"],
{
revalidate: 3600,

View File

@@ -29,7 +29,10 @@
},
"dependencies": {
"docs-utils": "*",
"tags": "*"
"fdir": "^6.4.3",
"slugify": "^1.6.6",
"tags": "*",
"yaml": "^2.7.0"
},
"devDependencies": {
"@types/node": "^20.11.20",

View File

@@ -0,0 +1,115 @@
import { getCleanMd, GetCleanMdOptions } from "docs-utils"
import { fdir } from "fdir"
import { writeFile } from "fs/promises"
import path from "path"
import {
apiRefLlmsGenerator,
CustomLlmsGenerator,
jsSdkLlmsGenerator,
stepsLlmsGenerator,
workflowsLlmsGenerator,
} from "./utils/custom-llms-generators.js"
/** File-extension families that a scan directory can be restricted to. */
type FileExt = "md" | "yaml"
/** Options accepted by `generateLlmsFull`. */
type Options = {
/** Path of the file that the combined text is written to. */
outputPath: string
/** Directories to crawl; their content is appended in the order listed. */
scanDirs: {
/** Directory to crawl. */
dir: string
/** Extra `getCleanMd` options for this directory (`file` and `type` are excluded). */
options?: Omit<GetCleanMdOptions, "file" | "type">
/**
 * When non-empty, only files whose path matches at least one pattern are
 * kept. When omitted or empty, no pattern filtering is applied.
 */
allowedFilesPatterns?: RegExp[]
/**
 * Custom generator to run on the matched files. `name` selects an entry of
 * the `generators` map and `options` is passed as its second argument.
 */
generator?: {
name: "workflows" | "steps" | "jsSdk" | "apiRef"
options: Record<string, unknown>
}
/** Extension family to include; `getContentFromDir` defaults it to `md`. */
ext?: FileExt
}[]
/** Text placed first in the output; `generateLlmsFull` defaults it to an empty string. */
introText?: string
/**
 * Plugins passed to every scan directory; a directory's own `options` are
 * spread after them and therefore take precedence.
 */
plugins?: GetCleanMdOptions["plugins"]
}
/**
 * Lookup table from a scan directory's `generator.name` to the custom
 * generator function that builds the content for that directory.
 */
const generators: Record<string, CustomLlmsGenerator<any>> = {
workflows: workflowsLlmsGenerator,
steps: stepsLlmsGenerator,
jsSdk: jsSdkLlmsGenerator,
apiRef: apiRefLlmsGenerator,
}
/**
 * Tells whether a file name belongs to the requested extension family.
 *
 * @param fileName - File name (or path) whose ending is inspected.
 * @param allowedExt - `md` accepts `.md` and `.mdx`; `yaml` accepts `.yaml` and `.yml`.
 * @returns `true` when the name ends with one of the family's extensions.
 */
const isExtAllowed = (fileName: string, allowedExt: FileExt) => {
  switch (allowedExt) {
    case "md":
      // `.md` optionally followed by `x`, anchored at the end of the name.
      return /\.mdx?$/.test(fileName)
    case "yaml":
      // `.yaml` or `.yml`, anchored at the end of the name.
      return /\.ya?ml$/.test(fileName)
  }
}
/**
 * Collects the cleaned markdown for a single scan directory.
 *
 * Files are crawled with full paths and kept only when their base name has an
 * allowed extension, does not start with `_`, and (if patterns are given) the
 * path matches at least one of `allowedFilesPatterns`.
 *
 * When `generator.name` resolves to a known custom generator, that generator
 * receives all matched files and its output is cleaned as raw content.
 * Otherwise each file is cleaned individually and the results are joined with
 * blank lines.
 *
 * @returns The cleaned markdown for the directory.
 */
const getContentFromDir = async ({
  dir,
  options = {},
  allowedFilesPatterns,
  generator,
  ext = "md",
}: Options["scanDirs"][0]): Promise<string> => {
  const passesPatterns = (filePath: string): boolean =>
    !allowedFilesPatterns?.length ||
    allowedFilesPatterns.some((pattern) => filePath.match(pattern) !== null)

  const isWanted = (filePath: string): boolean => {
    const fileName = path.basename(filePath)
    if (!isExtAllowed(fileName, ext) || fileName.startsWith("_")) {
      return false
    }
    return passesPatterns(filePath)
  }

  const crawler = new fdir().withFullPaths().filter(isWanted)
  const matchedFiles = await crawler.crawl(dir).withPromise()

  if (generator?.name) {
    const customGenerator = generators[generator.name]
    if (customGenerator) {
      const generated = await customGenerator(matchedFiles, generator.options)
      return await getCleanMd({
        file: generated,
        ...options,
        type: "content",
      })
    }
  }

  // Processed one at a time so the output keeps the crawl order.
  const cleanedFiles: string[] = []
  for (const matchedFile of matchedFiles) {
    const cleaned = await getCleanMd({
      file: matchedFile,
      ...options,
    })
    cleanedFiles.push(cleaned)
  }

  return cleanedFiles.join("\n\n")
}
/**
 * Builds the full llms text and writes it to `outputPath`.
 *
 * The output starts with `introText`, followed by the content of each scan
 * directory in the order given, all separated by blank lines.
 *
 * @param outputPath - File the combined text is written to.
 * @param scanDirs - Directories to collect content from.
 * @param introText - Leading text; defaults to an empty string.
 * @param plugins - Plugins passed to every directory; a directory's own
 *   `options` are spread afterwards and win on conflict.
 */
export const generateLlmsFull = async ({
  outputPath,
  scanDirs,
  introText = "",
  plugins,
}: Options) => {
  const sections: string[] = [introText]

  for (const { options: dirOptions, ...scanDir } of scanDirs) {
    const section = await getContentFromDir({
      ...scanDir,
      options: {
        plugins,
        ...dirOptions,
      },
    })
    sections.push(section)
  }

  const output = sections.join("\n\n")
  await writeFile(outputPath, output)
}

View File

@@ -1,4 +1,5 @@
export * from "./generate-edited-dates.js"
export * from "./generate-llms-full.js"
export * from "./generate-sidebar.js"
export * from "./retrieve-mdx-pages.js"

View File

@@ -0,0 +1,127 @@
import { findPageTitle, oasFileToPath } from "docs-utils"
import { readFile } from "fs/promises"
import path from "path"
import pkg from "slugify"
import YAML from "yaml"
// The callable is read from the `default` property of the slugify import.
const slugify = pkg.default
// NOTE(review): assumes the working directory sits three levels below the
// monorepo root — confirm against how the build scripts are invoked.
const monorepoRoot = path.resolve(process.cwd(), "..", "..", "..")
// Root of the resources app, used to derive slugs from file paths.
const referencesRoot = path.join(monorepoRoot, "www", "apps", "resources")
// Generated module listing slug changes; it lives under the resources app.
const slugsPath = path.join(referencesRoot, "generated", "slug-changes.mjs")
// Leading text stripped from the slugs module before parsing the rest as JSON.
const slugsFileContentPrefix = `export const slugChanges = `
/**
 * Signature of a custom llms generator: it receives the list of matched file
 * paths plus generator-specific options and resolves to the generated text.
 */
export type CustomLlmsGenerator<T = Record<string, unknown>> = (
files: string[],
options?: T
) => Promise<string>
/** Options shared by the generators in this file. */
type CommonOptions = {
/** Prefix prepended to the links that the generators emit. */
baseUrl?: string
}
/**
 * Generates the "Workflows" list: one markdown link per reference file.
 *
 * @param files - Reference file paths to list.
 * @param options - Shared generator options (link base URL).
 * @returns The list produced by `generateListForReferenceFiles`.
 */
export const workflowsLlmsGenerator: CustomLlmsGenerator<
  CommonOptions
> = async (files, options) => {
  // Link text is the title with everything from the first " - " removed.
  const toListItem = (title: string, fileSlug: string): string => {
    const shortTitle = title.replace(/ - .+/, "")
    return `[${shortTitle}](${fileSlug})`
  }

  return generateListForReferenceFiles({
    files,
    title: "Workflows",
    itemContent: toListItem,
    options,
  })
}
/**
 * Generates the "Steps" list: one markdown link per reference file.
 *
 * @param files - Reference file paths to list.
 * @param options - Shared generator options (link base URL).
 * @returns The list produced by `generateListForReferenceFiles`.
 */
export const stepsLlmsGenerator: CustomLlmsGenerator<CommonOptions> = async (
  files,
  options
) => {
  // Link text is the title with everything from the first " - " removed.
  const toListItem = (title: string, fileSlug: string): string => {
    const shortTitle = title.replace(/ - .+/, "")
    return `[${shortTitle}](${fileSlug})`
  }

  return generateListForReferenceFiles({
    files,
    title: "Steps",
    itemContent: toListItem,
    options,
  })
}
/**
 * Generates the "JS SDK <type>" list: one markdown link per reference file.
 *
 * @param files - Reference file paths to list.
 * @param options - Shared generator options plus the SDK `type` used in the heading.
 * @returns The list produced by `generateListForReferenceFiles`.
 */
export const jsSdkLlmsGenerator: CustomLlmsGenerator<
  CommonOptions & {
    type: "Admin" | "Store" | "Auth"
  }
> = async (files, options) => {
  const heading = `JS SDK ${options?.type}`

  // Link text is the title with everything from the first " - " removed.
  const toListItem = (title: string, fileSlug: string): string => {
    const shortTitle = title.replace(/ - .+/, "")
    return `[${shortTitle}](${fileSlug})`
  }

  return generateListForReferenceFiles({
    files,
    title: heading,
    itemContent: toListItem,
    options,
  })
}
/**
 * Generates the "<type> API Reference" list from OpenAPI path files.
 *
 * Each file is parsed as YAML and treated as a path item: every HTTP-method
 * key becomes one list entry linking to `<baseUrl>#<tag>_<operationId>`
 * (both parts slugified). The route is derived from the file's base name.
 *
 * Keys that are not HTTP methods (for example `parameters`, `summary` or
 * `servers`) are ignored, and files that do not parse to an object are
 * skipped, so they no longer crash the generator.
 *
 * @param files - Paths of the YAML path files.
 * @param options - `baseUrl` prefixes every link (empty when omitted);
 *   `type` is used in the heading.
 * @returns The markdown list, starting with its heading.
 * @throws Error when an operation has no first tag or no `operationId`,
 *   since no anchor can be built for it.
 */
export const apiRefLlmsGenerator: CustomLlmsGenerator<
  CommonOptions & {
    type: "Admin" | "Store"
  }
> = async (files, options) => {
  // Operation keys of an OpenAPI path item; every other key is metadata.
  const httpMethods = new Set([
    "get",
    "put",
    "post",
    "delete",
    "options",
    "head",
    "patch",
    "trace",
  ])
  // Avoid rendering the literal string "undefined" in links.
  const baseUrl = options?.baseUrl ?? ""
  const lines: string[] = []

  for (const file of files) {
    const pathItem: unknown = YAML.parse(await readFile(file, "utf-8"))
    if (!pathItem || typeof pathItem !== "object") {
      // An empty or scalar document has no operations to list.
      continue
    }

    const oasPath = oasFileToPath(path.basename(file))

    for (const [httpMethod, rawOperation] of Object.entries(pathItem)) {
      if (!httpMethods.has(httpMethod.toLowerCase())) {
        continue
      }

      const { tags, operationId } = (rawOperation ?? {}) as {
        tags?: unknown
        operationId?: unknown
      }
      const firstTag = Array.isArray(tags) ? tags[0] : undefined
      if (typeof firstTag !== "string" || typeof operationId !== "string") {
        throw new Error(
          `Operation "${httpMethod.toUpperCase()} ${oasPath}" in ${file} is missing a tag or an operationId`
        )
      }

      const hash = `${slugify(firstTag)}_${slugify(operationId)}`
      lines.push(
        `- [${httpMethod.toUpperCase()} ${oasPath}](${baseUrl}#${hash})\n`
      )
    }
  }

  return `## ${options?.type} API Reference\n\n${lines.join("")}`
}
/**
* Helpers
*/
/**
 * Builds a markdown list with one entry per reference file.
 *
 * For every file the link target is `<baseUrl><slug>/index.html.md`, where
 * `<slug>` is the `newSlug` recorded for the file in the generated slug
 * changes, or otherwise the file path relative to the resources app with the
 * trailing `/page.md(x)` removed. The page title (with everything from the
 * first " - " removed) and the link target are passed to `itemContent`.
 *
 * @param files - Absolute paths of the reference files.
 * @param title - Heading of the list.
 * @param itemContent - Formats one list entry from its title and link target.
 * @param options - `baseUrl` prefixes every link (empty when omitted).
 * @returns The markdown list, starting with its heading.
 */
export const generateListForReferenceFiles = async ({
  files,
  title,
  itemContent,
  options,
}: {
  files: string[]
  title: string
  itemContent: (title: string, fileSlug: string) => string
  options?: CommonOptions
}) => {
  const slugChanges: { filePath: string; newSlug?: string }[] = JSON.parse(
    (await readFile(slugsPath, "utf-8")).replace(slugsFileContentPrefix, "")
  )

  // Index the slug changes once instead of scanning the list for every file.
  // The first entry for a path wins, matching a front-to-back search.
  const newSlugByFilePath = new Map<string, string | undefined>()
  for (const { filePath, newSlug } of slugChanges) {
    if (!newSlugByFilePath.has(filePath)) {
      newSlugByFilePath.set(filePath, newSlug)
    }
  }

  // Avoid rendering the literal string "undefined" in links.
  const baseUrl = options?.baseUrl ?? ""

  const items = files.map((file) => {
    const relativeFilePath = file.replace(monorepoRoot, "")
    const slug =
      newSlugByFilePath.get(relativeFilePath) ||
      file.replace(referencesRoot, "").replace(/\/page\.mdx?$/, "")
    const fileSlug = `${baseUrl}${slug}/index.html.md`
    const itemTitle = (findPageTitle(file) || "").replace(/ - .+/, "")

    return `- ${itemContent(itemTitle, fileSlug)}\n`
  })

  return `## ${title}\n\n${items.join("")}`
}

View File

@@ -145,6 +145,22 @@ const removeFrontmatterPlugin = (): Transformer => {
}
}
/**
 * Transformer that points links to `https://docs.medusajs.com` pages at their
 * markdown version by adding `/index.html.md` to the page path.
 *
 * The suffix is inserted before any `?query` or `#hash` part and after
 * removing trailing slashes, so `…/page/#section` becomes
 * `…/page/index.html.md#section` instead of a broken URL. Links whose path
 * already ends with `index.html.md` are left untouched.
 */
const changeLinksPlugin = (): Transformer => {
  const docsOrigin = "https://docs.medusajs.com"
  const markdownFile = "index.html.md"

  return async (tree) => {
    const { visit } = await import("unist-util-visit")

    visit(tree as UnistTree, ["link"], (node: UnistNode) => {
      const url = node.url
      if (node.type !== "link" || !url?.startsWith(docsOrigin)) {
        return
      }

      // Split the URL into its page path and the optional query/hash tail.
      const tailStart = url.search(/[?#]/)
      const pagePath = tailStart === -1 ? url : url.slice(0, tailStart)
      const queryAndHash = tailStart === -1 ? "" : url.slice(tailStart)

      if (pagePath.endsWith(markdownFile)) {
        return
      }

      node.url = `${pagePath.replace(/\/+$/, "")}/${markdownFile}${queryAndHash}`
    })
  }
}
const getParsedAsString = (file: VFile): string => {
let content = file.toString().replaceAll(/^([\s]*)\* /gm, "$1- ")
const frontmatter = file.data.matter as FrontMatter | undefined
@@ -156,21 +172,23 @@ const getParsedAsString = (file: VFile): string => {
return content
}
type Options = {
filePath: string
export type GetCleanMdOptions = {
file: string
plugins?: {
before?: Plugin[]
after?: Plugin[]
}
parserOptions?: ParserPluginOptions
type?: "file" | "content"
}
export const getCleanMd = async ({
filePath,
file,
plugins,
parserOptions,
}: Options): Promise<string> => {
if (!filePath.endsWith(".md") && !filePath.endsWith(".mdx")) {
type = "file",
}: GetCleanMdOptions): Promise<string> => {
if (type === "file" && !file.endsWith(".md") && !file.endsWith(".mdx")) {
return ""
}
const unifier = unified()
@@ -196,7 +214,10 @@ export const getCleanMd = async ({
unifier.use(...(Array.isArray(plugin) ? plugin : [plugin]))
})
const parsed = await unifier.process(await read(filePath))
unifier.use(changeLinksPlugin)
const content = type === "file" ? await read(file) : file
const parsed = await unifier.process(content)
return getParsedAsString(parsed)
}

View File

@@ -5,3 +5,4 @@ export * from "./get-clean-md.js"
export * from "./get-file-slug-sync.js"
export * from "./get-file-slug.js"
export * from "./get-front-matter.js"
export * from "./oas-file-to-path.js"

View File

@@ -0,0 +1,5 @@
/**
 * Converts an OpenAPI path file name into the route it describes.
 *
 * Underscores outside `{...}` placeholders become `/`, underscores inside a
 * placeholder are kept, the trailing alphabetic extension is dropped, and the
 * result is prefixed with `/`.
 *
 * @example
 * oasFileToPath("admin_products_{product_id}.yaml") // "/admin/products/{product_id}"
 *
 * @param fileName - Base name of the path file.
 * @returns The route, starting with `/`.
 */
export function oasFileToPath(fileName: string): string {
  // An underscore that is neither preceded by an unclosed `{` nor followed by a closing `}`.
  const separatorOutsideBraces = /(?<!\{[^}]*)_(?![^{]*\})/g
  const trailingExtension = /\.[A-Za-z]+$/

  const withSlashes = fileName.replace(separatorOutsideBraces, "/")
  const withoutExtension = withSlashes.replace(trailingExtension, "")

  return "/" + withoutExtension
}

View File

@@ -5533,6 +5533,7 @@ __metadata:
autoprefixer: 10.4.14
clsx: ^2.0.0
docs-ui: "*"
docs-utils: "*"
eslint: ^9.13.0
eslint-plugin-prettier: ^5.2.1
eslint-plugin-react-hooks: ^5.0.0
@@ -5943,11 +5944,14 @@ __metadata:
dependencies:
"@types/node": ^20.11.20
docs-utils: "*"
fdir: ^6.4.3
rimraf: ^5.0.5
slugify: ^1.6.6
tags: "*"
tsconfig: "*"
types: "*"
typescript: ^5.3.3
yaml: ^2.7.0
peerDependencies:
docs-utils: "*"
languageName: unknown
@@ -8515,6 +8519,18 @@ __metadata:
languageName: node
linkType: hard
"fdir@npm:^6.4.3":
version: 6.4.3
resolution: "fdir@npm:6.4.3"
peerDependencies:
picomatch: ^3 || ^4
peerDependenciesMeta:
picomatch:
optional: true
checksum: d13c10120e9625adf21d8d80481586200759928c19405a816b77dd28eaeb80e7c59c5def3e2941508045eb06d34eb47fad865ccc8bf98e6ab988bb0ed160fb6f
languageName: node
linkType: hard
"fetch-blob@npm:^3.1.2, fetch-blob@npm:^3.1.4":
version: 3.2.0
resolution: "fetch-blob@npm:3.2.0"
@@ -15888,6 +15904,15 @@ turbo@latest:
languageName: node
linkType: hard
"yaml@npm:^2.7.0":
version: 2.7.0
resolution: "yaml@npm:2.7.0"
bin:
yaml: bin.mjs
checksum: 886a7d2abbd70704b79f1d2d05fe9fb0aa63aefb86e1cb9991837dced65193d300f5554747a872b4b10ae9a12bc5d5327e4d04205f70336e863e35e89d8f4ea9
languageName: node
linkType: hard
"yargs-parser@npm:^21.1.1":
version: 21.1.1
resolution: "yargs-parser@npm:21.1.1"