docs-utils: performance improvements for the clean OAS script (#14386)

Author: Shahed Nasser
Date:   2025-12-22 17:06:55 +02:00
Committed by: GitHub
Parent: dc52dfd1dd
Commit: 04a6343951


@@ -1,4 +1,4 @@
-import { existsSync, readFileSync, rmSync, writeFileSync } from "fs"
+import { existsSync, promises as fs } from "fs"
 import { fdir } from "fdir"
 import { OpenAPIV3 } from "openapi-types"
 import path from "path"
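
The import swap in this first hunk is the heart of the change: `readFileSync` and friends block the event loop, so the script's file I/O previously ran strictly one operation at a time, while `fs.promises` lets many reads and writes overlap. A minimal sketch of the pattern (the file list is hypothetical, not from the script):

```ts
import { promises as fs } from "fs"

// Hypothetical list of files, for illustration only.
const files = ["a.yaml", "b.yaml", "c.yaml"]

// Before: readFileSync(file, "utf-8") in a loop reads one file at a time.
// After: all reads are issued together and awaited as a group.
async function readAll(): Promise<string[]> {
  return Promise.all(files.map((file) => fs.readFile(file, "utf-8")))
}
```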
@@ -27,6 +27,15 @@ const ignoreTags = {
store: ["Auth"], store: ["Auth"],
} }
type OasFileInfo = {
file: string
area: OasArea
oas: ReturnType<typeof parseOas>
normalizedPath: string
sourceFilePath: string
method: string
}
export default async function () { export default async function () {
const oasOutputBasePath = getOasOutputBasePath() const oasOutputBasePath = getOasOutputBasePath()
const oasOperationsPath = path.join(oasOutputBasePath, "operations") const oasOperationsPath = path.join(oasOutputBasePath, "operations")
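
The new `OasFileInfo` type exists so the script can parse each OAS file once and carry the result, together with the derived route path and HTTP method, through the later validation and collection steps instead of re-reading and re-parsing per step. A generic sketch of that parse-once shape (the names here are illustrative, not the script's):

```ts
import { promises as fs } from "fs"

// Illustrative parse-once helper: read and parse every file a single
// time, then hand the parsed records to as many later passes as needed.
type ParsedFile<T> = { file: string; value: T }

async function parseOnce<T>(
  files: string[],
  parse: (raw: string) => T
): Promise<ParsedFile<T>[]> {
  return Promise.all(
    files.map(async (file) => ({
      file,
      value: parse(await fs.readFile(file, "utf-8")),
    }))
  )
}
```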
@@ -72,259 +81,390 @@ export default async function () {
console.log("Cleaning OAS files...") console.log("Cleaning OAS files...")
// read files under the operations/{area} directory // Step 1: Crawl all directories in parallel
const [operationFilesByArea, baseFiles, schemaFiles] = await Promise.all([
Promise.all(
areas.map(async (area) => {
const areaPath = path.join(oasOperationsPath, area)
if (!existsSync(areaPath)) {
return { area, files: [] as string[] }
}
const files = await new fdir()
.withFullPaths()
.crawl(areaPath)
.withPromise()
return { area, files: files as string[] }
})
),
(async () => {
const oasBasePath = path.join(oasOutputBasePath, "base")
return (await new fdir()
.withFullPaths()
.crawl(oasBasePath)
.withPromise()) as string[]
})(),
(async () => {
const oasSchemasPath = path.join(oasOutputBasePath, "schemas")
return (await new fdir()
.withFullPaths()
.crawl(oasSchemasPath)
.withPromise()) as string[]
})(),
])
// Step 2: Parse all OAS files in parallel and collect valid ones
const oasFileInfos: OasFileInfo[] = []
const filesToDelete: string[] = []
// Keep track of all valid OAS files for tag/schema collection (including auth files)
const allValidOasFiles: Array<{
file: string
area: OasArea
oas: ReturnType<typeof parseOas>
}> = []
await Promise.all( await Promise.all(
areas.map(async (area) => { operationFilesByArea.flatMap(({ area, files }) =>
const areaPath = path.join(oasOperationsPath, area) files.map(async (oasFile) => {
if (!existsSync(areaPath)) { try {
const content = await fs.readFile(oasFile, "utf-8")
const parsed = parseOas(content)
if (!parsed?.oas || !parsed.oasPrefix) {
filesToDelete.push(oasFile)
return
}
// Add to all valid OAS files for tag/schema collection
allValidOasFiles.push({ file: oasFile, area, oas: parsed })
const matchOasPrefix = OAS_PREFIX_REGEX.exec(parsed.oasPrefix)
if (
!matchOasPrefix?.groups?.method ||
!matchOasPrefix.groups.path ||
matchOasPrefix.groups.path.startsWith("/auth/")
) {
// Skip route validation for auth files, but keep them for tag/schema collection
return
}
const splitPath = matchOasPrefix.groups.path.substring(1).split("/")
const normalizedOasPrefix = splitPath
.map((item) => item.replace(/^\{(.+)\}$/, "[$1]"))
.join("/")
const sourceFilePath = path.join(
apiRoutesPath,
normalizedOasPrefix,
"route.ts"
)
oasFileInfos.push({
file: oasFile,
area,
oas: parsed,
normalizedPath: normalizedOasPrefix,
sourceFilePath,
method: matchOasPrefix.groups.method.toLowerCase(),
})
} catch (error) {
// If file can't be read or parsed, mark for deletion
filesToDelete.push(oasFile)
}
})
)
)
// Step 3: Create a single TypeScript program for all route files
const sourceFilePaths = Array.from(
new Set(
oasFileInfos
.filter((info) => !info.oas?.oas?.["x-ignoreCleanup"])
.map((info) => info.sourceFilePath)
.filter((filePath) => existsSync(filePath))
)
)
// Create a single program and cache source files and generators
let program: ts.Program | null = null
let checker: ts.TypeChecker | null = null
let oasKindGenerator: OasKindGenerator | null = null
const sourceFileCache = new Map<string, ts.SourceFile | null>()
const fileValidationCache = new Map<
string,
{ sourceFile: ts.SourceFile; generator: OasKindGenerator } | null
>()
if (sourceFilePaths.length > 0) {
try {
program = ts.createProgram(sourceFilePaths, {})
checker = program.getTypeChecker()
oasKindGenerator = new OasKindGenerator({
checker,
generatorEventManager: new GeneratorEventManager(),
additionalOptions: {},
})
// Cache all source files that are successfully loaded
sourceFilePaths.forEach((filePath) => {
const sourceFile = program!.getSourceFile(filePath) ?? null
sourceFileCache.set(filePath, sourceFile)
if (sourceFile && oasKindGenerator) {
fileValidationCache.set(filePath, {
sourceFile,
generator: oasKindGenerator,
})
} else {
fileValidationCache.set(filePath, null)
}
})
} catch (error) {
// If batch program creation fails, we'll fall back to individual programs
console.warn(
"Batch program creation failed, falling back to individual programs"
)
}
}
// Helper function to check if method exists in a source file
const checkMethodExists = (
sourceFile: ts.SourceFile,
method: string,
kindGenerator: OasKindGenerator
): boolean => {
let exists = false
const visitChildren = (node: ts.Node) => {
if (
!exists &&
kindGenerator.isAllowed(node) &&
kindGenerator.canDocumentNode(node) &&
kindGenerator.getHTTPMethodName(node) === method
) {
exists = true
} else if (!exists) {
ts.forEachChild(node, visitChildren)
}
}
ts.forEachChild(sourceFile, visitChildren)
return exists
}
// Step 4: Check which OAS files should be kept (parallel processing)
const BATCH_SIZE = 100
const validOasFiles: OasFileInfo[] = []
for (let i = 0; i < oasFileInfos.length; i += BATCH_SIZE) {
const batch = oasFileInfos.slice(i, i + BATCH_SIZE)
await Promise.all(
batch.map(async (info) => {
const { file, oas, sourceFilePath, method } = info
if (!oas?.oas) {
return
}
if (oas.oas["x-ignoreCleanup"]) {
validOasFiles.push(info)
return
}
// Check if route file exists
if (!existsSync(sourceFilePath)) {
filesToDelete.push(file)
return
}
// Try to use cached validation data first (fast path)
const cached = fileValidationCache.get(sourceFilePath)
if (cached) {
const exists = checkMethodExists(
cached.sourceFile,
method,
cached.generator
)
if (exists) {
validOasFiles.push(info)
} else {
filesToDelete.push(file)
}
return
}
})
)
}
// Step 5: Collect tags and schemas from ALL valid OAS files (parallel)
// This includes files that passed route validation AND files that were skipped (like auth files)
await Promise.all(
allValidOasFiles.map(async ({ oas, area }) => {
if (!oas?.oas) {
return return
} }
const dirFiles = await new fdir() const oasObj = oas.oas
.withFullPaths()
.crawl(areaPath)
.withPromise()
dirFiles.forEach((oasFile) => { // collect tags
const { oas, oasPrefix } = oasObj.tags?.forEach((tag) => {
parseOas(readFileSync(oasFile, "utf-8")) || {} const areaTags = tags.get(area)
areaTags?.add(tag)
if (!oas || !oasPrefix) {
return
}
// decode oasPrefix
const matchOasPrefix = OAS_PREFIX_REGEX.exec(oasPrefix)
if (
!matchOasPrefix?.groups?.method ||
!matchOasPrefix.groups.path ||
matchOasPrefix.groups.path.startsWith("/auth/")
) {
return
}
const splitPath = matchOasPrefix.groups.path.substring(1).split("/")
// normalize path by replacing {paramName} with [paramName]
const normalizedOasPrefix = splitPath
.map((item) => item.replace(/^\{(.+)\}$/, "[$1]"))
.join("/")
const sourceFilePath = path.join(
apiRoutesPath,
normalizedOasPrefix,
"route.ts"
)
if (!oas["x-ignoreCleanup"]) {
// check if a route exists for the path
if (!existsSync(sourceFilePath)) {
// remove OAS file
rmSync(oasFile, {
force: true,
})
return
}
// check if method exists in the file
let exists = false
const program = ts.createProgram([sourceFilePath], {})
const oasKindGenerator = new OasKindGenerator({
checker: program.getTypeChecker(),
generatorEventManager: new GeneratorEventManager(),
additionalOptions: {},
})
const sourceFile = program.getSourceFile(sourceFilePath)
if (!sourceFile) {
// remove file
rmSync(oasFile, {
force: true,
})
return
}
const visitChildren = (node: ts.Node) => {
if (
!exists &&
oasKindGenerator.isAllowed(node) &&
oasKindGenerator.canDocumentNode(node) &&
oasKindGenerator.getHTTPMethodName(node) ===
matchOasPrefix.groups!.method
) {
exists = true
} else if (!exists) {
ts.forEachChild(node, visitChildren)
}
}
ts.forEachChild(sourceFile, visitChildren)
if (!exists) {
// remove OAS file
rmSync(oasFile, {
force: true,
})
return
}
}
// collect tags
oas.tags?.forEach((tag) => {
const areaTags = tags.get(area as OasArea)
areaTags?.add(tag)
})
// collect schemas
oas.parameters?.forEach((parameter) => {
if (oasSchemaHelper.isRefObject(parameter)) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(parameter.$ref)
)
return
}
if (!parameter.schema) {
return
}
if (oasSchemaHelper.isRefObject(parameter.schema)) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(parameter.schema.$ref)
)
return
}
testAndFindReferenceSchema(parameter.schema)
})
if (oas.requestBody) {
if (oasSchemaHelper.isRefObject(oas.requestBody)) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(oas.requestBody.$ref)
)
} else {
const requestBodySchema =
oas.requestBody.content[Object.keys(oas.requestBody.content)[0]]
.schema
if (requestBodySchema) {
testAndFindReferenceSchema(requestBodySchema)
}
}
}
if (oas.responses) {
const successResponseKey = Object.keys(oas.responses)[0]
if (
!Object.keys(DEFAULT_OAS_RESPONSES).includes(successResponseKey)
) {
const responseObj = oas.responses[successResponseKey]
if (oasSchemaHelper.isRefObject(responseObj)) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(responseObj.$ref)
)
} else if (responseObj.content) {
const responseBodySchema =
responseObj.content[Object.keys(responseObj.content)[0]].schema
if (responseBodySchema) {
testAndFindReferenceSchema(responseBodySchema)
}
}
}
}
}) })
// collect schemas
oasObj.parameters?.forEach((parameter) => {
if (oasSchemaHelper.isRefObject(parameter)) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(parameter.$ref)
)
return
}
if (!parameter.schema) {
return
}
if (oasSchemaHelper.isRefObject(parameter.schema)) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(parameter.schema.$ref)
)
return
}
testAndFindReferenceSchema(parameter.schema)
})
if (oasObj.requestBody) {
if (oasSchemaHelper.isRefObject(oasObj.requestBody)) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(oasObj.requestBody.$ref)
)
} else {
const requestBodySchema =
oasObj.requestBody.content[
Object.keys(oasObj.requestBody.content)[0]
].schema
if (requestBodySchema) {
testAndFindReferenceSchema(requestBodySchema)
}
}
}
if (oasObj.responses) {
const successResponseKey = Object.keys(oasObj.responses)[0]
if (!Object.keys(DEFAULT_OAS_RESPONSES).includes(successResponseKey)) {
const responseObj = oasObj.responses[successResponseKey]
if (oasSchemaHelper.isRefObject(responseObj)) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(responseObj.$ref)
)
} else if (responseObj.content) {
const responseBodySchema =
responseObj.content[Object.keys(responseObj.content)[0]].schema
if (responseBodySchema) {
testAndFindReferenceSchema(responseBodySchema)
}
}
}
}
}) })
) )
// Step 6: Delete invalid OAS files in parallel batches
for (let i = 0; i < filesToDelete.length; i += BATCH_SIZE) {
const batch = filesToDelete.slice(i, i + BATCH_SIZE)
await Promise.all(
batch.map(async (file) => {
await fs.unlink(file).catch(() => {})
})
)
}
console.log("Clean tags...") console.log("Clean tags...")
// check if any tags should be removed // Step 7: Process base files in parallel
const oasBasePath = path.join(oasOutputBasePath, "base") await Promise.all(
const baseFiles = await new fdir() baseFiles.map(async (baseYaml) => {
.withFullPaths() try {
.crawl(oasBasePath) const content = await fs.readFile(baseYaml, "utf-8")
.withPromise() const parsedBaseYaml = parse(content) as OpenApiDocument
baseFiles.forEach((baseYaml) => {
const parsedBaseYaml = parse(
readFileSync(baseYaml, "utf-8")
) as OpenApiDocument
const area = path.basename(baseYaml).split(".")[0] as OasArea const area = path.basename(baseYaml).split(".")[0] as OasArea
const areaTags = tags.get(area) const areaTags = tags.get(area)
if (!areaTags) { if (!areaTags) {
return return
} }
const lengthBefore = parsedBaseYaml.tags?.length || 0 const lengthBefore = parsedBaseYaml.tags?.length || 0
parsedBaseYaml.tags = parsedBaseYaml.tags?.filter( parsedBaseYaml.tags = parsedBaseYaml.tags?.filter(
(tag) => areaTags.has(tag.name) || ignoreTags[area].includes(tag.name) (tag) => areaTags.has(tag.name) || ignoreTags[area].includes(tag.name)
)
if (lengthBefore !== (parsedBaseYaml.tags?.length || 0)) {
// sort alphabetically
parsedBaseYaml.tags?.sort((tagA, tagB) => {
return tagA.name.localeCompare(tagB.name)
})
// write to the file
writeFileSync(baseYaml, stringify(parsedBaseYaml))
}
// collect referenced schemas
parsedBaseYaml.tags?.forEach((tag) => {
if (tag["x-associatedSchema"]) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(tag["x-associatedSchema"].$ref)
) )
if (lengthBefore !== (parsedBaseYaml.tags?.length || 0)) {
// sort alphabetically
parsedBaseYaml.tags?.sort((tagA, tagB) => {
return tagA.name.localeCompare(tagB.name)
})
// write to the file
await fs.writeFile(baseYaml, stringify(parsedBaseYaml))
}
// collect referenced schemas
parsedBaseYaml.tags?.forEach((tag) => {
if (tag["x-associatedSchema"]) {
referencedSchemas.add(
oasSchemaHelper.normalizeSchemaName(
tag["x-associatedSchema"].$ref
)
)
}
})
} catch (error) {
// Skip files that can't be read/parsed
} }
}) })
}) )
console.log("Clean schemas...") console.log("Clean schemas...")
// check if any schemas should be removed // Step 8: Process schema files in parallel
// a schema is removed if no other schemas/operations reference it const schemasToDelete: string[] = []
await Promise.all(
schemaFiles.map(async (schemaYaml) => {
try {
const content = await fs.readFile(schemaYaml, "utf-8")
const parsedSchema = oasSchemaHelper.parseSchema(content)
if (!parsedSchema) {
schemasToDelete.push(schemaYaml)
return
}
// add schema to all schemas
if (parsedSchema.schema["x-schemaName"]) {
allSchemas.add(parsedSchema.schema["x-schemaName"])
}
// collect referenced schemas
findReferencedSchemas(parsedSchema.schema)
} catch (error) {
schemasToDelete.push(schemaYaml)
}
})
)
// Step 9: Clean up unused schemas
const unusedSchemas = Array.from(allSchemas).filter(
(schemaName) =>
!referencedSchemas.has(schemaName) && !ignoreSchemas.includes(schemaName)
)
const oasSchemasPath = path.join(oasOutputBasePath, "schemas") const oasSchemasPath = path.join(oasOutputBasePath, "schemas")
const oasSchemaFiles = await new fdir() await Promise.all(
.withFullPaths() [
.crawl(oasSchemasPath) ...schemasToDelete,
.withPromise() ...unusedSchemas.map((s) => path.join(oasSchemasPath, `${s}.ts`)),
oasSchemaFiles.forEach((schemaYaml) => { ].map(async (filePath) => {
const parsedSchema = oasSchemaHelper.parseSchema( await fs.unlink(filePath).catch(() => {})
readFileSync(schemaYaml, "utf-8")
)
if (!parsedSchema) {
// remove file
rmSync(schemaYaml, {
force: true,
})
return
}
// add schema to all schemas
if (parsedSchema.schema["x-schemaName"]) {
allSchemas.add(parsedSchema.schema["x-schemaName"])
}
// collect referenced schemas
findReferencedSchemas(parsedSchema.schema)
})
// clean up schemas
Array.from(allSchemas)
.filter((schemaName) => {
return (
!referencedSchemas.has(schemaName) &&
!ignoreSchemas.includes(schemaName)
)
})
.forEach((schemaName) => {
// schema isn't referenced anywhere, so remove it
rmSync(path.join(oasSchemasPath, `${schemaName}.ts`), {
force: true,
})
}) })
)
console.log("Finished clean up") console.log("Finished clean up")
} }
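
A few of the techniques above, sketched in isolation.

Step 1 replaces the old per-area, one-at-a-time crawls with a single `Promise.all` over `fdir` crawlers, so directory walks overlap instead of queueing. Roughly, assuming the same `fdir` chain the script uses (the helper name is mine):

```ts
import { fdir } from "fdir"
import { existsSync } from "fs"

// Crawl several directories concurrently; a missing directory
// contributes an empty list instead of throwing.
async function crawlAll(dirs: string[]): Promise<string[][]> {
  return Promise.all(
    dirs.map(async (dir) => {
      if (!existsSync(dir)) {
        return []
      }
      return (await new fdir()
        .withFullPaths()
        .crawl(dir)
        .withPromise()) as string[]
    })
  )
}
```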
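
Steps 4 and 6 cap concurrency with a fixed `BATCH_SIZE` of 100 rather than awaiting one unbounded `Promise.all`, which keeps file-descriptor usage and memory bounded on large trees. The pattern, with hypothetical names:

```ts
import { promises as fs } from "fs"

// Run `worker` over `items`, with at most `size` calls in flight at once.
async function inBatches<T>(
  items: T[],
  size: number,
  worker: (item: T) => Promise<void>
): Promise<void> {
  for (let i = 0; i < items.length; i += size) {
    await Promise.all(items.slice(i, i + size).map(worker))
  }
}

// Mirrors Step 6: delete collected paths, ignoring already-missing files.
const remove = (file: string) => fs.unlink(file).catch(() => {})
// await inBatches(filesToDelete, 100, remove)
```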
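
The likely biggest win is Step 3. The old code called `ts.createProgram` once per OAS file, paying module resolution and type-checker setup for every route; the new code builds a single program over all route files and reuses its parsed `SourceFile`s. A reduced sketch of the idea:

```ts
import ts from "typescript"

// One program over all files: setup cost is paid once, and each
// file's AST is then fetched from the shared program.
function loadSourceFiles(filePaths: string[]): Map<string, ts.SourceFile> {
  const program = ts.createProgram(filePaths, {})
  const cache = new Map<string, ts.SourceFile>()
  for (const filePath of filePaths) {
    const sourceFile = program.getSourceFile(filePath)
    if (sourceFile) {
      cache.set(filePath, sourceFile)
    }
  }
  return cache
}
```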
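
Finally, the `checkMethodExists` helper factored out in the new code is a depth-first AST search that stops descending as soon as a match is found, the same early-exit shape the old inline `visitChildren` used. Generalized (again a sketch, not the script's API):

```ts
import ts from "typescript"

// Depth-first search over an AST that short-circuits on the first match.
function hasMatchingNode(
  sourceFile: ts.SourceFile,
  predicate: (node: ts.Node) => boolean
): boolean {
  let found = false
  const visit = (node: ts.Node) => {
    if (found) {
      return
    }
    if (predicate(node)) {
      found = true
    } else {
      ts.forEachChild(node, visit)
    }
  }
  ts.forEachChild(sourceFile, visit)
  return found
}
```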