feat: implement stream based processing of the files (#12574)
Fixes: FRMW-2960

This PR adds support for processing large CSV files by breaking them into chunks and processing one chunk at a time. In a nutshell, this is how it works:

- The CSV file is read as a stream, and each chunk of the stream is one CSV row.
- We read up to 1000 rows (plus a few more to ensure the variants of a product are not split across multiple chunks).
- Each chunk is then normalized using the `CSVNormalizer` and validated using zod schemas. If there is an error, the entire process is aborted and the existing chunks are deleted.
- Each chunk is written to a JSON file so that we can process it later (after the user confirms) without re-parsing or re-validating the CSV file.
- The confirmation process consumes one chunk at a time and creates/updates products using the `batchProducts` workflow.

A rough sketch of the chunking loop is shown right after this description.

## To resume or not to resume processing of chunks

Let's imagine that, while processing chunks, we find that chunk 3 leads to a database error. By this point we have already processed the first two chunks. How do we deal with this situation? The options are:

- We store which chunk failed and, during the re-upload, skip the chunks before the failed one.

In my conversation with @olivermrbl we discovered that resuming would have to work with certain assumptions if we decide to implement it:

- What if a user updates CSV rows that are part of the already processed chunks? Those changes would be ignored and they would never notice it.
- Resuming works only if the file name stays the same. What if they made changes and saved the file with "Save as - New name"? In that case we would process the entire file anyway.
- We would have to fetch the old workflow from the workflow engine using some `ilike` search, so that we can see at which chunk the last run failed for the given file.

Co-authored-by: Carlos R. L. Rodrigues <37986729+carlos-r-l-rodrigues@users.noreply.github.com>
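To make the chunking loop above easier to picture, here is a minimal, self-contained sketch of the idea using `csv-parse`. It is not the code in this PR: the `splitCsvIntoChunks` helper, the `CHUNK_SIZE` constant, the `Product Id`/`Product Handle` column names, and the local JSON chunk files are illustrative assumptions, and it skips the normalization and zod validation the real step performs.

```ts
import { createReadStream } from "node:fs"
import { unlink, writeFile } from "node:fs/promises"
import { parse } from "csv-parse"

type CsvRow = Record<string, string>

// Roughly how many rows go into one chunk before we look for a product boundary.
const CHUNK_SIZE = 1000

export async function splitCsvIntoChunks(csvPath: string): Promise<string[]> {
  // Stream the CSV; each async iteration yields one row as an object keyed by header.
  const parser = createReadStream(csvPath).pipe(
    parse({ columns: true, skip_empty_lines: true })
  )

  const chunkFiles: string[] = []
  let rows: CsvRow[] = []
  let previousProductKey: string | undefined

  // Writes the currently collected rows to a JSON file and starts a new chunk.
  const flush = async () => {
    if (!rows.length) return
    const fileName = `${csvPath}.chunk-${chunkFiles.length + 1}.json`
    await writeFile(fileName, JSON.stringify(rows))
    chunkFiles.push(fileName)
    rows = []
  }

  try {
    for await (const row of parser as AsyncIterable<CsvRow>) {
      // Assumed column names; the real import keys rows on "product id"/"product handle".
      const productKey = row["Product Id"] || row["Product Handle"]

      // Cut a chunk only at a product boundary so the variants of one
      // product never end up split across two chunks.
      if (rows.length >= CHUNK_SIZE && productKey !== previousProductKey) {
        await flush()
      }

      rows.push(row)
      previousProductKey = productKey
    }
    await flush()
  } catch (error) {
    // Mirror the "abort and delete existing chunks" behaviour on failure.
    await Promise.all(chunkFiles.map((file) => unlink(file)))
    throw error
  }

  return chunkFiles
}
```

In the PR itself the same boundary check runs on the normalized `product id`/`product handle` values, each chunk is validated before being stored through the file module, and the confirmation step later feeds every stored chunk to the `batchProducts` workflow.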
.changeset/selfish-ways-provide.md (new file, 7 lines)
@@ -0,0 +1,7 @@
---
"@medusajs/core-flows": patch
"@medusajs/framework": patch
"@medusajs/utils": patch
---

feat: implement stream based processing of the files

@@ -41,6 +41,7 @@
"typescript": "^5.6.2"
},
"dependencies": {
"csv-parse": "^5.6.0",
"json-2-csv": "^5.5.4"
},
"peerDependencies": {

@@ -1,11 +1,11 @@
import { HttpTypes } from "@medusajs/framework/types"
import { parse, Parser } from "csv-parse"
import { HttpTypes, IFileModuleService } from "@medusajs/framework/types"
import {
CSVNormalizer,
Modules,
CSVNormalizer,
productValidators,
} from "@medusajs/framework/utils"
import { StepResponse, createStep } from "@medusajs/framework/workflows-sdk"
import { convertCsvToJson } from "../utils"

/**
* The CSV file content to parse.
@@ -14,6 +14,174 @@ export type NormalizeProductCsvV1StepInput = string

export const normalizeCsvToChunksStepId = "normalize-product-csv-to-chunks"

/**
* Processes a chunk of products by writing them to a file. Later the
* file will be processed after the import has been confirmed.
*/
async function processChunk(
file: IFileModuleService,
fileKey: string,
csvRows: ReturnType<(typeof CSVNormalizer)["preProcess"]>[],
currentRowNumber: number
) {
const normalizer = new CSVNormalizer(csvRows)
const products = normalizer.proccess(currentRowNumber)

let create = Object.keys(products.toCreate).reduce<
HttpTypes.AdminCreateProduct[]
>((result, toCreateHandle) => {
result.push(
productValidators.CreateProduct.parse(
products.toCreate[toCreateHandle]
) as HttpTypes.AdminCreateProduct
)
return result
}, [])

let update = Object.keys(products.toUpdate).reduce<
HttpTypes.AdminUpdateProduct & { id: string }[]
>((result, toUpdateId) => {
result.push(
productValidators.UpdateProduct.parse(products.toUpdate[toUpdateId])
)
return result
}, [])

const toCreate = create.length
const toUpdate = update.length

const { id } = await file.createFiles({
filename: `${fileKey}.json`,
content: JSON.stringify({ create, update }),
mimeType: "application/json",
})

/**
* Release products from the memory
*/
create = []
update = []

return {
id,
toCreate,
toUpdate,
}
}

/**
* Creates chunks by reading CSV rows from the stream
*/
async function createChunks(
file: IFileModuleService,
fileKey: string,
stream: Parser
) {
/**
* The row under process
*/
let currentCSVRow = 0

/**
* Number of rows to process in a chunk. The rows count might go a little
* up if there are more rows for the same product.
*/
const rowsToRead = 1000

/**
* Current count of processed rows for a given chunk.
*/
let rowsReadSoFar = 0

/**
* Validated chunks that have been written with the file
* provider
*/
const chunks: { id: string; toCreate: number; toUpdate: number }[] = []

/**
* Currently collected rows to be processed as one chunk
*/
let rows: ReturnType<(typeof CSVNormalizer)["preProcess"]>[] = []

/**
* The unique value for the current row. We need this value to scan
* more rows after rowsToRead threshold has reached, but the upcoming
* rows are part of the same product.
*/
let currentRowUniqueValue: string | undefined

try {
for await (const row of stream) {
rowsReadSoFar++
currentCSVRow++
const normalizedRow = CSVNormalizer.preProcess(row, currentCSVRow)
const rowValueValue =
normalizedRow["product id"] || normalizedRow["product handle"]

/**
* Reached rows threshold
*/
if (rowsReadSoFar > rowsToRead) {
/**
* The current row unique value is not same as the previous row's
* unique value. Hence we can break the chunk here and process
* it.
*/
if (rowValueValue !== currentRowUniqueValue) {
chunks.push(
await processChunk(
file,
`${fileKey}-${chunks.length + 1}`,
rows,
currentCSVRow
)
)

/**
* Reset for new row
*/
rows = [normalizedRow]
rowsReadSoFar = 0
} else {
rows.push(normalizedRow)
}
} else {
rows.push(normalizedRow)
}

currentRowUniqueValue = rowValueValue
}

/**
* The file has finished and we have collected some rows that were
* under the chunk rows size threshold.
*/
if (rows.length) {
chunks.push(
await processChunk(
file,
`${fileKey}-${chunks.length + 1}`,
rows,
currentCSVRow
)
)
}
} catch (error) {
if (!stream.destroyed) {
stream.destroy()
}

/**
* Cleanup in case of an error
*/
await file.deleteFiles(chunks.map((chunk) => chunk.id).concat(fileKey))
throw error
}

return chunks
}

/**
* This step parses a CSV file holding products to import, returning the chunks
* to be processed. Each chunk is written to a file using the file provider.
@@ -25,47 +193,35 @@ export const normalizeCsvToChunksStep = createStep(
|
||||
normalizeCsvToChunksStepId,
|
||||
async (fileKey: NormalizeProductCsvV1StepInput, { container }) => {
|
||||
const file = container.resolve(Modules.FILE)
|
||||
const contents = await file.getAsBuffer(fileKey)
|
||||
|
||||
const csvProducts = convertCsvToJson<
|
||||
ConstructorParameters<typeof CSVNormalizer>[0][0]
|
||||
>(contents.toString("utf-8"))
|
||||
|
||||
const normalizer = new CSVNormalizer(csvProducts)
|
||||
const products = normalizer.proccess()
|
||||
|
||||
const create = Object.keys(products.toCreate).reduce<
|
||||
HttpTypes.AdminCreateProduct[]
|
||||
>((result, toCreateHandle) => {
|
||||
result.push(
|
||||
productValidators.CreateProduct.parse(
|
||||
products.toCreate[toCreateHandle]
|
||||
) as HttpTypes.AdminCreateProduct
|
||||
const contents = await file.getDownloadStream(fileKey)
|
||||
const chunks = await createChunks(
|
||||
file,
|
||||
fileKey,
|
||||
contents.pipe(
|
||||
parse({
|
||||
columns: true,
|
||||
skip_empty_lines: true,
|
||||
})
|
||||
)
|
||||
return result
|
||||
}, [])
|
||||
)
|
||||
|
||||
const update = Object.keys(products.toUpdate).reduce<
|
||||
HttpTypes.AdminUpdateProduct & { id: string }[]
|
||||
>((result, toUpdateId) => {
|
||||
result.push(
|
||||
productValidators.UpdateProduct.parse(products.toUpdate[toUpdateId])
|
||||
)
|
||||
return result
|
||||
}, [])
|
||||
const summary = chunks.reduce<{ toCreate: number; toUpdate: number }>(
|
||||
(result, chunk) => {
|
||||
result.toCreate = result.toCreate + chunk.toCreate
|
||||
result.toUpdate = result.toUpdate + chunk.toUpdate
|
||||
return result
|
||||
},
|
||||
{ toCreate: 0, toUpdate: 0 }
|
||||
)
|
||||
|
||||
const { id } = await file.createFiles({
|
||||
filename: `${fileKey}.json`,
|
||||
content: JSON.stringify({ create, update }),
|
||||
mimeType: "application/json",
|
||||
})
|
||||
/**
|
||||
* Delete CSV file once we have the chunks
|
||||
*/
|
||||
await file.deleteFiles(fileKey)
|
||||
|
||||
return new StepResponse({
|
||||
chunks: [id],
|
||||
summary: {
|
||||
toCreate: create.length,
|
||||
toUpdate: update.length,
|
||||
},
|
||||
chunks,
|
||||
summary,
|
||||
})
|
||||
}
|
||||
)
|
||||
|
||||
@@ -20,10 +20,10 @@ export const normalizeCsvStep = createStep(
normalizeCsvStepId,
async (fileContent: NormalizeProductCsvStepInput) => {
const csvProducts =
convertCsvToJson<ConstructorParameters<typeof CSVNormalizer>[0][0]>(
fileContent
)
const normalizer = new CSVNormalizer(csvProducts)
convertCsvToJson<Record<string, number | string | boolean>>(fileContent)
const normalizer = new CSVNormalizer(
csvProducts.map((row, index) => CSVNormalizer.preProcess(row, index + 1))
)
const products = normalizer.proccess()

const create = Object.keys(products.toCreate).reduce<

@@ -13,14 +13,23 @@ export const processImportChunksStepId = "process-import-chunks"
*/
export const processImportChunksStep = createStep(
processImportChunksStepId,
async (input: { chunks: string[] }, { container }) => {
async (input: { chunks: { id: string }[] }, { container }) => {
const file = container.resolve(Modules.FILE)

for (let chunk of input.chunks) {
const contents = await file.getAsBuffer(chunk)
await batchProductsWorkflow(container).run({
input: JSON.parse(contents.toString("utf-8")),
})
try {
for (let chunk of input.chunks) {
const contents = await file.getAsBuffer(chunk.id)
let products = JSON.parse(contents.toString("utf-8"))
await batchProductsWorkflow(container).run({
input: products,
})
products = undefined
}
} finally {
/**
* Delete chunks regardless of the import status
*/
await file.deleteFiles(input.chunks.map((chunk) => chunk.id))
}

return new StepResponse({ completed: true })

@@ -97,7 +97,8 @@
"morgan": "^1.9.1",
"path-to-regexp": "^0.1.10",
"tsconfig-paths": "^4.2.0",
"zod": "3.22.4"
"zod": "3.22.4",
"zod-validation-error": "^3.4.1"
},
"peerDependencies": {
"@aws-sdk/client-dynamodb": "^3.218.0",

@@ -1,3 +1,4 @@
import { fromZodIssue } from "zod-validation-error"
import { NextFunction, ErrorRequestHandler, Response } from "express"

import { ContainerRegistrationKeys, MedusaError } from "@medusajs/utils"
@@ -75,6 +76,15 @@ export function errorHandler() {
break
}

if ("issues" in err && Array.isArray(err.issues)) {
const messages = err.issues.map((issue) => fromZodIssue(issue).toString())
res.status(statusCode).json({
type: MedusaError.Types.INVALID_DATA,
message: messages.join("\n"),
})
return
}

res.status(statusCode).json(errObj)
} as unknown as ErrorRequestHandler
}

@@ -88,3 +88,4 @@ export * from "./upper-case-first"
export * from "./validate-handle"
export * from "./validate-module-name"
export * from "./wrap-handler"
export * from "./normalize-csv-value"

packages/core/utils/src/common/normalize-csv-value.ts (new file, 10 lines)
@@ -0,0 +1,10 @@
/**
* Normalizes a CSV value by removing the leading "\r" from the
* value.
*/
export function normalizeCSVValue<T>(value: T): T {
if (typeof value === "string") {
return value.replace(/\\r$/, "").trim() as T
}
return value
}
@@ -10,16 +10,16 @@ async function loadFixtureFile(fileName: string) {

describe("CSV processor", () => {
it("should error when both Product Id and Handle are missing", async () => {
const processor = new CSVNormalizer([{}])

expect(() => processor.proccess()).toThrow(
"Row 1: Missing product id and handle. One of them are required to process the row"
expect(() => CSVNormalizer.preProcess({}, 1)).toThrow(
"Row 1: Missing product id and handle. One of these columns are required to process the row"
)
})

it("should process a CSV row", async () => {
const csvData = await loadFixtureFile("single-row-create.json")
const processor = new CSVNormalizer(csvData)
const csvData: any[] = await loadFixtureFile("single-row-create.json")
const processor = new CSVNormalizer(
csvData.map((row, index) => CSVNormalizer.preProcess(row, index + 1))
)

const products = processor.proccess()
expect(products).toMatchInlineSnapshot(`
@@ -87,8 +87,12 @@ describe("CSV processor", () => {
})

it("should process multiple CSV rows for the same product", async () => {
const csvData = await loadFixtureFile("same-product-multiple-rows.json")
const processor = new CSVNormalizer(csvData)
const csvData: any[] = await loadFixtureFile(
"same-product-multiple-rows.json"
)
const processor = new CSVNormalizer(
csvData.map((row, index) => CSVNormalizer.preProcess(row, index + 1))
)

const products = processor.proccess()
expect(products).toMatchInlineSnapshot(`
@@ -200,10 +204,12 @@ describe("CSV processor", () => {
})

it("should process multiple CSV rows where each variant uses different options", async () => {
const csvData = await loadFixtureFile(
const csvData: any[] = await loadFixtureFile(
"same-product-multiple-variant-options.json"
)
const processor = new CSVNormalizer(csvData)
const processor = new CSVNormalizer(
csvData.map((row, index) => CSVNormalizer.preProcess(row, index + 1))
)

const products = processor.proccess()
expect(products).toMatchInlineSnapshot(`
@@ -325,10 +331,12 @@ describe("CSV processor", () => {
})

it("should process multiple CSV rows with multiple products and variants", async () => {
const csvData = await loadFixtureFile(
const csvData: any[] = await loadFixtureFile(
"multiple-products-multiple-variants.json"
)
const processor = new CSVNormalizer(csvData)
const processor = new CSVNormalizer(
csvData.map((row, index) => CSVNormalizer.preProcess(row, index + 1))
)

const products = processor.proccess()
expect(products).toMatchInlineSnapshot(`

@@ -3,6 +3,7 @@ import {
tryConvertToNumber,
tryConvertToBoolean,
MedusaError,
normalizeCSVValue,
} from "../common"
import { AdminCreateProduct, AdminCreateProductVariant } from "@medusajs/types"

@@ -17,6 +18,20 @@ type ColumnProcessor<Output> = (
output: Output
) => void

type NormalizedRow =
| (Record<string, string | number | boolean> & {
"product id": string
"product handle": string
})
| {
"product id"?: string
"product handle": string
}
| {
"product id": string
"product handle"?: string
}

/**
* Creates an error with the CSV row number
*/
@@ -27,23 +42,12 @@ function createError(rowNumber: number, message: string) {
)
}

/**
* Normalizes a CSV value by removing the leading "\r" from the
* value.
*/
function normalizeValue<T>(value: T): T {
if (typeof value === "string") {
return value.replace(/\\r$/, "").trim() as T
}
return value
}

/**
* Parses different patterns to extract variant price iso
* and the region name. The iso is converted to lowercase
*/
function parseVariantPriceColumn(columnName: string, rowNumber: number) {
const normalizedValue = normalizeValue(columnName)
const normalizedValue = columnName
const potentialRegion = /\[(.*)\]/g.exec(normalizedValue)?.[1]
const iso = normalizedValue.split(" ").pop()

@@ -68,7 +72,7 @@ function processAsString<Output>(
outputKey: keyof Output
): ColumnProcessor<Output> {
return (csvRow, _, __, output) => {
const value = normalizeValue(csvRow[inputKey])
const value = csvRow[inputKey]
if (isPresent(value)) {
output[outputKey as any] = value
}
@@ -83,7 +87,7 @@ function processAsBoolean<Output>(
outputKey: keyof Output
): ColumnProcessor<Output> {
return (csvRow, _, __, output) => {
const value = normalizeValue(csvRow[inputKey])
const value = csvRow[inputKey]
if (isPresent(value)) {
output[outputKey as any] = tryConvertToBoolean(value, value)
}
@@ -99,7 +103,7 @@ function processAsNumber<Output>(
options?: { asNumericString: boolean }
): ColumnProcessor<Output> {
return (csvRow, _, rowNumber, output) => {
const value = normalizeValue(csvRow[inputKey])
const value = csvRow[inputKey]
if (isPresent(value)) {
const numericValue = tryConvertToNumber(value)
if (numericValue === undefined) {
@@ -135,7 +139,7 @@ function processAsCounterValue<Output extends Record<string, any[]>>(
rowColumns
.filter((rowKey) => inputMatcher.test(rowKey))
.forEach((rowKey) => {
const value = normalizeValue(csvRow[rowKey])
const value = csvRow[rowKey]
if (!existingIds.includes(value) && isPresent(value)) {
output[outputKey].push({ [arrayItemKey]: value })
}
@@ -243,7 +247,7 @@ const variantStaticColumns: {
"variant origin country",
"origin_country"
),
"variant variant rank": processAsString(
"variant variant rank": processAsNumber(
"variant variant rank",
"variant_rank"
),
@@ -268,7 +272,7 @@ const variantWildcardColumns: {

pricesColumns.forEach((columnName) => {
const { iso } = parseVariantPriceColumn(columnName, rowNumber)
const value = normalizeValue(csvRow[columnName])
const value = csvRow[columnName]

const numericValue = tryConvertToNumber(value)
if (numericValue === undefined) {
@@ -298,13 +302,13 @@ const optionColumns: {
"variant option": (csvRow, rowColumns, rowNumber, output) => {
const matcher = /variant option \d+ name/
const optionNameColumns = rowColumns.filter((rowKey) => {
return matcher.test(rowKey) && isPresent(normalizeValue(csvRow[rowKey]))
return matcher.test(rowKey) && isPresent(csvRow[rowKey])
})

output["options"] = optionNameColumns.map((columnName) => {
const [, , counter] = columnName.split(" ")
const key = normalizeValue(csvRow[columnName])
const value = normalizeValue(csvRow[`variant option ${counter} value`])
const key = csvRow[columnName]
const value = csvRow[`variant option ${counter} value`]

if (!isPresent(value)) {
throw createError(rowNumber, `Missing option value for "${columnName}"`)
@@ -336,6 +340,52 @@ const knownWildcardColumns = Object.keys(productWildcardColumns)
* the required fields in the normalized output.
*/
export class CSVNormalizer {
/**
* Normalizes a row by converting all keys to lowercase and removing
* the leading "\r" from the keys and the values.
*
* Also, it values the row to contain unknown columns and must contain
* the "product id" or "product handle" columns.
*/
static preProcess(
row: Record<string, string | boolean | number>,
rowNumber: number
): NormalizedRow {
const unknownColumns: string[] = []

const normalized = Object.keys(row).reduce((result, key) => {
const lowerCaseKey = normalizeCSVValue(key).toLowerCase()

if (
!knownStaticColumns.includes(lowerCaseKey) &&
!knownWildcardColumns.some((column) => lowerCaseKey.startsWith(column))
) {
unknownColumns.push(key)
}

result[lowerCaseKey] = normalizeCSVValue(row[key])
return result
}, {})

if (unknownColumns.length) {
throw new MedusaError(
MedusaError.Types.INVALID_DATA,
`Invalid column name(s) "${unknownColumns.join('","')}"`
)
}

const productId = normalized["product id"]
const productHandle = normalized["product handle"]
if (!isPresent(productId) && !isPresent(productHandle)) {
throw createError(
rowNumber,
"Missing product id and handle. One of these columns are required to process the row"
)
}

return normalized as NormalizedRow
}

#rows: Record<string, string | boolean | number>[]

#products: {
@@ -354,30 +404,10 @@ export class CSVNormalizer {
toUpdate: {},
}

constructor(rows: Record<string, string | boolean | number>[]) {
constructor(rows: NormalizedRow[]) {
this.#rows = rows
}

/**
* Ensures atleast one of the product id or the handle is provided. Otherwise
* we cannot process the row
*/
#ensureRowHasProductIdentifier(
row: Record<string, string | boolean | number>,
rowNumber: number
) {
const productId = row["product id"]
const productHandle = row["product handle"]
if (!isPresent(productId) && !isPresent(productHandle)) {
throw createError(
rowNumber,
"Missing product id and handle. One of them are required to process the row"
)
}

return { productId, productHandle }
}

/**
* Initializes a product object or returns an existing one
* by its id. The products with ids are treated as updates
@@ -400,37 +430,6 @@ export class CSVNormalizer {
return this.#products.toCreate[handle]!
}

/**
* Normalizes a row by converting all keys to lowercase and creating a
* new object
*/
#normalizeRow(row: Record<string, any>) {
const unknownColumns: string[] = []

const normalized = Object.keys(row).reduce((result, key) => {
const lowerCaseKey = key.toLowerCase()
result[lowerCaseKey] = row[key]

if (
!knownStaticColumns.includes(lowerCaseKey) &&
!knownWildcardColumns.some((column) => lowerCaseKey.startsWith(column))
) {
unknownColumns.push(key)
}

return result
}, {})

if (unknownColumns.length) {
throw new MedusaError(
MedusaError.Types.INVALID_DATA,
`Invalid column name(s) "${unknownColumns.join('","')}"`
)
}

return normalized
}

/**
* Processes a given CSV row
*/
@@ -439,10 +438,8 @@ export class CSVNormalizer {
rowNumber: number
) {
const rowColumns = Object.keys(row)
const { productHandle, productId } = this.#ensureRowHasProductIdentifier(
row,
rowNumber
)
const productId = row["product id"]
const productHandle = row["product handle"]

/**
* Create representation of a product by its id or handle and process
@@ -508,10 +505,11 @@ export class CSVNormalizer {
/**
* Process CSV rows. The return value is a tree of products
*/
proccess() {
proccess(resumingFromIndex: number = 0) {
this.#rows.forEach((row, index) =>
this.#processRow(this.#normalizeRow(row), index + 1)
this.#processRow(row, resumingFromIndex + index + 1)
)
this.#rows = []
return this.#products
}
}

yarn.lock (18 lines changed)
@@ -6086,6 +6086,7 @@ __metadata:
"@swc/core": ^1.7.28
"@swc/jest": ^0.2.36
awilix: ^8.0.1
csv-parse: ^5.6.0
expect-type: ^0.20.0
jest: ^29.7.0
json-2-csv: ^5.5.4
@@ -6343,6 +6344,7 @@ __metadata:
typescript: ^5.6.2
vite: ^5.4.14
zod: 3.22.4
zod-validation-error: ^3.4.1
peerDependencies:
"@aws-sdk/client-dynamodb": ^3.218.0
"@medusajs/cli": 2.8.3
@@ -19320,6 +19322,13 @@ __metadata:
languageName: node
linkType: hard

"csv-parse@npm:^5.6.0":
version: 5.6.0
resolution: "csv-parse@npm:5.6.0"
checksum: 52f5e6c45359902e0c8e57fc2eeed41366dc6b6d283b495b538dd50c8e8510413d6f924096ea056319cbbb8ed26e111c3a3485d7985c021bcf5abaa9e92425c7
languageName: node
linkType: hard

"csv-stringify@npm:^5.6.5":
version: 5.6.5
resolution: "csv-stringify@npm:5.6.5"
@@ -35567,6 +35576,15 @@ __metadata:
languageName: node
linkType: hard

"zod-validation-error@npm:^3.4.1":
version: 3.4.1
resolution: "zod-validation-error@npm:3.4.1"
peerDependencies:
zod: ^3.24.4
checksum: cf16f12fccb3e515d18c876c8a75ae4a87219b28e8e7f6334b8d423bebfa2c08b3382d7c53842ba05af8c5caabf66ee8df1ce2862b3b41c2e96eba26e70a995f
languageName: node
linkType: hard

"zod@npm:3.22.4":
version: 3.22.4
resolution: "zod@npm:3.22.4"