I'm using node 18.7 on ubuntu. I'm trying to parse a bunch of csv files to objects (using csv-parse), ultimately to load into a db. Because there are large numbers of these I decided to try streams and I'd like to use the async await style.
So far I have:
const { parse } = require('csv-parse');
const path = __dirname + '/file1.csv';
const opt = { columns: true, relax_column_count: true, skip_empty_lines: true, skip_records_with_error: true };
console.log(path);
const { pipeline } = require('stream');
// const pipeline = stream.pipeline;
async function readByLine(path, opt) {
const readFileStream = fs.createReadStream(path);
var csvParser = parse(opt, function (err, records) {
if (err) throw err;
});
await pipeline(readFileStream, csvParser, (err) => {
if (err) {
console.error('Pipeline failed.', err);
} else {
console.log('Pipeline succeeded.');
}
});
for await (const record of csvParser) {
console.log(record);
}
}
readByLine(path, opt)
When I run this I see:
Pipeline succeeded.
But the parsed objects are not sent to the console. What am I doing wrong?
edit1:
I changed the code to :
async function readByLine(path, opt) {
const readFileStream = fs.createReadStream(path);
var csvParser = parse(opt, function (err, records) {
if (err) throw err;
});
await pipeline(readFileStream, csvParser, (err) => {
if (err) {
console.error('Pipeline failed.', err);
} else {
console.log('Pipeline succeeded.');
}
});
// for await (const record of csvParser) {
// console.log(record);
// }
return csvParser;
}
(async function () {
const o = await readByLine(path, opt);
console.log(o);
})();
The result is an object which has a million properties, but some look set like in the screenshot.
You can only useful await a promise.
The pipeline function you are using doesn't return a promise.
If you look at the documentation you will see:
The pipeline API provides a promise version, which can also receive an options argument as the last parameter with a signal <AbortSignal> property. When the signal is aborted, destroy will be called on the underlying pipeline, with an AbortError.
const { pipeline } = require('node:stream/promises');
const fs = require('node:fs');
const zlib = require('node:zlib');
async function run() {
await pipeline(
fs.createReadStream('archive.tar'),
zlib.createGzip(),
fs.createWriteStream('archive.tar.gz')
);
console.log('Pipeline succeeded.');
}
run().catch(console.error);
Note the different value passed to require. Use that version of pipeline instead.
Related
This is my code, which works fine if i run it from my local using local aws account , but it doesn't work from my dev environment. S3.getobject api doesnt get executed and code prints the next log skipping the getobject call :
const unzipFromS3 = (key) => {
return new Promise(async (resolve, reject) => {
log.info("inside unzipfroms3");
var zlib = require('zlib');
// let fileName = _.replace(key, 'Root/', '');
let options = {
'Bucket': config.bucketName,
'Key': "Root/" + key,
}
log.info("Key:", options);
await s3.getObject(options).on('error', error => {
log.error(error) }).promise().then((res) => {
yauzl.fromBuffer(res.body, { lazyEntries: true }, function (err, zipfile) {
log.info("Inside Yauzl")
if (err) throw err;
zipfile.readEntry();
zipfile.on("entry", function (entry) {
if (/\/$/.test(entry.fileName)) {
zipfile.readEntry();
} else {
zipfile.openReadStream(entry, function (err, readStream) {
if (err) throw err;
// readStream.pipe(fs.createWriteStream(`result/${entry.fileName}`));
readStream
.pipe(uploadFromStream(s3));
function uploadFromStream(s3) {
log.info("Inside uploadFromStream")
var pass = new Stream.PassThrough();
let options = {
'Bucket': config.bucketName,
'Key': entry.fileName,
}
var params = { ...options, Body: pass };
s3.upload(params, function (err, data) {
log.error(err, data);
});
return pass;
}
readStream.on("end", function () {
zipfile.readEntry();
});
});
}
});
});
});
});
};
In order to use await, i.e. the promised based version of S3.getObject(), you must add the promise() method to your method call as explained in the Using JavaScript Promises chapter of the AWS SDK developer guide. Moreover, there is also an Using async/await chapter that you can look into.
In your case, the code can be modified to something like:
await s3.getObject(options).promise()
.then((res) => {
yauzl.fromBuffer(/* more code */);
});
I have the code below that runs fine when run in a standalone file. But when I try to use it in my API endpoint and send a request with the postman, it can't seem to work.
I can't understand why. I also have the same issue when trying to write an excel file with the same API endpoint. If I specify a path, it won't find it. If I just use the filename, it will write fine in the current directory.
What am I missing? the code below is from when I use it in a standalone file. If I use it inside this route below, it won't work.
exports.download_excel = async (req, res) => {....
full code below
// modules import
const path = require('path');
const fs = require('fs');
const util = require('util');
const csv = require('#fast-csv/parse');
const { forEach } = require('p-iteration');
// const path = '../csv/';
const casesBO = [];
const readCSVFiles = async function() {
try {
const allLocalFiles = path.join('csv/');
const readDir = util.promisify(fs.readdir);
await readDir(allLocalFiles, async function(err, file) {
forEach(file, async item => {
fs.createReadStream('csv/' + item)
.pipe(csv.parse({ headers: true, delimiter: ';' }))
.on('error', error => console.error(error))
.on('data', async row => {
if (row['[REGION2]'] !== 'FR') {
casesBO.push(row['[CALLERNO_EMAIL_SOCIAL]']);
console.log(
`${row['[AGENT]']} is ${row['[REGION2]']} and case = ${
row['[CALLERNO_EMAIL_SOCIAL]']
}`
);
}
})
.on('end', async rowCount => {});
});
});
} catch (error) {
console.log(error);
}
};
You are awaiting readDir, but you are also giving it a callback function. You can't both await a Promise and also give it a callback. For that matter, Promises don't take a callback as argument at all.
Also you are writing async everywhere, this is useless for functions that don't await anything inside.
const readCSVFiles = async function () {
try {
const allLocalFiles = path.join('csv/');
const readDir = util.promisify(fs.readdir);
const files = await readDir(allLocalFiles);
for (let file of files) {
fs.createReadStream('csv/' + file)
.pipe(csv.parse({ headers: true, delimiter: ';' }))
.on('error', error => console.error(error))
.on('data', row => {
if (row['[REGION2]'] !== 'FR') {
casesBO.push(row['[CALLERNO_EMAIL_SOCIAL]']);
console.log(
`${row['[AGENT]']} is ${row['[REGION2]']} and case = ${row['[CALLERNO_EMAIL_SOCIAL]']
}`
);
}
})
.on('end', rowCount => { });
}
} catch (error) {
console.log(error);
}
};
I know that old school for loop works in the traditional way - that it waits for the await to finish getting results.
But in my use case, I need to read a file from local/s3 and process it line by line, and for each line I need to call an External API.
Generally I use await inside the loop because all are running inside a lambda and I don't want to use all memory for running it parallelly.
Here I am reading the file using a stream.on() method, and in order to use await inside that, I need to add async in read method, like so:
stream.on('data',async () =>{
while(data=stream.read()!==null){
console.log('line');
const requests = getRequests(); // sync code,no pblms
for(let i=0;i<requests.length;i++){
const result = await apiCall(request[i);
console.log('result from api')
const finalResult = await anotherapiCall(result.data);
}
}
});
This is working but order in which the lines are processed is not guaranteed. I need all in a sync manner. Any help?
Complete Code
async function processSOIFileLocal (options, params) {
console.log('Process SOI file');
const readStream = byline.createStream(fs.createReadStream(key));
readStream.setEncoding('utf8');
const pattern = /^UHL\s|^UTL\s/;
const regExp = new RegExp(pattern);
readStream.on('readable', () => {
let line;
while (null !== (line = readStream.read())) {
if (!regExp.test(line.toString())) {
totalRecordsCount++;
dataObject = soiParser(line);
const { id } = dataObject;
const XMLRequests = createLoSTRequestXML(
options,
{ mapping: event.mapping, row: dataObject }
);
console.log('Read line');
console.log(id);
try {
for (let i = 0;i < XMLRequests.length;i++) {
totalRequestsCount++;
console.log('Sending request');
const response = await sendLoSTRequest(
options,
{ data: XMLRequests[i],
url: LOST_URL }
);
console.log("got response");
const responseObj = await xml2js.
parseStringPromise(response.data);
if (Object.keys(responseObj).indexOf('errors') !== -1) {
fs.writeFileSync(`${ERR_DIR}/${generateKey()}-${id}.xml`, response.data);
failedRequestsCount++;
} else {
successRequestsCount++;
console.log('Response from the Lost Server');
console.log(response[i].data);
}
}
} catch (err) {
console.log(err);
}
}
}
})
.on('end', () => {
console.log('file processed');
console.log(`
************************************************
Total Records Processed:${totalRecordsCount}
Total Requests Sent: ${totalRequestsCount}
Success Requests: ${successRequestsCount}
Failed Requests: ${failedRequestsCount}
************************************************
`);
});
}
async function sendLoSTRequest (options, params) {
const { axios } = options;
const { url, data } = params;
if (url) {
return axios.post(url, data);
// eslint-disable-next-line no-else-return
} else {
console.log('URL is not found');
return null;
}
}
Code needs to flow like so:
read a line in a sync way
process the line and transform the line into an array of two members
for every member call API and do stuff
once line is complete, look for another line, all done in order
UPDATE: Got a workaround..but it fires stream.end() without waiting stream to finish read
async function processSOIFileLocal (options, params) {
console.log('Process SOI file');
const { ERR_DIR, fs, xml2js, LOST_URL, byline, event } = options;
const { key } = params;
const responseObject = {};
let totalRecordsCount = 0;
let totalRequestsCount = 0;
let failedRequestsCount = 0;
let successRequestsCount = 0;
let dataObject = {};
const queue = (() => {
let q = Promise.resolve();
return fn => (q = q.then(fn));
})();
const readStream = byline.createStream(fs.createReadStream(key));
readStream.setEncoding('utf8');
const pattern = /^UHL\s|^UTL\s/;
const regExp = new RegExp(pattern);
readStream.on('readable', () => {
let line;
while (null !== (line = readStream.read())) {
if (!regExp.test(line.toString())) {
totalRecordsCount++;
dataObject = soiParser(line);
const { id } = dataObject;
const XMLRequests = createLoSTRequestXML(
options,
{ mapping: event.mapping, row: dataObject }
);
// eslint-disable-next-line no-loop-func
queue(async () => {
try {
for (let i = 0;i < XMLRequests.length;i++) {
console.log('Sending request');
console.log(id);
totalRequestsCount++;
const response = await sendLoSTRequest(
options,
{ data: XMLRequests[i],
url: LOST_URL }
);
console.log('got response');
const responseObj = await xml2js.
parseStringPromise(response.data);
if (Object.keys(responseObj).indexOf('errors') !== -1) {
// console.log('Response have the error:');
// await handleError(options, { err: responseObj, id });
failedRequestsCount++;
fs.writeFileSync(`${ERR_DIR}/${generateKey()}-${id}.xml`, response.data);
} else {
console.log('Response from the Lost Server');
console.log(response[i].data);
successRequestsCount++;
}
}
} catch (err) {
console.log(err);
}
});
}
}
})
.on('end', () => {
console.log('file processed');
console.log(`
************************************************
Total Records Processed:${totalRecordsCount}
Total Requests Sent: ${totalRequestsCount}
Success Requests: ${successRequestsCount}
Failed Requests: ${failedRequestsCount}
************************************************
`);
Object.assign(responseObject, {
failedRequestsCount,
successRequestsCount,
totalRecordsCount,
totalRequestsCount
});
});
}
Thank You
The sample code at the top of your question could be rewritten like
const queue = (() => {
let q = Promise.resolve();
return (fn) => (q = q.then(fn));
})();
stream.on('data', async() => {
while (data = stream.read() !== null) {
console.log('line');
const requests = getRequests(); // sync code,no pblms
queue(async () => {
for (let i = 0; i < requests.length; i++) {
const result = await apiCall(request[i]);
console.log('result from api');
const finalResult = await anotherapiCall(result.data);
}
});
}
});
Hopefully that will be useful for the complete code
If anyone want a solution for synchronisely process the file, ie, linebyline read and execute some Async call, it's recommended to use inbuilt stream transform. There we can create a transform function and return a callback when finishes.
That's will help of any one face this issues.
Through2 is a small npm library that also can be used for the same.
Here is the index.ts script I am running (based on something I found on reddit):
const path = require("path");
const sql = require("mssql");
const config = require(path.resolve("./config.json"));
let db1;
const connect = () => {
return new Promise((resolve, reject) => {
db1 = new sql.ConnectionPool(config.db, err => {
if (err) {
console.error("Connection failed.", err);
reject(err);
} else {
console.log("Database pool #1 connected.");
resolve();
}
});
});
};
const selectProjects = async (name) => {
const query = `
select * from [Time].ProjectData where [Name] like concat('%', concat(#name, '%'))`;
const request = new sql.Request(db1);
const result = await request
.input("name", name)
.query(query);
return result.recordset;
};
module.exports = {
connect,
selectProjects
};
connect().then(function() {
console.log(selectProjects('General'));
}).catch(function(err) {
console.log(err);
});
When I run the script using node index (after compiling it of course), I get this in the console:
Database pool #1 connected.
Promise { <pending> }
And then the script hangs.
Apparently the await keyword creates an implicit promise; I had to change the last function call to:
connect().then(function() {
selectProjects('General').then(function(data) {
console.log(data);
});
}).catch(function(err) {
console.log(err);
});
I'm building a server in Node that will search a folder to see if an XML file exists (glob), and if it does, read the file in (fs) as a JSON object (xml2js) and eventually store it in a database somewhere. I'm want to get the results OUT of the parser and into another variable so I can do other things with the data. From what I can tell, something is running synchronously, but I can't figure out how to stop it and for me to wait until it's finished to continue moving on.
I'm separating my function out into a controller elsewhere from app.js:
app.controller.js
const fs = require('fs-extra');
const glob = require('glob');
const xml2js = require('xml2js');
exports.requests = {};
exports.checkFileDrop = async () => {
console.log('Checking for xml in filedrop...');
// this is the only place await works...
await glob('./filedrop/ALLREQUESTS-*.xml', (err, files) => {
var parser = new xml2js.Parser();
// this is looking for a specific file now, which I'll address later once I can figure out this issue
fs.readFile('./filedrop/ALLREQUESTS-20170707.xml', 'utf16le', function (err, data) {
if (err) {
console.log('ERROR: ', err);
} else {
parser.parseString(data, (err, result) => {
if (err) {
console.log('ERROR: ', err);
} else {
console.log('data found');
exports.requests = JSON.stringify(result.Records.Record);
// data is outputted here correctly
console.log(exports.requests);
// this doesn't even seem to want to save to exports.requests anyways...
}
});
}
});
});
}
app.js
const appController = require('./controllers/app.controller');
// check if there is file in filedrop
appController.checkFileDrop();
// prints out an empty object
console.log(appController.requests);
// can't do anything if it doesn't exist yet
appController.saveToDB(appController.requests);
await will wait for a Promise value to resolve, otherwise it'll just wrap the value it is given in a promise and resolve the promise right away. In your example,
await glob('./filedrop/ALLREQUESTS-*.xml', (err, files) => {
the call to glob does not return a Promise, so the await is essentially useless. So you need to create the promise yourself.
exports.checkFileDrop = async () => {
console.log('Checking for xml in filedrop...');
const files = await new Promise((resolve, reject) => glob('./filedrop/ALLREQUESTS-*.xml', (err, files) => {
if (err) reject(err);
else resolve(files);
});
const parser = new xml2js.Parser();
const data = await new Promise((resolve, reject) => fs.readFile('./filedrop/ALLREQUESTS-20170707.xml', 'utf16le', function (err, data) {
if (err) reject(err);
else resolve(data);
});
const result = await new Promise((resolve, reject) => parser.parseString(data, (err, result) => {
if (err) reject(err);
else resolve(result);
});
console.log('data found');
const requests = JSON.stringify(result.Records.Record);
console.log(requests);
}
Note that now this function will reject the promise it returns instead of force-logging the error.
You can also condense this down with a helper. Node 8 for instance includes util.promisify to make code like this easier to write, e.g.
const util = require('util');
exports.checkFileDrop = async () => {
console.log('Checking for xml in filedrop...');
const files = await util.promisify(glob)('./filedrop/ALLREQUESTS-*.xml');
const parser = new xml2js.Parser();
const data = await util.promisify(fs.readFile)('./filedrop/ALLREQUESTS-20170707.xml', 'utf16le');
const result = await util.promisify(parser.parseString.bind(parser))(data);
console.log('data found');
const requests = JSON.stringify(result.Records.Record);
console.log(requests);
}
You can use async/await
import fs from 'fs';
import { promisify } from 'util';
const xmlToJson = async filePath => {
const parser = new xml2js.Parser
try {
const data = await fs.promises.readFile(filePath, 'utf8')
const result = await promisify(parser.parseString)(data);
const requests = JSON.stringify(result.merchandiser.product);
return requests
}
catch(err) {
console.log(err)
}
}