await for function with callback - javascript

I'm playing with streams and async/await functionality. What I have so far is:
let logRecord = ((record, callback) => {
    console.log(record);
    return callback();
});

let importCSVfromPath = async((csv_path) => {
    return new Promise(function(resolve, reject) {
        var parser = parse();
        var input = fs.createReadStream(csv_path);
        var transformer = transform(logRecord, {parallel: 1});
        input.on('error', (err) => {
            reject(err);
        });
        input.on('finish', () => {
            resolve();
        });
        input.pipe(parser).pipe(transformer);
    });
});
Now I want to replace logRecord with importRecord. The problem is that this function has to use functions that are already part of the async stack.
let importRecord = async((record) => {
    .......
    await(insertRow(row));
});
What's the right way to do this?

It's slightly more complicated than this - Node.js streams are not (at least not yet) adapted to the ES2017 async/await methods.
If you'd like to develop this on your own, consider writing a class derived from a Node.js stream. Implementing a promise-based interface is quite a task, but it is possible.
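To give you an idea of the smaller end of that task, here's a minimal sketch (plain Node.js, no framework) of a Transform stream in object mode that waits for a promise-returning importRecord - assumed to be your own function returning a Promise - before passing each record along:

const { Transform } = require('stream');

// Minimal sketch: await a hypothetical promise-returning importRecord()
// for every record; calling back only after it settles respects backpressure.
class ImportTransform extends Transform {
    constructor() {
        super({ objectMode: true });
    }
    _transform(record, encoding, callback) {
        importRecord(record)                     // assumed to return a Promise
            .then(() => callback(null, record))  // done: pass the record downstream
            .catch(callback);                    // forward errors to the stream
    }
}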
If, however, you're fine with using a permissively licensed framework - take a look at Scramjet. With it your code will look like this (most of the example is parsing the CSV - I'll add a helper in the next version):
fs.createReadStream("file.csv")           // open your file
    .pipe(new StringStream())             // pass to scramjet
    .split("\n")                          // split by line
    .parse((line) => line.split(","))     // convert lines to arrays
    .map(async (line) => {                // run asynchronous mapping
        await importRecord(line);         // import log to DB
        return logRecord(line);           // return some log for the output
    })
    .pipe(process.stdout);                // pipe the output wherever you like
I believe it's exactly what you're looking for and it will run your record imports in parallel, while keeping the output order.

Related

Asynchronicity in a reduce() function WITHOUT using async/await

I am patching the exec() function to allow subpopulating in Mongoose, which is why I am not able to use async/await here -- my function will be chained off a db call, so there is no opportunity to call await on it, and within the submodule itself I can't use await outside of an async function.
With that out of the way, let's look at what I'm trying to do. I have two separate arrays (matchingMealPlanFoods and matchingMealPlanRecipeFoods) full of IDs that I need to populate. Both of them reside on the same array, foods. They each require a db call with aggregation, and the problem in my current scenario is that only one of the arrays populates because they are happening asynchronously.
What I am trying to do now is use the reduce function to return the updated foods array to the next run of reduce so that when the final result is returned, I can replace the entire foods array once on my doc. The problem of course is that my aggregate/exec has not yet returned a value by the time the reduce function goes into its next run. Is there a way I can achieve this without async/await here? I'm including the high-level structure here so you can see what needs to happen, and why using .then() is probably not viable.
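(For context, the standard promise-chaining reduce pattern - with a hypothetical promise-returning processOption standing in for the aggregation step - would look like the sketch below; the trouble is getting there when exec hands its result to a callback:)

// Sketch: serialize the steps by folding options into one promise chain;
// each step receives the foods array the previous step produced.
options.reduce(
    (chain, option) => chain.then(foods => processOption(foods, option)),
    Promise.resolve([...doc.foods])
).then(finalFoods => {
    // replace the entire foods array once on the doc here
});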
EDIT: Updating code with async suggestion
function execute(model, docs, options, lean, cb) {
    options = formatOptions(options);
    let resolvedCount = 0;
    let error = false;
    (async () => {
        for (let doc of docs) {
            let newFoodsArray = [...doc.foods];
            for (let option of options) {
                const path = option.path.split(".");
                // ... various things happen here to prep the data
                const aggregationOptions = [
                    // $match, then $unwind, then $replaceRoot
                ];
                await rootRefModel
                    .aggregate(aggregationOptions)
                    .exec((err, refSubDocuments) => {
                        // more stuff happens
                        console.log('newFoodsArray', newFoodsArray); // this is to check whether the second iteration is using the updated newFoodsArray
                        const arrToReturn = newFoodsArray.map((food) => {
                            const newMatchingArray = food[nests[1]].map((matchingFood) => {
                                // more stuff
                                return matchingFood;
                            });
                            const updatedFood = food;
                            updatedFood[`${nests[1]}`] = newMatchingArray;
                            return updatedFood;
                        });
                        console.log('arrToReturn', arrToReturn);
                        newFoodsArray = [...arrToReturn];
                    });
            }
            console.log('finalNewFoods', newFoodsArray); // this should log after the other two, but it is logging first.
            const document = doc.toObject();
            document.foods = newFoodsArray;
            if (resolvedCount === options.length) cb(null, [document]);
        }
    })();
}
EDIT: Since it seems it will help, here is what calls the execute function I excerpted above.
/**
 * This will populate sub refs
 * @param {import('mongoose').ModelPopulateOptions[]|
 *         import('mongoose').ModelPopulateOptions|String[]|String} options
 * @returns {Promise}
 */
schema.methods.subPopulate = function (options = null) {
    const model = this.constructor;
    if (options) {
        return new Promise((resolve, reject) => execute(model, [this], options, false, (err, docs) => {
            if (err) return reject(err);
            return resolve(docs[0]);
        }));
    }
    return Promise.resolve();
};
We can use async/await just fine here, as long as we remember that async is the same as "returning a Promise" and await is the same as "resolving a Promise's .then or .catch".
So let's turn all those "synchronous but callback-based" calls into awaitables: your outer code has to keep obeying the API contract, but since it's not meant to return a value, we can safely mark our own version of it as async, and then we can use await in combination with promises around any other callback-based function calls in our own code just fine:
async function execute(model, docs, options, lean, andThenContinueToThis) {
    options = formatOptions(options);
    let option, resolvedCount = 0;
    for (let doc of docs) {
        let newFoodsArray = [...doc.foods];
        for (option of options) {
            // ...things happen here...
            const aggregationOptions = [/*...data...*/];
            try {
                const refSubDocuments = await new Promise((resolve, reject) => rootRefModel
                    .aggregate(aggregationOptions)
                    .exec((err, result) => err ? reject(err) : resolve(result)));
                // ...do some work based on refSubDocuments...
            }
            // remember to forward errors and then stop:
            catch (err) {
                return andThenContinueToThis(err);
            }
        }
        // remember: bind newFoodsArray somewhere so it doesn't get lost next iteration
    }
    // As our absolutely last action, when all went well, we trigger the call forwarding:
    andThenContinueToThis(null, dataToForward);
}
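As an aside: when a callback-based function follows Node's standard (err, result) convention, util.promisify can build that awaitable wrapper for you. A small sketch with a hypothetical someNodeStyleFn:

const { promisify } = require('util');

// Inside an async function: someNodeStyleFn(arg, callback) with a
// Node-style (err, result) callback becomes directly awaitable.
const someNodeStyleFnAsync = promisify(someNodeStyleFn);
const result = await someNodeStyleFnAsync(arg);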

fs.readdirSync does not always return all the contents of a folder

So I have a function that is supposed to recursively return all the files in a folder. Here it is:
async function getFiles(dir) {
    const subdirs = await fs.readdirSync(dir);
    const files = await Promise.all(
        subdirs.map(async (subdir) => {
            const res = resolve(dir, subdir);
            return (await stat(res)).isDirectory() && !subdir.startsWith("__")
                ? getFiles(res)
                : res;
        })
    );
    return files.reduce((a, f) => a.concat(f), files);
}
Looks great, right? Works fine too, except, not always. I'm calling it in a pretty straightforward fashion like getFiles("./directory"), and half the time, it returns all the contents. But sometimes, it will omit contents of one subdirectory, while returning all the others.
So, let's say if the given directory has 5 subdirectories, it will only return the contents of 4. This happens infrequently and if there is some underlying pattern, I am not able to detect it. Please help!
Your code is a bit misguided for a number of reasons:
You're mixing synchronous file I/O calls with promises. There's no reason to use promises if your code is entirely synchronous. That just makes things more complicated than needed.
It's unclear what the call to resolve(dir, subdir) is supposed to do. If you're trying to make a full path, you should be using path.join(dir, subdir).
You should be using the withFileTypes option with readdir() as that saves extra roundtrips to the file system so you can just immediately check if each file is a file or directory.
You don't use await with synchronous functions.
So, if you're doing a synchronous version, you can just do this:
const fs = require('fs');
const path = require('path');

function getFilesSync(dir, files = []) {
    const listing = fs.readdirSync(dir, {withFileTypes: true});
    let dirs = [];
    for (let f of listing) {
        const fullName = path.join(dir, f.name);
        if (f.isFile()) {
            files.push(fullName);
        } else if (f.isDirectory()) {
            dirs.push(fullName);
        }
    }
    for (let d of dirs) {
        getFilesSync(d, files);
    }
    return files;
}

let files = getFilesSync(somePath);
console.log(files);
If you wanted an asynchronous version using promises, then you can do this:
const fsp = require('fs').promises;
const path = require('path');

async function getFiles(dir, files = []) {
    const listing = await fsp.readdir(dir, {withFileTypes: true});
    let dirs = [];
    for (let f of listing) {
        const fullName = path.join(dir, f.name);
        if (f.isFile()) {
            files.push(fullName);
        } else if (f.isDirectory()) {
            dirs.push(fullName);
        }
    }
    for (let d of dirs) {
        await getFiles(d, files);
    }
    return files;
}

getFiles(somePath).then(files => {
    console.log(files);
}).catch(err => {
    console.log(err);
});
Note how using the fs.promises interface along with async/await allows the asynchronous version to be very, very similar to the synchronous version.
I see your code has a subdir.startsWith("__") test in it. I don't know exactly what you were trying to do with that. You can add that into the logic I have if that's required.
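If that test was meant to skip directories whose names start with two underscores, a sketch of how it would slot into my version - just guard the directory branch:

} else if (f.isDirectory() && !f.name.startsWith("__")) {
    dirs.push(fullName);
}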
I would have put this as a comment but I do not have enough reputation.
I'm not entirely clear on the async/await methods for promises, so I'm not completely sure about what I'm saying!
Maybe an error is occurring, but you can't see it because you don't reject or catch anything.
My guess is that with async/await an error would be rejected into your const, so you could console.log() that const to see whether, when your function omits some files, it's because an error occurred.
And your last await is inside a return; it would be interesting to console.log() that too.
Edited later:
https://javascript.info/async-await
In real situations, the promise may take some time before it rejects. In that case there will be a delay before await throws an error.
We can catch that error using try..catch, the same way as a regular throw:
try {
    let response = await fetch('http://no-such-url');
} catch(err) {
    alert(err); // TypeError: failed to fetch
}

How to avoid an infinite loop in JavaScript

I have a Selenium webdriverIO V5 framework. The issue I am facing here is, the below code works fine on Mac OS, but it does not work correctly on the Windows OS. In the Windows OS it gets stuck with an infinite loop issue.
The code below merges YAML files (which contain locators) and returns the value of a locator by its key:
const glob = require('glob');
const yamlMerge = require('yaml-merge');
const sleep = require('system-sleep');

let xpath;

class Page {
    getElements(elementId) {
        function objectCollector() {
            glob('tests/wdio/locators/*.yml', function (er, files) {
                if (er) throw er;
                xpath = yamlMerge.mergeFiles(files);
            });
            do {
                sleep(10);
            } while (xpath === undefined);
            return xpath;
        }
        objectCollector();
        return xpath[elementId];
    }
}

module.exports = new Page();
Since you are waiting on the results of a callback, I would recommend returning a new Promise from your getElements function and resolve() the value you receive inside the callback. Then when you call getElements, you will need to resolve that Promise or use the await notation. The function will stop at that point and wait until the Promise resolves, but the event loop will still continue. See some documentation for more information.
I'll write an example below of what your code might look like using a Promise, but when you call getElements, you will need to put the keyword await before it. If you want to avoid that, you could resolve the Promise from objectCollector while you're in getElements and remove the async keyword from its definition, but you really should not get in the way of asynchronous JavaScript. Also, you can probably shorten the code a bit because objectCollector looks like an unnecessary function in this example:
const glob = require('glob')
const yamlMerge = require('yaml-merge')

class Page {
    async getElements(elementId) {
        function objectCollector() {
            return new Promise((resolve, reject) => {
                glob('tests/wdio/locators/*.yml', function (er, files) {
                    if (er) return reject(er)
                    resolve(yamlMerge.mergeFiles(files))
                })
            })
        }
        let xpath = await objectCollector()
        return xpath[elementId]
    }
}

module.exports = new Page();
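A usage sketch, assuming the export above and a made-up locator key - remember the call site has to be inside an async function:

const page = require('./page'); // hypothetical path to the module above

async function example() {
    const locator = await page.getElements('loginButton'); // 'loginButton' is a made-up key
    console.log(locator);
}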

d3.csv: Finding data error [duplicate]

I've been using d3.js v4 for some time now, and I've learned that Mike Bostock has replaced d3.queue in the v5 release with the native JavaScript Promise object. I would like to check with you whether this code I have written properly queues (asynchronously) these URLs:
var makeRequest = function() {
    "use strict";
    var bli = [
        "http://stats.oecd.org/sdmx-json/data/BLI2013/all/all",
        "http://stats.oecd.org/sdmx-json/data/BLI2014/all/all",
        "http://stats.oecd.org/sdmx-json/data/BLI2015/all/all",
        "http://stats.oecd.org/sdmx-json/data/BLI2016/all/all",
        "http://stats.oecd.org/sdmx-json/data/BLI/all/all"
    ];
    var promises = [];
    bli.forEach(function(url) {
        promises.push(
            new Promise(function(resolve, reject) {
                d3
                    .json(url)
                    .then(function(response) {
                        resolve(response);
                    })
                    .catch(function(error) {
                        console.log("Error on: " + url + ". Error: " + error);
                        reject(error);
                    });
            })
        );
    });
    Promise.all(promises).then(function(values) {
        console.log(values);
    });
};
makeRequest();
The code seems to function properly, but is this proper code, or is there a better way (a best-practice approach) for queuing with Promise.all and d3.js? Is the catch error properly implemented?
You can simplify that code a lot: you don't need to use new Promise with d3.json, since d3.json itself creates the promise.
So, you can just do:
var files = ["data1.json", "data2.json", "data3.json"];
var promises = [];
files.forEach(function(url) {
    promises.push(d3.json(url));
});
Promise.all(promises).then(function(values) {
    console.log(values);
});
Or, if you're into code golf, even shorter:
var files = ["data1.json", "data2.json", "data3.json"];
Promise.all(files.map(url => d3.json(url))).then(function(values) {
    console.log(values);
});
Since I cannot use JSON files in the S.O. snippet, check the console in this bl.ocks: https://bl.ocks.org/GerardoFurtado/f08993c9c729b0b3452ef1803ad9dcbf/c4b45c5acce6033085a667cbb7d34203d15de0f0
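If you still want the per-URL logging from your original catch, you can attach it directly to each d3.json promise - still no new Promise needed - and re-throw so Promise.all keeps rejecting on failure:

var promises = files.map(function(url) {
    return d3.json(url).catch(function(error) {
        console.log("Error on: " + url + ". Error: " + error);
        throw error; // re-throw so Promise.all still rejects
    });
});
Promise.all(promises).then(function(values) {
    console.log(values);
});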
Here's an approach with ES6 async functions and ES6 array destructuring:
async function chart() {
    const [first, second] = await Promise.all([
        d3.json('data1.json'),
        d3.json('data2.json'),
    ])
    console.log('data2.json :', second)
}
chart()
You can also add a formatting function for your data if you want to clean it up to your preference.
.then() will have your data in a nice array which you can use later.
const myData = d3.csv("data.csv", formatterFunction)
    .then(data => { /* do whatever */ });

function formatterFunction(row) {
    // do formatting
    return row; // the formatted row
}

Is it possible to asynchronously collect items from a generator into an array?

I'm playing around with writing a web service using Node.js/Express which generates some objects based on templates and then returns the generated data. I'm using Bluebird promises to manage all the async logic. After stripping out all the unimportant stuff, my code looks something like this[1].
My problem is the core logic can block for several seconds if the requested number of output elements is large. Since I've been playing with ES6 for this project, my first thought was to factor out the element creation into a generator[2]. However, the only way I can find to get all the results from this generator is Array.from, which doesn't help with the blocking.
I've played around with .map, .all, .coroutine, and a couple of other things, in an attempt to asynchronously collect the results from the generator, but I haven't had any luck. Is there any nice way to do this with Bluebird? (Or perhaps a better way of doing it altogether?)
Native ES6 Promise.all can take an iterator and give back an array of values, but V8 doesn't support this yet. Also, in my experimentation with polyfills/Firefox, it seems to be synchronous.
This is a not-too-common operation, so I don't care much about absolute performance. I just want to avoid blocking the event queue, and I would prefer a nice, easy-to-read and maintainable solution.
[1]:
let Bluebird = require('bluebird');
let templates = ...; // logic to load data templates

function createRandomElementFromRandomTemplate(templates) {
    let el;
    // synchronous work that can take a couple of milliseconds...
    return el;
}

api.createRandomElements = function(req, res) {
    let numEls = req.params.numEls;
    Bluebird.resolve(templates)
        .then(templates => {
            let elements = [];
            // numEls could potentially be several thousand
            for (let i = 0; i < numEls; ++i) {
                elements.push(createRandomElementFromRandomTemplate(templates));
            }
            return elements;
        })
        .then(elements => {
            res.json(elements);
        })
        .error(err => {
            res.status(500).json(err);
        });
};
[2]:
function* generateRandomElementsFromRandomTemplate(templates, numEls) {
    for (let i = 0; i < numEls; ++i) {
        let el;
        // synchronous work that can take a couple of milliseconds...
        yield el;
    }
}

api.createRandomElements = function(req, res) {
    let numEls = req.params.numEls;
    Bluebird.resolve(templates)
        .then(templates => {
            // this still blocks
            return Array.from(generateRandomElementsFromRandomTemplate(templates, numEls));
        })
        .then(elements => {
            res.json(elements);
        })
        .error(err => {
            res.status(500).json(err);
        });
};
Here's a halfway-decent solution I found after looking more closely at Bluebird's .map() as Benjamin suggested. I still have the feeling I'm missing something, though.
The main reason I started with Bluebird was because of Mongoose, so I left a bit of that in for a more realistic sample.
let Bluebird = require('bluebird');
let mongoose = require('mongoose');
Bluebird.promisifyAll(mongoose);

const Template = mongoose.models.Template,
    UserPref = mongoose.models.UserPref;

// just a normal function that generates one element with a random choice of template
function createRandomElementFromRandomTemplate(templates, userPrefs) {
    let el;
    // synchronous work that can take a couple of milliseconds...
    return el;
}

api.generate = function(req, res) {
    let userId = req.params.userId;
    let numRecords = req.params.numRecords;
    let data;
    Bluebird.props({
        userprefs: UserPref.findOneAsync({userId: userId}),
        templates: Template.findAsync({})
    })
        .then(_data => {
            data = _data;
            // use a sparse array to convince .map() to loop the desired number of times
            return Array(numRecords);
        })
        .map(() => {
            // ignore the parameter map passes in - we're using the exact same data in each iteration
            // generate one item each time and let Bluebird collect them into an array
            // I think this could work just as easily with a coroutine
            return Bluebird.delay(createRandomElementFromRandomTemplate(data.templates, data.userprefs), 0);
        }, {concurrency: 5})
        .then(generated => {
            return Generated.createAsync(generated);
        })
        .then(results => {
            res.json(results);
        })
        .catch(err => {
            console.log(err);
            res.status(500);
        });
};
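For completeness, a dependency-free sketch of the same no-blocking idea without Bluebird: consume the generator in chunks and yield to the event loop with setImmediate between chunks, so the handler never blocks for long (chunkSize is an arbitrary tuning assumption):

// Collect a synchronous iterator into an array without monopolizing the
// event loop: take chunkSize items, then let other events run, repeat.
function collectAsync(iterator, chunkSize = 100) {
    return new Promise((resolve, reject) => {
        const results = [];
        function step() {
            try {
                for (let i = 0; i < chunkSize; ++i) {
                    const { value, done } = iterator.next();
                    if (done) return resolve(results);
                    results.push(value);
                }
            } catch (err) {
                return reject(err);
            }
            setImmediate(step); // yield to the event loop between chunks
        }
        step();
    });
}

// usage with the generator from [2]:
// collectAsync(generateRandomElementsFromRandomTemplate(templates, numEls))
//     .then(elements => res.json(elements));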

Categories

Resources