Function iteration with Async waterfall in Node.js - javascript

I am trying to get resource from the some url, that resource is separated as multiple pages, the number of page is over 500, but the order of resource have to be guaranteed, so I decided to use Async module.
function getData(page, callback){
var url = "http://someurl.com/document?page="+page;
// get data from the url ...
// when success,
callback();
};
So, above function is get resource from some url, I have to iterate this function to many times, but I don't know how can I iterate this with async waterfall. What point I should push the for iteration?
async.waterfall([
// page 1
function(callback){
getData(1, function(){
callback(null);
});
},
// page 2
function(callback){
getData(2, function(){
callback(null);
});
},
// page 3, 4, 5..... 100, ... 500
],function(err, result){
if(err) return next();
console.log('finish !');
});

Why don't you use Promises:
function getData(page, callback){
var url = "http://someurl.com/document?page="+page;
// var request = call to get data from the url ...
return request;
};
var promises = [];
for (var page = 1; page < totalPages; page++) {
promises.push(getData(page));
}
Promise.all(promises).then(function (listOfResults) {
// Do callback here
});
Just make sure that your method of sending the get request returns a promise. If not you can use a function like this:
function usesCallback(data, callback) {
// Do something with data
callback();
}
function makePromise(data) {
return new Promise(function (resolve, reject) {
usesCallback(data, resolve);
});
}
Here is some more info on Promises

If you do want to use async, async.map() and async.mapLimit() are better choices since they apply the iteratee to each item in parallel and guarantee the results array to be in the same order as the original collection.
async.mapLimit(_.range(1, 500), 10, getData, function (err, results) {
// do something with results
});
The code above means to get data from page 1 to page 500, with no more than 10 async operations at a time.
_.range(1, 500) is a function from underscore to generate an array [1, 2, 3, ..., 500]. If you do not like to use underscore in your project, you can simply rewrite it such as:
function range(lower, upper) {
return Array.apply(null, Array(upper - lower + 1))
.map(function (_, i) { return lower + i; });
}

You can use async's whilst for this check here.

Related

Wating for all finished request in a loop with node request

I use the node request ajax package. So, i have an loop, in every iteration it makes an request to my server.
// realItems needs the complete value of items assigned
var realItems;
var items = [];
_.forEach(JSON.parse(body), (value, key) => {
request('myurl/' + id, (error, response, body) => {
items = JSON.parse(body)
});
});
How can i bundle all my requests from request package, so I can assign the value of items variable to the realItems at the end?
// edit:
I use react js, so in this case realItems is an state, and i can't trigger it in every loop iteration, because render triggers on every setState
There are a number of ways to approach this. Here's a brute force method that does not preserve the order of the results:
var items = [];
var cnt = 0;
_.forEach(JSON.parse(body), (value, key) => {
++cnt;
request('myurl/' + value.id, (error, response, body) => {
items.push(JSON.parse(body));
// if all requesets are done
if (--cnt === 0) {
// process items here as all results are done now
}
});
});
Here's a version that uses Bluebird promises:
var Promise = require('bluebird');
var request = Promise.promisify(require("request"));
Promise.promisifyAll(request);
var promises = [];
_.forEach(JSON.parse(body), (value, key) => {
promises.push(request('myurl/' + value.id));
});
Promise.all(promises).then(function(results) {
// all requests are done, data from all requests is in the results array
// and are in the order that the requests were originally made
});
And, here's a little bit simpler Bluebird promises method that uses a Bluebird iterator:
var Promise = require('bluebird');
var request = Promise.promisify(require("request"));
Promise.promisifyAll(request);
Promise.map(JSON.parse(body), function(value) {
return request('myurl/' + value.id);
}).then(function(results) {
// all requests are done, data is in the results array
});
Is it a requirement that you use the request package? I use async which is similar and comes with a parallel method which does exactly what you're asking -
https://github.com/caolan/async#parallel
example:
async.parallel([
function(callback){
setTimeout(function(){
callback(null, 'one');
}, 200);
},
function(callback){
setTimeout(function(){
callback(null, 'two');
}, 100);
}
],
// optional callback
function(err, results){
// the results array will equal ['one','two'] even though
// the second function had a shorter timeout.
});

NodeJS/Waterline, Need to wait for multiple create end before response

In my nodejs app (using sailsjs), I use a controller to receive multiple images uploaded in a .zip, then I save them in my database using waterline:
var images = [];
zipEntries.forEach(function(zipEntry) {
zip.extractEntryTo(zipEntry.entryName, p, false, true);
Image.create({
filename: zipEntry.name,
}).exec(function(err, image){
images.push(image);
});
});
res.json(images);
The problem is I need to send back all the Image Ids generated (auto incremented), but the create method is asynchronous.Is there any way to wait for all create method end before I can send the server response ?
EDIT: So I found a work around by checking the number of images that I get and increment an Index in the .exec method, I can check if this is the last exec and then send the response.
var images = [],
nbValidImage = 0,
i = 0;
zipEntries.forEach(function(zipEntry) {
zip.extractEntryTo(zipEntry.entryName, p, false, true);
nbValidImage++;
Image.create({
filename: zipEntry.name,
}).exec(function(err, image){
i++;
images.push(image);
if(i == nbValidImage) {
res.json(images);
}
});
});
But if someone has a better solution... :)
In this case, I would go with async.each, since zipEntries is a collection (didn't test this code):
var async = require('async');
async.each(zipEntries, function (zipEntry, callback) {
zip.extractEntryTo(zipEntry.entryName, p, false, true);
Image.create({
filename: zipEntry.name
}).exec(function (err, image) {
images.push(image);
callback();
});
}, function (err) {
if (error) throw new Error(error);
res.json(images);
});
I would say you want to checkout promises or async.
Async has a parallel function I find useful in this situation.
https://github.com/caolan/async#paralleltasks-callback
It allows you to run multiple async functions at once then a callback is called when all functions are done.
I think it would work perfectly for your needs.
Here is an example from their docs:
var async = require('async');
async.parallel([
function(callback){
setTimeout(function(){
callback(null, 'one');
}, 200);
},
function(callback){
setTimeout(function(){
callback(null, 'two');
}, 100);
}
],
// optional callback
function(err, results){
// the results array will equal ['one','two'] even though
// the second function had a shorter timeout.
});

Asynchronous Calls and Recursion with Node.js

I'm looking to execute a callback upon the full completion of a recursive function that can go on for an undetermined amount of time. I'm struggling with async issues and was hoping to get some help here. The code, using the request module, is as follows:
var start = function(callback) {
request.get({
url: 'aaa.com'
}, function (error, response, body) {
var startingPlace = JSON.parse(body).id;
recurse(startingPlace, callback);
});
};
var recurse = function(startingPlace, callback) {
request.get({
url: 'bbb'
}, function(error, response, body) {
// store body somewhere outside these funtions
// make second request
request.get({
url: 'ccc'
}, function(error, response, body) {
var anArray = JSON.parse(body).stuff;
if (anArray) {
anArray.forEach(function(thing) {
request.get({
url: 'ddd'
}, function(error, response, body) {
var nextPlace = JSON.parse(body).place;
recurse(nextPlace);
});
})
}
});
});
callback();
};
start(function() {
// calls final function to print out results from storage that gets updated each recursive call
finalFunction();
});
It seems that once my code goes past the for loop in the nested requests, it continues out of the request and ends the initial function call while the recursive calls are still going on. I want it to not finish the highest-level iteration until all the nested recursive calls have completed (which I have no way of knowing how many there are).
Any help is GREATLY appreciated!
In your example you have no recursive calls. If I understand correctly you want to say that recurse(point, otherFunc); is the beginning of a recursive call.
Then just go back to the definition of the recursive call (which you have not shown in your post) and do this (add a third argument for a callback function to be called in the end of recursion; the caller will pass it as a parameter):
function recurse(startingPlace, otherFunc, callback_one) {
// code you may have ...
if (your_terminating_criterion === true) {
return callback_one(val); // where val is potentially some value you want to return (or a json object with results)
}
// more code you may have
}
Then in the original code that you posted, make this call instead (in the inner-most part):
recurse(startingPlace, otherFunc, function (results) {
// results is now a variable with the data returned at the end of recursion
console.log ("Recursion finished with results " + results);
callback(); // the callback that you wanted to call right from the beginning
});
Just spend some time and try to understand my explanation. When you understand, then you will know node. This is the node philosophy in one post. I hope it is clear. Your very first example should look like this:
var start = function(callback) {
request.get({
url: 'aaa.com'
}, function (error, response, body) {
var startingPlace = JSON.parse(body).id;
recurse(startingPlace, otherFunc, function (results) {
console.log ("Recursion finished with results " + results);
callback();
});
});
};
Below is only additional information in case you are interested. Otherwise you are set with the above.
Typically in node.js though, people return an error value as well, so that the caller knows if the function that was called has finished successfully. There is no big mystery here. Instead of returning just results people make a call of the form
return callback_one(null, val);
Then in the other function you can have:
recurse(startingPlace, otherFunc, function (recError, results) {
if (recErr) {
// treat the error from recursion
return callback(); // important: use return, otherwise you will keep on executing whatever is there after the if part when the callback ends ;)
}
// No problems/errors
console.log ("Recursion finished with results " + results);
callback(); // writing down `return callback();` is not a bad habit when you want to stop execution there and actually call the callback()
});
Update with my suggestion
This is my suggestion for the recursive function, but before that, it looks like you need to define your own get:
function myGet (a, callback) {
request.get(a, function (error, response, body) {
var nextPlace = JSON.parse(body).place;
return callback(null, nextPlace); // null for no errors, and return the nextPlace to async
});
}
var recurse = function(startingPlace, callback2) {
request.get({
url: 'bbb'
}, function(error1, response1, body1) {
// store body somewhere outside these funtions
// make second request
request.get({
url: 'ccc'
}, function(error2, response2, body2) {
var anArray = JSON.parse(body2).stuff;
if (anArray) {
// The function that you want to call for each element of the array is `get`.
// So, prepare these calls, but you also need to pass different arguments
// and this is where `bind` comes into the picture and the link that I gave earlier.
var theParallelCalls = [];
for (var i = 0; i < anArray.length; i++) {
theParallelCalls.push(myGet.bind(null, {url: 'ddd'})); // Here, during the execution, parallel will pass its own callback as third argument of `myGet`; this is why we have callback and callback2 in the code
}
// Now perform the parallel calls:
async.parallel(theParallelCalls, function (error3, results) {
// All the parallel calls have returned
for (var i = 0; i < results.length; i++) {
var nextPlace = results[i];
recurse(nextPlace, callback2);
}
});
} else {
return callback2(null);
}
});
});
};
Note that I assume that the get request for 'bbb' is always followed by a get request for 'ccc'. In other words, you have not hidden a return point for the recursive calls where you have the comments.
Typically when you write a recursive function it will do something and then either call itself or return.
You need to define callback in the scope of the recursive function (i.e. recurse instead of start), and you need to call it at the point where you would normally return.
So, a hypothetical example would look something like:
get_all_pages(callback, page) {
page = page || 1;
request.get({
url: "http://example.com/getPage.php",
data: { page_number: 1 },
success: function (data) {
if (data.is_last_page) {
// We are at the end so we call the callback
callback(page);
} else {
// We are not at the end so we recurse
get_all_pages(callback, page + 1);
}
}
}
}
function show_page_count(data) {
alert(data);
}
get_all_pages(show_page_count);
I think you might find caolan/async useful. Look especially into async.waterfall. It will allow you to pass results from a callback from another and when done, do something with the results.
Example:
async.waterfall([
function(cb) {
request.get({
url: 'aaa.com'
}, function(err, res, body) {
if(err) {
return cb(err);
}
cb(null, JSON.parse(body).id);
});
},
function(id, cb) {
// do that otherFunc now
// ...
cb(); // remember to pass result here
}
], function (err, result) {
// do something with possible error and result now
});
If your recursive function is synchronous, just call the callback on the next line:
var start = function(callback) {
request.get({
url: 'aaa.com'
}, function (error, response, body) {
var startingPlace = JSON.parse(body).id;
recurse(startingPlace, otherFunc);
// Call output function AFTER recursion has completed
callback();
});
};
Else you need to keep a reference to the callback in your recursive function.
Pass the callback as an argument to the function and call it whenever it is finished.
var start = function(callback) {
request.get({
url: 'aaa.com'
}, function (error, response, body) {
var startingPlace = JSON.parse(body).id;
recurse(startingPlace, otherFunc, callback);
});
};
Build your code from this example:
var udpate = function (callback){
//Do stuff
callback(null);
}
function doUpdate() {
update(updateDone)
}
function updateDone(err) {
if (err)
throw err;
else
doUpdate()
}
doUpdate();
With ES6, 'es6-deferred' & 'q'. You could try as following,
var Q = require('q');
var Deferred = require('es6-deferred');
const process = (id) => {
var request = new Deferred();
const ids =//do something and get the data;
const subPromises = ids.map(id => process(id));
Q.all(subPromises).then(function () {
request.resolve();
})
.catch(error => {
console.log(error);
});
return request.promise
}
process("testId").then(() => {
console.log("done");
});

a way to know when all callbacks are done in javascript

I have many calls to a service at the end of which i want to write to a file my final collection when all the callbacks of the service have returned.
is there there a way to be sure that all callbacks are done ?
for (id in idsCollection) {
object.callService(id, function (res) {
collection.push(res);
});
}
filewriter.writetoFile("filename.json", JSon.Stringify(collection));
EDIT : just for the record i'm using cheerio with nodeJS.
Create an array. Push something onto the array each time you set up a callback. Pop something off it each time the callback runs. Check to see if the array is empty inside the callback function. If it is empty, then all the callbacks are done.
I typically use the node-async library for this sort of thing. It makes it easy to do exactly what you're talking about:
async.each(yourArray,
function(element, next) {
// this callback gets called for each element in your array
element.doSomething(function(returnValue){
next(returnValue) // call next when you're done
}
}, function(err, returnValues) {
// when all the elements in the array are processed, this is called
if (err) return console.log(err);
console.log(returnValues) // this is an array of the returnValues
});
})
You could simply count them. In your case it seems you already know how many callbacks there are going to be.
var remaining = idsCollection.length; // assuming array
for (id in idsCollection) {
object.callService(id, function (res) {
collection.push(res);
remaining -= 1; // decrement by 1 per callback
// here you can check if remaining === 0 (all done)
});
}
you can use nimble lib http://caolan.github.io/nimble/.
nimble paralel example
var _ = require('nimble');
_.parallel([
function (callback) {
setTimeout(function () {
console.log('one');
callback();
}, 25);
},
function (callback) {
setTimeout(function () {
console.log('two');
callback();
}, 0);
}
], function(){
console.log('done')
});
output
> two
> one
> done
I see many answers here, but I hope that this solution may still help someone.
Create a promise for each callback to be extinguished as such:
function funcToLoop(arg){
return new Promise((resolve, reject) => {
try{
funcWithCallback(arg, (cbArg) => {
// do your stuff
resolve(cbArg)
});
} catch (e) {
reject(e)
}
});
}
Then, you can create a loop as a async function and handle eventual results/states/etc here:
async function mainLoop(array){
let results = [];
for (let arg of array){
results.push(await funcToLoop(arg))
}
// handle results
}
... or you can have a sync function, collect the promises and handle them:
function mainLoop(array){
let promises = [];
for (let arg of array){
promises.push(funcToLoop(arg))
}
Promise.all(promises).then(()=>{
// handle promises
})
}
Claudio
jQuery.Deferred() objects might be what you are looking for.
OR if you are using HTML5 you can use promises .
Here is how to create promises
var promise = new Promise(function(resolve, reject) {
// do a thing, possibly async, then…
if (/* everything turned out fine */) {
resolve("Stuff worked!");
}
else {
reject(Error("It broke"));
}
});
And here is how to use them
promise.then(function(result) {
console.log(result); // "Stuff worked!"
}, function(err) {
console.log(err); // Error: "It broke"
});
Check this link for more info
If you're using jQuery, you can use $.when
Example:
exmCall1 = $.getJson(..);
exmCall2 = $.getJson(..);
$.when(exmCall1, exmCall2).done(function (exmCall1Ret, exmCall2Ret) {
//do stuff
});
You can read the actual documentation here: http://api.jquery.com/jquery.when/
Or do some hardcode:
var running;
for (id in idsCollection) {
object.callService(id, function (res) {
collection.push(res);
running += 1;
});
}
var loop = setInterval(function() {
if(running >= idsCollection.length) {
filewriter.writetoFile("filename.json", JSon.Stringify(collection));
clearInterval(loop);
}
, 500);

node.js - how do I run a series of callback functions in order?

like many other peoples, I want to turn a async function of a third party module (Patio) into a sync function.
function get_user_message_list(parameters, array, response)
{
var new_array = [];
for (var i in array) {
var json = array[i];
json['content']['users_seen'] = ["1757842565"];
json['content']['users_not_seen'] = [];
new_array.push(json);
}
console.log("NEW ARRAY :");
console.log(new_array);
response.writeHeader(200, {'Content-Type':'application/json'});
response.end(JSON.stringify(new_array));
}
function get_json_message(parameters, json)
{
console.log("JSON OBJECT :");
console.log(json);
var dataset = db.from(TABLES.USER).join(TABLES.MOVIE_LIST, {MLUserId: sql.URId}).join(TABLES.MOVIE, {MVId: sql.MLMovieId});
dataset.where('MLSeen={seen} AND MVSourceId={movie} AND MVSource={source} AND URId!={user}', {seen: 1, movie: json['content']['movie_id'], source: json['content']['movie_source'], user:parameters.FACEBOOK_ID}).all().then(function(users){
if (users) {
for (var j in users) {
json['content']['users_seen'].push(users[j].URId);
}
}
//console.log(json['content']['users_seen']);
dataset.where('MLSeen={seen} AND MVSourceId={movie} AND MVSource={source} AND URId!={user}', {seen: 0, movie: json['content']['movie_id'], source: json['content']['movie_source'], user:parameters.FACEBOOK_ID}).all().then(function(users){
if (users) {
for (var j in users) {
json['content']['users_not_seen'].push(users[j].URId);
}
}
console.log(json);
}, errorHandler);
}, errorHandler);
}
In the get_user_message_list function I iterate into an array and for each iteration I calling the async function. In this async function I'm using Patio module to make request to MySQL database. But like you can see, I must wait for the query result to be get after sending a result to the previous function.
How can I wait for the query result to be got before I send it to the next function?
You CAN and you SHOULD turn async functions into something that behaves like sync functions when a problem needs to be fixed. You can't is never the correct answer, the shouldn't is for the programmer to answer.
So, I recently found some code in the nodeunit module which may help you. It fires the async functions, keep track of which are ready. After all requests are in, fires the callback. This could be the idea behind the solution to your problem (so no, this is not the final solution).
async.forEachSeries = function (arr, iterator, callback) {
if (!arr.length) {
return callback();
}
var completed = 0;
var iterate = function () {
iterator(arr[completed], function (err) {
if (err) {
callback(err);
callback = function () {};
}
else {
completed += 1;
if (completed === arr.length) {
callback();
}
else {
iterate();
}
}
});
};
iterate();
};
This test triggered me to see how it was done:
exports['series'] = function (test) {
var call_order = [];
async.series([
function (callback) {
setTimeout(function () {
call_order.push(1);
callback(null, 1);
}, 25);
},
function (callback) {
setTimeout(function () {
call_order.push(2);
callback(null, 2);
}, 50);
},
function (callback) {
setTimeout(function () {
call_order.push(3);
callback(null, 3, 3);
}, 15);
}
],
function (err, results) {
test.equals(err, null);
test.same(results, [1, 2, [3, 3]]);
test.same(call_order, [1, 2, 3]);
test.done();
});
};
Happy programming!
You can't and you shouldn't. This would effectively block your Node.JS server and you would loose every advantage Node.JS provides. Also it is against the whole asynchronous idea behind Node.JS.
Just pass callback parameter to your function:
function get_json_message(parameters, json, callback){ // <---- note the callback
// some other stuff
dataset.where( ...
// some other stuff
dataset.where( ...
// some other stuff
// I've finished the job, call the callback
callback(); // <--- you can pass additional params here
});
});
}
and call it like this:
get_json_message( params, json, function() {
console.log('Hello world!');
// do whatever you like inside callback
});
I've been using syncrhonize.js with great success. There's even a pending pull request (which works quite well) to support async functions which have multiple parameters. Far better and easier to use than node-sync imho. Added bonus that it has easy-to-understand and thorough documentation, whereas node-sync does not.

Categories

Resources