Node.js - wait for multiple async calls - javascript

I'm trying to make multiple MongoDB queries before I render a Jade template, but I can't quite figure out how to wait until all the Mongo Queries are completed before rendering the template.
// Route handler from the question: starts two queries on the 'lakes'
// collection (State NY and State NJ) and renders 'explore/index'.
// NOTE(review): per the question text, the render at the bottom runs before
// the toArray callbacks have assigned their results, so the template is
// given the initial {} placeholders.
exports.init = function(req, res){
var NYLakes = {};
var NJLakes = {};
var filterNY = {"State" : "NY"};
db.collection('lakes').find(filterNY).toArray(function(err, result) {
if (err) throw err;
NYLakes = result;
});
var filterNJ = {"State" : "NJ"};
db.collection('lakes').find(filterNJ).toArray(function(err, result) {
if (err) throw err;
NJLakes = result;
});
// Reached as soon as both queries have been started; nothing here waits
// for either callback.
res.render('explore/index', {
NYlakes: NYLakes,
NJlakes: NJLakes
});
};

I'm a big fan of underscore/lodash, so I usually use _.after, which creates a function that only executes after being called a certain number of times.
// Gate for two async operations: `finished` is the function produced by
// _.after(2, doRender); each operation's callback calls it once.
var finished = _.after(2, doRender);
asyncMethod1(data, function(err){
//...
finished();
});
asyncMethod2(data, function(err){
//...
finished();
})
// Declared after its first use; function declarations are hoisted, so the
// reference in _.after(2, doRender) above is valid.
function doRender(){
res.render(); // etc
}
Since javascript hoists the definition of functions defined with the function funcName() syntax, your code reads naturally: top-to-bottom.

Assuming you want to run the two operations in parallel rather than waiting for one to finish before starting the next, you'll need to track how many operations have completed in each callback.
In raw node.js javascript, one way to do this would be this:
exports.init = function(req, res){
var NYLakes = null;
var NJLakes = null;
var filterNY = {"State" : "NY"};
db.collection('lakes').find(filterNY).toArray(function(err, result) {
if (err) throw err;
NYLakes = result;
complete();
});
var filterNJ = {"State" : "NJ"};
db.collection('lakes').find(filterNJ).toArray(function(err, result) {
if (err) throw err;
NJLakes = result;
complete();
});
function complete() {
if (NYLakes !== null && NJLakes !== null) {
res.render('explore/index', {
NYlakes: NYLakes,
NJlakes: NJLakes
});
}
}
};
Basically what's happening here is that you check at the end of each operation if all of them have finished, and at that point you finish off the operation.
If you're doing a lot of these things, take a look at the async library as an example of a tool to make it easier to manage this sort of thing.

You can use async module:
// One filter object per state to query.
var states = [{"State" : "NY"},{"State" : "NJ"}];
// Iterator handed to async.map: runs the query for one filter and passes
// async's callback directly to toArray.
var findLakes = function(state,callback){
db.collection('lakes').find(state).toArray(callback);
}
// The final function receives (err, results) from async.map.
async.map(states, findLakes , function(err, results){
// do something with array of results
});

Wait.for https://github.com/luciotato/waitfor
using Wait.for:
exports.init = function(req, res){
var NYLakes = {};
var NJLakes = {};
var coll = db.collection('lakes');
var filterNY = {"State" : "NY"};
var a = wait.forMethod(coll,'find',filterNY);
NYLakes = wait.forMethod(a,'toArray');
var filterNJ = {"State" : "NJ"};
var b = wait.forMethod(coll,'find',filterNJ);
NJLakes = wait.forMethod(b,'toArray');
res.render('explore/index',
{
NYlakes: NYLakes,
NJlakes: NJLakes
}
);
};
Requesting in parallel using wait.for parallel map:
exports.init = function(req, res){
var coll = db.collection('lakes');
//execute in parallel, wait for results
var result = wait.parallel.map(
[{coll:coll,filter:{"State" : "NY"}}
, {coll:coll,filter:{"State" : "NJ"}}]
, getData);
res.render('explore/index',
{
NYlakes: result[0],
NJlakes: result[1]
}
);
};
//map function
/**
 * Map function: runs find + toArray for one item via wait.forMethod and
 * reports the outcome through a node-style callback.
 *
 * @param {{coll: object, filter: object}} item - collection and filter to query.
 * @param {Function} callback - called once, as callback(null, rows) or callback(err).
 */
function getData(item,callback){
  var rows;
  try{
    var cursor = wait.forMethod(item.coll,'find',item.filter);
    rows = wait.forMethod(cursor,'toArray');
  } catch(err){
    callback(err);
    return;
  }
  // Outside the try block so that an exception thrown by the callback itself
  // is not caught and answered with a second callback(err).
  callback (null, rows);
}
I'm not familiar with mongo, so you may have to adjust the calls.

This seems to take the fewest lines of code, using await:
var async = require("async"); //include async module
...
/**
 * Loads the NY and NJ lakes and sends them as one JSON response.
 * Reads `db`, `filterNY`, `filterNJ` and `res` from the enclosing scope.
 *
 * @returns {Promise<void>} rejects if either query fails.
 */
async function getData() { //make sure to use async function
  const lakes = db.collection('lakes');
  // The two queries are independent, so start both before awaiting.
  // toArray() supplies the promise of documents; find() alone is not awaited.
  const [NYLakes, NJLakes] = await Promise.all([
    lakes.find(filterNY).toArray(), //can append additional logic after the find()
    lakes.find(filterNJ).toArray(),
  ]);
  res.json({"NYLakes": NYLakes, "NJLakes": NJLakes}); //render response
}
getData();
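Side note: awaiting each query on its own line runs the queries one after another; it is not equivalent to Promise.all(). When the queries are independent, Promise.all() lets them run in parallel, so be careful not to overuse sequential await.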

Related

Using Request.js and Cheerio.js in Node/Express return empty array

I'm building a simple scraper using Request.js and Cheerio.js within Express. Right now I'm only looking for the title of each site. Instead of scraping the websites one by one, I put the list in an array. I loop through them and then use Cheerio.js to find the title of each website. When I console.log the titles, they come out fine, but I ultimately want to show them on an HTML page. Please note, I'm very new to programming, so if you could provide detailed feedback, that would be incredibly helpful (below is the code I've been working on). Thanks in advance!
// Question code: requests every URL and pushes each page's <title> text into
// parsedSites from inside the request callback, then returns parsedSites.
// NOTE(review): the return statement sits outside the request callbacks, so
// the pushes are not guaranteed to have happened when it runs.
function parseSites(urls) {
var parsedSites = [];
urls.forEach(function(site) {
request(site, function(err, res, body) {
if(err) {
console.log(err);
} else {
var $ = cheerio.load(body);
parsedSites.push($('title').text());
}
// NOTE(review): the brace on the next line looks unbalanced - confirm.
}
});
});
return parsedSites;
}
Please refer to the below code for a working implementation
var request = require('request-promise')
var cheerio = require("cheerio")
/**
 * Fetches every URL with getPage and, once all pages have arrived, passes the
 * array of parse() results to `callback`.
 * The returned array is the local `parsedSites`, which is never filled here;
 * the titles are delivered only through `callback`.
 */
function parseSites(urls, callback) {
  const parsedSites = [];
  const allPages = Promise.all(urls.map(getPage));
  allPages.then(function (bodies) {
    const titles = bodies.map(parse);
    callback(titles);
  });
  return parsedSites;
}
/** Starts a GET for `url` through request.get and returns whatever it returns. */
function getPage(url) {
  const pendingPage = request.get(url);
  return pendingPage;
}
/** Logs a progress line, loads `body` with cheerio and returns the text of its title element. */
function parse(body) {
  console.log("parsing body")
  const page = cheerio.load(body);
  const titleNode = page('title');
  return titleNode.text();
}
// Example call: the titles are read inside the callback; the value returned
// by parseSites is not used.
parseSites(['https://www.google.com','https://www.facebook.com'],function(data) {
console.log(data)
})
First you need to understand the difference between asynchronous and synchronous code. Let's see an example:
/** Logs the integers 0 through 4, one console.log call per number. */
function testFor() {
  [0, 1, 2, 3, 4].forEach(function (n) {
    console.log(n);
  });
}
-
// Synchronous demo: testFor() is called directly between the two log lines.
console.log('start:');
testFor();
console.log('end:');
// Here you get the expected output because this code is synchronous.
//output:
start:
0
1
2
3
4
end:
-
// Asynchronous demo: testFor is scheduled with a 1000 ms timeout instead of
// being called directly.
console.log('start:');
setTimeout(testFor,1000);
console.log('end:');
// Here you don't get the output order you might expect, because setTimeout is asynchronous.
//output:
start:
end:
0
1
2
3
4
First the console.log('start:'); is called.
Then setTimeout(testFor,1000); (but it is async and the call
will execute in 1 second).
Immediately after the console.log('end:');
is called.
Finally 1 second after, the testFor() is executed and it
prints 0 1 2 3 4
The next point is that there is an error in your code!
// The question's parseSites with the extra closing brace commented out.
// It still returns parsedSites from outside the request callbacks.
function parseSites(urls) {
var parsedSites = [];
urls.forEach(function(site) {
request(site, function(err, res, body) {
if(err) {
console.log(err);
} else {
var $ = cheerio.load(body);
parsedSites.push($('title').text());
}
//} ! THIS bracket should be removed
});
});
return parsedSites;
}
So your problem is that the 'request' in the forEach loop is an async function that will call the callback 'function(err, res, body)' once there is a response from the web page.
My solutions for this:
'use strict'
const cheerio = require('cheerio');
const request = require('request');
const async = require('async');
const urls = ['http://stackoverflow.com/','http://hackaday.com/','https://www.raspberrypi.org/','https://cheerio.js.org/'];
//SOLUTION 1: do what you need to do when all calls are done using recursion
// Shared state for solution 1: `i` is the index into `urls` that parseSites
// advances, and `parsedSites` collects the titles.
let i=0;
let parsedSites = [];
// Kick off the chain with the first URL.
parseSites(urls[i],parsedSites);
// Receives the collected titles once parseSites has gone through every URL.
function finalCall(sites) {
console.log(sites);
}
/**
 * Requests one site, records its title, then either recurses with the next
 * URL or hands the collected titles to finalCall.
 * Advances the outer index `i` before issuing the request; a failed request
 * is logged and skipped, and the chain continues.
 *
 * @param {string} site - URL to request.
 * @param {string[]} parsedSites - accumulator that receives each title.
 */
function parseSites(site,parsedSites) {
  ++i;
  request(site, function onResponse(err, res, body) {
    if (!err) {
      const page = cheerio.load(body);
      const title = page('title').text();
      console.log(title);
      parsedSites.push(title);
    } else {
      console.log(err);
    }

    if (i<urls.length) {
      parseSites(urls[i],parsedSites);// recursive call;
      return;
    }
    finalCall(parsedSites);// when all sites are done.
  });
  // nothing can be returned here: the results only exist inside the callbacks
}
//SOLUTION 2: do what you need to do when all calls are done using 'async'
// Entry point for solution 2: hand the whole URL list to parseSites.
parseSites(urls);
// Receives the collected titles when parseSites reports success.
function finalCall(sites) {
console.log(sites);
}
/**
 * Requests every URL through async.each, collecting each page title, and
 * calls finalCall with the titles when async.each reports no error.
 * A failed request is passed to async.each's callback and logged by the
 * final handler instead.
 *
 * @param {string[]} urls - URLs to request.
 */
function parseSites(urls) {
  const titles = [];

  function parseSite(site, callback) {
    request(site, function (err, res, body) {
      if (err) {
        callback(err);
        return;
      }
      const page = cheerio.load(body);
      titles.push(page('title').text());
      callback();
    });
  }

  function whenDone(err) {
    if (err) {
      console.log(err);
      return;
    }
    finalCall(titles);
  }

  async.each(urls, parseSite, whenDone);
}
Async github page
Async example

Async for Array problems

I've been pointed towards using the async module, but I'm not quite sure how to use waterfall to solve my problem.
My original code had problems with asynchronicity.
var Image = require('./models/image');
var User = require('./models/user');
// Question code: fetch images (limit 10, sorted by '-date'), then look up the
// user for the first 8 authors and collect their twitter usernames.
var query = Image.find({});
query.limit(10);
query.sort('-date')
query.exec(function (err, collected) {
if (err) return console.error(err);
var i = 0;
var authors = [];
// Starts 8 user lookups; each result is pushed from its own exec callback.
while (i < 8) {
var search = User.find({'twitter.id' : collected[i].author});
search.exec(function (err, user){
if (err) return console.error(err);
var result = (user[0].twitter.username);
authors.push(result);
});
i = i + 1;
}
// NOTE(review): the line below closes the callback body but the exec( call
// is not closed with ");" - confirm against the original post.
}
// NOTE(review): `authors` is declared inside the exec callback above, and per
// the question this log prints '[]'.
console.log(authors);
I want the authors array to hold all the found usernames. However, that last console.log() call prints '[]'.
So, you want to wait for all of the searches to complete first. You should put all your async calls into an array, and then use an async library to chain them together (waterfall) or execute simultaneously (parallel). Parallel tends to execute "faster":
// Builds the task for one author lookup. The author id arrives as a
// parameter, so every task keeps its own value; a single `var search`
// assigned in the loop would be shared by all the task closures, and each
// task would run whichever query was assigned last.
function makeSearch(authorId) {
  return function(cb) {
    User.find({'twitter.id' : authorId}).exec(function (err, user){
      if (err) cb(err, null);
      else cb(null, user[0].twitter.username);
    });
  };
}

var searches = [];
// `i` and `collected` come from the surrounding query callback.
// Stop early when fewer than 8 documents were collected.
while (i < 8 && i < collected.length) {
  searches.push(makeSearch(collected[i].author));
  i++;
}
async.parallel(searches, function( err, authors ) {
  if ( err ) return console.error( err );
  console.log(authors);
  // Authors only defined here.
  // TODO: More code
});
// Authors not defined here.

NodeJS Variable outside function scope

For the life of me I cannot work this one out. I have looked around and tried many different ways of getting this to work. I currently have the following code.
var config = require("./config.js");
var cradle = require('cradle')
var MikroNode = require('mikronode');
var WebServer = require('./bin/www');
// Placeholder value; only replaced inside the temporaryView callback below.
var Routers = "Hasnt changed";
var conndb = new(cradle.Connection)(config.couchdb.host);
var db = conndb.database(config.couchdb.db);
// Create the database when the existence check reports that it is missing.
db.exists(function(err, exists){
if (err) { console.log('error', err);}
else if (exists) { console.log('Seems the Force is with you - Database Exists');}
else { db.create(); }
});
// Temporary view emitting every document whose type is 'ConfigRouter'.
db.temporaryView({
map: function (doc){
if (doc.type=='ConfigRouter') emit(doc.name, doc);
}
}, function (err, res){
Routers = JSON.stringify(res);
}
);
// Sits outside the view callback; the output shown in the question is
// "Hasnt changed", i.e. the assignment above had not happened yet.
console.log(Routers);
As it stands it will respond with:
E:\Dev\MM>npm start
> MM#0.0.1 start E:\Dev\MM
> node ./Start.js
Hasnt changed
Seems the Force is with you - Database Exists
I am assuming it is an asynchronous call to the CouchDB and is not filling the result in time before it displays the result. How do I get around this issue?
You are right, the call is asynchronous so when console.log(Routers); is processed, Routers is "Hasnt changed".
One way of doing it would be to use promises thanks to the Q npm module:
var Q = require('q');
// Wrap the callback-style temporaryView call in a Q deferred so the result
// can be consumed as a promise.
var deferred = Q.defer();
db.temporaryView({
  map: function (doc) {
    if (doc.type=='ConfigRouter') emit(doc.name, doc);
  }
}, function (err, res) {
  // Reject on failure instead of resolving with the stringified result of a
  // failed call.
  if (err) {
    deferred.reject(err);
    return;
  }
  deferred.resolve(JSON.stringify(res));
});
deferred.promise
  .then(function (data) {
    Routers = data;
    console.log(Routers);
    // do some stuff...
  })
  .done();
Maybe it's possible to do something better without using Q.defer and adapting directly the callback:
https://github.com/kriskowal/q#adapting-node

NodeJS console.log executing before executing the FOR LOOP

I am trying to push some values to array by fetching data from Jenkins APIs, like below.
// Question code: request info for builds 14 down to 10 and collect a summary
// object per build.
// NOTE(review): `finalArray` is not declared anywhere in this snippet.
buildNum = 14;
async.waterfall([
function(callback){
for ( var i = buildNum; i > (buildNum-5); i--) {
(function(){
jenkins.build_info('BuildDefinitionRequest', i, function(err, data) {
if (err){ return console.log(err); }
var tmpObj = {};
tmpObj.jobID = data.fullDisplayName;
tmpObj.result = data.result;
tmpObj.dateTime = data.id;
console.log(tmpObj);
finalArray.push(tmpObj);
});
})();
}
// Runs right after the loop has issued the requests; it is not inside any
// build_info callback.
callback(null, finalArray, 1);
},
function(finalArray, value, callback){
console.log(finalArray, value);
callback(null, 'done');
}
],function(err, result){
});
But "callback(null, finalArray, 1);" is getting called before the for loop finish its execution.
When I am printing the value of "finalArray" inside the for loop I am able to see all the values.
Technically the for loop has finished executing, but the jenkins.build_info calls haven't. You cannot make async calls inside of a for loop like that and expect the for loop to only finish after all the calls are complete. You're already using async, so this is an easy fix. I would do something like this:
var buildNum = 14;
var builds = [];
// just builds a collection for async to operate on
for(var i = buildNum; i > (buildNum - 5); i--) {
  builds.push(i);
}
var finalArray = [];
async.each(builds, function(build, next) {
  jenkins.build_info('BuildDefinitionRequest', build, function(err, data) {
    // Return after reporting the error: otherwise execution falls through,
    // reads fields of `data`, and calls next() a second time.
    if (err) { return next(err); }
    var job = {
      jobID: data.fullDisplayName,
      result: data.result,
      dateTime: data.id
    };
    finalArray.push(job);
    next();
  });
}, function(err) {
  // this won't be called until all the jenkins.build_info calls have completed, or there is an error.
  if (err) { return console.log(err); }
  console.log(finalArray);
});

returning a variable from an async function

I have a module with a function which generates the value for a variable "stitchedBook". I can see and use this value using a callback.
However, I want to have this value available to me as a module property. How can i achieve this?
Note: I wish the output of the _BookStitcher.stitchAllStories function to go into the _BookStitcher.stitchedBook property.
// Question code: module exposing stitchAllStories and a stitchedBook property.
module.exports = _BookStitcher = (function() {
var db = require('../modules/db');
var stitchedBook = {};
// Reads the ids in "storyIdSet", queues one hgetall per story on a multi
// queue, and passes the replies to `callback`.
var stitchAllStories = function(callback) {
db.dbConnection.smembers("storyIdSet", function (err, reply) {
if (err) throw err;
else {
var storyList = reply;
console.log(storyList);
// start a separate multi command queue
multi = db.dbConnection.multi();
for (var i=0; i<storyList.length; i++) {
multi.hgetall('story/' + String(storyList[i]) + '/properties');
};
// drains multi queue and runs atomically
multi.exec(function (err, replies) {
// Rebinds the local variable only; the object returned below was built with
// the initial {} and its stitchedBook property is not updated by this.
stitchedBook = replies;
// console.log(stitchedBook);
callback(stitchedBook);
});
};
});
};
return {
stitchedBook : stitchedBook,
stitchAllStories: stitchAllStories
}
})();
EDIT: to add: I know that I can actually set the value from outside by doing something like this;
// Workaround from the question: the caller stores the reply on the module
// object itself, from inside the callback.
_BookStitcher.stitchAllStories(function (reply) {
console.log("Book has been stitched!\n\n")
console.log("the Book is;\n");
console.log(reply);
_BookStitcher.stitchedBook = reply;
console.log("-------------------------------------------------------------------------\n\n\n");
console.log(_BookStitcher.stitchedBook);
});
I was wondering if there was a way of doing it from inside the _BookStitcher module itself.
You could take advantage of how object references work in JavaScript, and assign it to a property:
module.exports = _BookStitcher = (function() {
var db = require('../modules/db');
// CHANGE HERE
var stitched = { book: null };
var stitchAllStories = function(callback) {
db.dbConnection.smembers("storyIdSet", function (err, reply) {
if (err) throw err;
else {
var storyList = reply;
console.log(storyList);
// start a separate multi command queue
multi = db.dbConnection.multi();
for (var i=0; i<storyList.length; i++) {
multi.hgetall('story/' + String(storyList[i]) + '/properties');
};
// drains multi queue and runs atomically
multi.exec(function (err, replies) {
// CHANGE HERE
stitched.book = replies;
// console.log(stitchedBook);
callback(replies);
});
};
});
};
return {
stitched : stitched,
stitchAllStories: stitchAllStories
};
}());
So instead of having it inside _BookStitcher.stitchedBook, you'd have it at _BookStitcher.stitched.book.
But that looks awful, and I'd never use it! You can't know when the value will be available, it's only safe to use it from the callback, when you're sure it's been set.

Categories

Resources