https.get call within forEach in Node.js - javascript

I am doing several https.get calls in a async.waterfall block. In the last function(callback), I have a forEach loop that iterates over the variable I received in the last call and makes GET calls accordingly.
My problem is that when i see in the UI, it does not retrieve all files every time. Sometimes it shows all files but sometimes only partial files. The profile page render happens before all the REST calls are finished and fileData is not fully populated. How can i fix this? Here is my code -
async.waterfall([
function(callback) {
//I do a https.get call here and pass on 2 arguments to the next method
},
function (var1, var2, callback) {
//I do a https.get call here and pass on 2 + 1 arguments to the next method
},
function (var1, var2, var3, callback) {
//Here i do a https OPTIONS call to get all the available REST endpoints and pass those as var4
},
function (var1, var2, var3, var4, callback) {
var fileData = [];
var4.forEach(function(element){
const url = host + var1 + element;
https.get(url, function (n) {
var output = '';
n.on('data', function (chunk) {
output += chunk;
});
n.on('end', function () {
var fileJson = JSON.parse(output);
console.log("FileJSON data here :", fileJson);
if(fileJson) {
if (fileJson.owner == req.user.username) {
fileData.push(fileJson);
}
}
});
if(var4[var4.length - 1] == element){
n.on('end', function () {
res.render('viewProfile',{
title: 'View Profile',
fileData: JSON.stringify(fileData),
events: JSON.stringify(var2),
customerEvents: JSON.stringify(var3)
});
});
callback(null, 'done');
}
}).on('error', function(e) {
console.log("Got error: " + e.message);
});
});
}
], function(err, result){
if(err) return next(err);
});

Related

Get the value from callback function and set to parent function

I am new to Nodejs and javascript and working on nodejs api code. I am using GandiAPI to check domainAvaliablity(Project related requirement) and have created a get request method (checkDomainAvaliablity) like this.
exports.checkDomainAvaliablity = function (req, res) {
gandiApi.methodCall('domain.available', [gandiapikey, [domain]], callback)
};
And I have a callback function which have 2 parameters(which I can not change).
I am able to get the value succesfully in my callback function.
Now I want to return "value" from callback and want to set in "res" paramameter of checkDomainAvaliablity function(Parent function) (something like res.json(task)) .
var callback = function (error, value) {
console.dir(value)
if (value[domain] == 'pending') {
console.log('result is not yet ready')
setTimeout(function () {
gandiApi.methodCall('domain.available', [gandiapikey, [domain]],
callback)
}, 700)
}
else {
console.dir(value)
}
// I want to return "value" from here and want to set in "res" paramameter of checkDomainAvaliablity function (Parent function).
}
Note: Use of callbackfuncion is neccessary.
Thanks #trincot. Putting the callback function inside the parent function works fine.
exports.checkDomainAvaliablity = function (req, res) {
domain = req.params.domainAvaliablity
var callback = function (error, value) {
console.log("logging" + value + error)
if (value[domain] == 'pending') {
console.log('result is not yet ready')
setTimeout(function () {
gandiApi.methodCall('domain.available', [gandiapikey, [domain]],
callback)
}, 700)
}
else {
res.send(value);
console.dir(value)
}
}
gandiApi.methodCall('domain.available', [gandiapikey, [domain]], callback)
};

Asynchronous Calls and Recursion with Node.js

I'm looking to execute a callback upon the full completion of a recursive function that can go on for an undetermined amount of time. I'm struggling with async issues and was hoping to get some help here. The code, using the request module, is as follows:
var start = function(callback) {
request.get({
url: 'aaa.com'
}, function (error, response, body) {
var startingPlace = JSON.parse(body).id;
recurse(startingPlace, callback);
});
};
var recurse = function(startingPlace, callback) {
request.get({
url: 'bbb'
}, function(error, response, body) {
// store body somewhere outside these funtions
// make second request
request.get({
url: 'ccc'
}, function(error, response, body) {
var anArray = JSON.parse(body).stuff;
if (anArray) {
anArray.forEach(function(thing) {
request.get({
url: 'ddd'
}, function(error, response, body) {
var nextPlace = JSON.parse(body).place;
recurse(nextPlace);
});
})
}
});
});
callback();
};
start(function() {
// calls final function to print out results from storage that gets updated each recursive call
finalFunction();
});
It seems that once my code goes past the for loop in the nested requests, it continues out of the request and ends the initial function call while the recursive calls are still going on. I want it to not finish the highest-level iteration until all the nested recursive calls have completed (which I have no way of knowing how many there are).
Any help is GREATLY appreciated!
In your example you have no recursive calls. If I understand correctly you want to say that recurse(point, otherFunc); is the beginning of a recursive call.
Then just go back to the definition of the recursive call (which you have not shown in your post) and do this (add a third argument for a callback function to be called in the end of recursion; the caller will pass it as a parameter):
function recurse(startingPlace, otherFunc, callback_one) {
// code you may have ...
if (your_terminating_criterion === true) {
return callback_one(val); // where val is potentially some value you want to return (or a json object with results)
}
// more code you may have
}
Then in the original code that you posted, make this call instead (in the inner-most part):
recurse(startingPlace, otherFunc, function (results) {
// results is now a variable with the data returned at the end of recursion
console.log ("Recursion finished with results " + results);
callback(); // the callback that you wanted to call right from the beginning
});
Just spend some time and try to understand my explanation. When you understand, then you will know node. This is the node philosophy in one post. I hope it is clear. Your very first example should look like this:
var start = function(callback) {
request.get({
url: 'aaa.com'
}, function (error, response, body) {
var startingPlace = JSON.parse(body).id;
recurse(startingPlace, otherFunc, function (results) {
console.log ("Recursion finished with results " + results);
callback();
});
});
};
Below is only additional information in case you are interested. Otherwise you are set with the above.
Typically in node.js though, people return an error value as well, so that the caller knows if the function that was called has finished successfully. There is no big mystery here. Instead of returning just results people make a call of the form
return callback_one(null, val);
Then in the other function you can have:
recurse(startingPlace, otherFunc, function (recError, results) {
if (recErr) {
// treat the error from recursion
return callback(); // important: use return, otherwise you will keep on executing whatever is there after the if part when the callback ends ;)
}
// No problems/errors
console.log ("Recursion finished with results " + results);
callback(); // writing down `return callback();` is not a bad habit when you want to stop execution there and actually call the callback()
});
Update with my suggestion
This is my suggestion for the recursive function, but before that, it looks like you need to define your own get:
function myGet (a, callback) {
request.get(a, function (error, response, body) {
var nextPlace = JSON.parse(body).place;
return callback(null, nextPlace); // null for no errors, and return the nextPlace to async
});
}
var recurse = function(startingPlace, callback2) {
request.get({
url: 'bbb'
}, function(error1, response1, body1) {
// store body somewhere outside these funtions
// make second request
request.get({
url: 'ccc'
}, function(error2, response2, body2) {
var anArray = JSON.parse(body2).stuff;
if (anArray) {
// The function that you want to call for each element of the array is `get`.
// So, prepare these calls, but you also need to pass different arguments
// and this is where `bind` comes into the picture and the link that I gave earlier.
var theParallelCalls = [];
for (var i = 0; i < anArray.length; i++) {
theParallelCalls.push(myGet.bind(null, {url: 'ddd'})); // Here, during the execution, parallel will pass its own callback as third argument of `myGet`; this is why we have callback and callback2 in the code
}
// Now perform the parallel calls:
async.parallel(theParallelCalls, function (error3, results) {
// All the parallel calls have returned
for (var i = 0; i < results.length; i++) {
var nextPlace = results[i];
recurse(nextPlace, callback2);
}
});
} else {
return callback2(null);
}
});
});
};
Note that I assume that the get request for 'bbb' is always followed by a get request for 'ccc'. In other words, you have not hidden a return point for the recursive calls where you have the comments.
Typically when you write a recursive function it will do something and then either call itself or return.
You need to define callback in the scope of the recursive function (i.e. recurse instead of start), and you need to call it at the point where you would normally return.
So, a hypothetical example would look something like:
get_all_pages(callback, page) {
page = page || 1;
request.get({
url: "http://example.com/getPage.php",
data: { page_number: 1 },
success: function (data) {
if (data.is_last_page) {
// We are at the end so we call the callback
callback(page);
} else {
// We are not at the end so we recurse
get_all_pages(callback, page + 1);
}
}
}
}
function show_page_count(data) {
alert(data);
}
get_all_pages(show_page_count);
I think you might find caolan/async useful. Look especially into async.waterfall. It will allow you to pass results from a callback from another and when done, do something with the results.
Example:
async.waterfall([
function(cb) {
request.get({
url: 'aaa.com'
}, function(err, res, body) {
if(err) {
return cb(err);
}
cb(null, JSON.parse(body).id);
});
},
function(id, cb) {
// do that otherFunc now
// ...
cb(); // remember to pass result here
}
], function (err, result) {
// do something with possible error and result now
});
If your recursive function is synchronous, just call the callback on the next line:
var start = function(callback) {
request.get({
url: 'aaa.com'
}, function (error, response, body) {
var startingPlace = JSON.parse(body).id;
recurse(startingPlace, otherFunc);
// Call output function AFTER recursion has completed
callback();
});
};
Else you need to keep a reference to the callback in your recursive function.
Pass the callback as an argument to the function and call it whenever it is finished.
var start = function(callback) {
request.get({
url: 'aaa.com'
}, function (error, response, body) {
var startingPlace = JSON.parse(body).id;
recurse(startingPlace, otherFunc, callback);
});
};
Build your code from this example:
var udpate = function (callback){
//Do stuff
callback(null);
}
function doUpdate() {
update(updateDone)
}
function updateDone(err) {
if (err)
throw err;
else
doUpdate()
}
doUpdate();
With ES6, 'es6-deferred' & 'q'. You could try as following,
var Q = require('q');
var Deferred = require('es6-deferred');
const process = (id) => {
var request = new Deferred();
const ids =//do something and get the data;
const subPromises = ids.map(id => process(id));
Q.all(subPromises).then(function () {
request.resolve();
})
.catch(error => {
console.log(error);
});
return request.promise
}
process("testId").then(() => {
console.log("done");
});

async.series and async.each not working as expected

I am attempting to build a web scraper using nodeJS that searches a website's HTML for images, caches the image source URLs, then searches for the one with largest size.
The problem I am having is deliverLargestImage() is firing before the array of image source URLs is looped through to get their file sizes. I am attempting to use both async.series and async.each to have this work properly.
How do I force deliverLargestImage() to wait until the async.each inside getFileSizes() is finished?
JS
var async, request, cheerio, gm;
async = require('async');
request = require('request');
cheerio = require('cheerio');
gm = require('gm').subClass({ imageMagick: true });
function imageScraper () {
var imgSources, largestImage;
imgSources = [];
largestImage = {
url: '',
size: 0
};
async.series([
function getImageUrls (callback) {
request('http://www.example.com/', function (error, response, html) {
if (!error && response.statusCode === 200) {
var $ = cheerio.load(html);
$('img').each(function (i, elem) {
if ( $(this).attr('src').indexOf('http://') > -1 ) {
var src = $(this).attr('src');
imgSources.push(src);
}
});
}
callback();
});
},
function getFileSizes (callback) {
async.each(imgSources, function (img, _callback) {
gm(img).filesize(function (err, value) {
checkSize(img, value);
_callback();
});
});
callback();
},
function deliverLargestImage (callback) {
callback();
return largestImage;
}
]);
function checkSize (imgUrl, value) {
var r, raw;
if (value !== undefined) {
r = /\d+/;
raw = value.match(r)[0];
if (raw >= largestImage.size) {
largestImage.url = imgUrl;
largestImage.size = raw;
}
}
}
}
imageScraper();
Try moving the callback() here:
function getFileSizes (callback) {
async.each(imgSources, function (img, _callback) {
gm(img).filesize(function (err, value) {
checkSize(img, value);
_callback();
});
}, function(err){ callback(err); }); /* <-- put here */
/* callback(); <-- wrong here */
},
each accepts a callback as a third parameter that gets executed when the inner loop over each element is finished:
Arguments
arr - An array to iterate over.
iterator(item, callback) - A function to apply to each item in arr.
The iterator is passed a callback(err) which must be called once it has
completed. If no error has occured, the callback should be run without
arguments or with an explicit null argument.
callback(err) - A callback which is called when all iterator functions
have finished, or an error occurs.

Node.js unexpected stack overflow with multiple get requests

I have a function that GETs a JSON object from a remote server, or from a local cache on-disk.
In a use-case, i have to call this function several thousand times with varying arguments, but when i do so, i get max stack overflow errors. I must be making a recursive call somewhere, but i can't see where it could be as my process.nextTick function calls seem to be in the right place.
I get none of my log.error readouts in the console, which would be evident if any of the recursive calls to retry the request were made.
The console output shows a repeated occurrence of
(node) warning: Recursive process.nextTick detected. This will break in the next version of node. Please use setImmediate for recursive deferral.
then...
RangeError: Maximum call stack size exceeded
Then the program exits.
Can anyone offer any help regarding what i may be doing wrong? I'm completely stumped.
Below is the function that invokes the problematic function "tf2inv.loadInventory()"
function refreshInventories(accounts, force, callback) {
//job executes download function, then pushes to inventories object
var inventories = {};
var Qinv = async.queue(function (task, invCallback) {
tf2inv.loadInventory(
task.force,
task.steamid,
function(inv, alias) {
inventories[alias] = inv;
process.nextTick(invCallback);
}
);
}, 100)
//when all queue jobs have finished, callback with populated inventories object
Qinv.drain = function (err) {
log.info('All inventories downloaded');
callback(inventories);
}
//adding jobs to the queue
for (var i = accounts.length - 1; i >= 0; i--) {
Qinv.push({
force: force,
steamid: accounts[i]
});
};
}
Shown here is the function that either parses from the cache, or requests from the remote server.
//tf2inv
var loadInventory = function(force, sid, callback) {
var invLoc = invFolder+sid
if(force) {
if(fs.existsSync(invLoc)) {
fs.unlinkSync(invLoc);
}
}
if(fs.existsSync(invLoc)) {
var body = fs.readFileSync(invLoc);
try {
var inventory = JSON.parse(body);
} catch (e) {
fs.unlinkSync(invLoc);
log.error("parsing " + sid+"'s inventory");
loadInventory(true, sid, invFolder, callback);
return;
}
process.nextTick(function() { callback(inventory, sid) })
return;
} else {
var urlPre = "http://api.steampowered.com/IEconItems_440/GetPlayerItems/v0001/?key=";
var urlSidPre = "&steamid=";
var urlInvSuf = "&inventory=yes";
var URL = urlPre+steam_API+urlSidPre+sid+urlInvSuf;
http.get(URL, function (res) {
var body = '';
res.on('data', function (data) {
body+=data;
fs.appendFile(invLoc, data);
});
res.on('end', function() {
try {
inventory = JSON.parse(body);
} catch (e) {
if(fs.existsSync(invLoc)) {
fs.unlinkSync(invLoc);
}
log.error("parsing " + sid+"'s downloaded inventory");
loadInventory(force, sid, invFolder, callback)
return;
}
process.nextTick(function() { callback(inventory, sid) })
return;
});
res.on('error', function (e, socket) {
log.error(sid + " inventory error")
if(fs.existsSync(invLoc)) {
fs.unlinkSync(invLoc);
}
log.debug('Retrying inventory')
loadInventory(force, sid, invFolder, callback);
return;
})
res.on('close', function () {res.emit('end'); log.error('connection closed')})
})
.on('error', function(e) {
log.error(JSON.stringify(e));
if(fs.existsSync(invLoc)) {
fs.unlinkSync(invLoc);
}
log.debug('Retrying inventory')
loadInventory(force, sid, invFolder, callback)
return;
})
}
};
It is likely to be failing to parse the body coming back from the server. It then immediately calls itself again, failing again, infinitely looping and causing a stack overflow.
I suggest you do not retry automatically on a failed parse - if it fails once, it is likely to fail again. It would be best to call back with the error, and let the part of your programming calling this handle the error, or passing it back to the point where it can let the user know that something is wrong.

Closures to get variable in node.js

I am working on thesis software for get variable in closures.
This is my code in node.js
var kepala = express.basicAuth(authentikasi);
// authenticate for login
function authentikasi(user, pass, callback) {
// declrare my database mongodb
db.collection('ak_teacher', function (err, data) {
data.findOne({
'tch_name': {
'$regex': user
}
}, function (err, level) {
console.log(level); // monitor data
if (level == null) {
console.log('Nilai database kepala sekolah masuk Null ulangi login');
callback(null);
} else {
var a = level.tch_name;
var b = level.tch_password;
var c = level.sch_id; // (i need this variable for next code)
var result = (user === a && pass === b);
console.log("id Sekolah : " + c);
callback(null /* error */ , result);
}
});
});
};
var tes = authentikasi(); // (in here i dont know declare for get my variable c)
app.get('/siswa_2', kepala, function (req, res) {
// i use variable in here
var sch_id = tes;
console.log("id school in query :" + sch_id);
console.log('Menampilkan Seluruh data Siswa');
db.collection('ak_student', function (err, collection) {
collection.find({
"sch_id": sch_id
}).toArray(function (err, items) {
console.log(items);
res.send(items);
});
});
});
I am trying to get variable c.
You need to pass c into the callback as well to get it's value:
callback(null /* error */, result, c);
Then call authentikasi like this:
// Note, since this callback is called by passing "c"
// as the third argument, this is how we assign it to "tes"
// |
// |
// V
authentikasi(user,pass,function(unused,result,tes) {
app.get('/siswa_2', kepala, function(req, res) {
var sch_id = tes;
console.log("id school in query :" +sch_id);
console.log('Menampilkan Seluruh data Siswa');
db.collection('ak_student', function(err, collection) {
collection.find({"sch_id":sch_id}).toArray(function(err, items) {
console.log(items);
res.send(items);
});
});
});
});
According to the docs, you're supposed to call back with a user object. You can simply put your c variable on there:
var kepala = express.basicAuth(authentikasi);
function authentikasi(user, pass, callback) {
// declrare my database mongodb
db.collection('ak_teacher', function (err, data) {
data.findOne({
'tch_name': {
'$regex': user
}
}, function (err, level) {
console.log(level); // monitor data
if (level == null) {
console.log('Nilai database kepala sekolah masuk Null ulangi login');
callback(null);
} else {
var a = level.tch_name;
var b = level.tch_password;
var c = level.sch_id; // (i need this variable for next code)
if (user === a && pass === b) {
console.log("id Sekolah : " + c);
callback(null, {name: a, id: c});
// ^^^^^^^^^^^^^^^^
} else
callback(null, false);
}
});
});
};
// don't try to authenticate outside of the request context
app.get('/siswa_2', kepala, function (req, res) {
var sch_id = req.user.id;
// ^^^^^^^^^^^
console.log("id school in query :" + sch_id);
…
});
You could also take advantage of locals:
Response local variables are scoped to the request, thus only
available to the view(s) rendered during that request / response
cycle, if any. Otherwise this API is identical to app.locals.
This object is useful for exposes request-level information such as
the request pathname, authenticated user, user settings etcetera.
Assigning c to res.locals.c should let you use it in later elements of that particular request and subsequent response.

Categories

Resources