Node.js async functions - javascript

The following code should handle http request by produce a request message to some remote server (kafka broker) and wait for consuming a response for it. when a respond message arrive - it should be returned as an http respond (json or something).
router.get('/status', function(req, res, next) {
// init the producer
...
// 1st async function
producer.on('ready', function () {
// some code for generating payloads (data for a message)
...
// 2nd async function
producer.send(payloads, function (err, data) {
// some log of success sending message
...
// 3rd async function
consumer.on('message', function (message) {
// got some response message
res.send("message: " + message);
});
});
});
});
Can I make these sync together even tow it's not mine?
EDIT:
I'll try to be more clear. Consider the following code:
function boo() {
// part 1 - init some consumer
console.log("1. finish init");
// part 2 - This is async function. whenever messages will arrive - this function will be fetched.
consumer.on('message', function (message) {
console.log("2. message arrive!");
return message;
}
// part 3
console.log("3. end function");
return null;
}
Assume that part 2 happen after 1 second. The output will be:
1. finish init
3. end function
2. message arrive!
while my goal is to wait for the async message (part 2) and return it's value. How can I achieve that?

You can use async library.
async.series([
fn1,
fn2
], function (err, results) {
console.log(results);
});
Or you could use https://github.com/AndyShin/sequenty
var sequenty = require('sequenty');
function f1(cb) // cb: callback by sequenty
{
console.log("I'm f1");
cb(); // please call this after finshed
}
function f2(cb)
{
console.log("I'm f2");
cb();
}
sequenty.run([f1, f2]);

When asking this question, I was complete new to node.js. After watching Philip Roberts video I realized how JavaScript is actually work. Then, I've solved my issue with a global messageArray & a messageId counter. Each user request is saved in messageArray (with its relevant handlers objects for later response). Then, the message is sent via kafka into the internal system components. The user won't get respond until the message arrived back from the system). When a message arrived to kafka consumer (from the system components) - we extract the relevant id and respond back to the relevant user). Here is the code:
var messageId = 0;
var messageArray= [];
router.get('/status', function(req, res, next) {
var o = {id: messageId, req: req, res: res, next: next};
messageArray.push(o);
messageId++;
// send message with kafka producer into the system internal components - THE MESSAGE CONTAINS THE messageId!
});
consumer.on('message', function (message) {
// Extract the original messageId from the arrived message and look for it in the messageArray
var messageId = extractMessageId(message);
var data = dequeueMessageById(messageId);
// got some response message
data.res.send("message: " + message);
});
function dequeueMessageById(messageId) {
for (var i=0 ; i < messageArray.length ; i++) {
if (messageArray[i].id == messageId) {
var messageData = messageArray[i];
messageArray.splice(index, 1); // remove from array
return messageData;
}
} /* for */
return null;
}

Related

How we can use promises in node js?

As in asynchronous programming we used to callbacks and promises.
Here I am stuck in a problem that may be used to promises. I google it a lot but there is nothing found that solved my problem.
Here My code that I am doing to send push notification in android device.
router.post('/check-notifications', function(req, res, next) {
var user_id = req.body.user_id;
var response = {};
var gcm = require('push-notify').gcm({
apiKey: gcm_apiKey,
retries: 0
});
connection.query('select device_id from devices where user_id = '+ user_id, function (err, result) {
if ( result.length ) {
for (var i = 0; i < result.length; i++) {
console.log(i + 'before notify');
gcm.send({
registrationId: result[i]['device_id'],
data: result[0]
});
console.log(i + 'before transmitted');
gcm.on('transmitted', function (result, message, registrationId) {
console.log('transmitted');
});
gcm.on('transmissionError', function (error, message, registrationId) {
console.log(message);
});
console.log(i + 'after notify');
}
}
});
response['success'] = true;
response['msg'] = 'sent successfully';
res.json(response);
});
Output :
0before notify
0before transmitted
0after notify
1before notify
1before transmitted
1after notify
transmitted
transmitted
transmitted
transmitted
And I think It should be like this.
0before notify
0before transmitted
transmitted
0after notify
1before notify
1before transmitted
transmitted
1after notify
You can use async.mapSeries method for chaining notifications. Replace for loop to:
async.mapSeries(result, function(item, callback) {
gcm.send({
registrationId: item['device_id'],
data: data
});
gcm.on('transmitted', function(result, message, registrationId) {
console.log('transmitted');
callback(null, message, registrationId);
});
gcm.on('transmissionError', function(error, message, registrationId) {
callback(error, message, registrationId);
});
}, function (err, results) {
if (err) throw err;
response['success'] = true;
response['msg'] = 'sent successfully';
res.json(response);
})
I recommend using Bluebird JS for Promise flow-control.
var Promise = require('bluebird'); // Require bluebird, and call it 'Promise', the code below is version 3.x syntax
var connection = {'query': '???'}; // assuming `connection` is already defined somewhere else
var gcm_apiKey = '???'; // assuming `gcm_apiKey` is already defined
router.post('/check-notifications', function (req, res, next) {
var user_id = req.body.user_id;
var gcm = require('push-notify').gcm({
apiKey: gcm_apiKey,
retries: 0
});
// assuming `connection` is already defined somewhere else
// Make an async version of connection.query
connection.queryAsync = Promise.promisify(connection.query);
connection.queryAsync('select device_id from devices where user_id = ' + user_id)
// Bluebird's Promise.map would execute the following block once per result, asynchronously.
// The sequence of who runs first and who completes first is undefined
.map(function (result, i) {
// the `result` argument here is `result[i]` of the original code, since we're in the map context
// Here we have to create a promise to consume events
return new Promise(function (resolve, reject) {
console.log(i + 'before notify');
gcm.send({
registrationId: result['device_id'],
data: result // original code is written as result[0], which I don't quite understand. Always sending the first result?
});
// This does not make sense console logging here, as it is not actually 'before transmitted'
// It's just binding onto the event
// console.log(i + 'before transmitted');
gcm.on('transmitted', function (result, message, registrationId) {
// Check registrationId
if (registrationId === result['device_id']) {
console.log('transmitted');
resolve(result); // use 'result' as the Promise's resolved value
}
});
gcm.on('transmissionError', function (error, message, registrationId) {
// Check registrationId
if (registrationId === result['device_id']) {
console.log(message);
reject(message); // reject errors and send the message as the promise's reject reason
}
});
// Technically, you should log it as "after event binding"
console.log(i + 'after notify');
});
}).then(function (results) {
// `results` should contain all the result from the 'transmitted' event
var response = {};
response['success'] = true;
response['msg'] = 'sent successfully';
res.json(response);
});
});
Note: The is actually more or less doable without any libraries but with native Promises, but the syntax would be more cluttering.

Asynchronously Write Large Array of Objects to Redis with Node.js

I created a Node.js script that creates a large array of randomly generated test data and I want to write it to a Redis DB. I am using the redis client library and the async library. Initially, I tried executing a redisClient.hset(...) command within the for loop that generates my test data, but after some Googling, I learned the Redis method is asynchronous while the for loop is synchronous. After seeing some questions on StackOverflow, I can't get it to work the way I want.
I can write to Redis without a problem with a small array or larger, such as one with 100,000 items. However, it does not work well when I have an array of 5,000,000 items. I end up not having enough memory because the redis commands seem to be queueing up, but aren't executed until after async.each(...) is complete and the node process does not exit. How do I get the Redis client to actually execute the commands, as I call redisClient.hset(...)?
Here a fragment of the code I am working with.
var redis = require('redis');
var async = require('async');
var redisClient = redis.createClient(6379, '192.168.1.150');
var testData = generateTestData();
async.each(testData, function(item, callback) {
var someData = JSON.stringify(item.data);
redisClient.hset('item:'+item.key, 'hashKey', someData, function(err, reply) {
console.log("Item was persisted. Result: " +reply);
});
callback();
}, function(err) {
if (err) {
console.error(err);
} else {
console.log.info("Items have been persisted to Redis.");
}
});
You could call eachLimit to ensure you are not executing too many redisClient.hset calls at the same time.
To avoid overflowing the call stack you could do setTimeout(callback, 0); instead of calling the callback directly.
edit:
Forget what I said about setTimeout. All you need to do is call the callback at the right place. Like so:
redisClient.hset('item:'+item.key, 'hashKey', someData, function(err, reply) {
console.log("Item was persisted. Result: " +reply);
callback();
});
You may still want to use eachLimit and try out which limit works best.
By the way - async.each is supposed to be used only on code that schedules the invocation of the callback in the javascript event queue (e.g. timer, network, etc) . Never use it on code that calls the callback immediately as was the case in your original code.
edit:
You can implement your own eachLimit function that instead of an array takes a generator as it's first argument. Then you write a generator function to create the test data. For that to work, node needs to be run with "node --harmony code.js".
function eachLimit(generator, limit, iterator, callback) {
var isError = false, j;
function startNextSetOfActions() {
var elems = [];
for(var i = 0; i < limit; i++) {
j = generator.next();
if(j.done) break;
elems.push(j.value);
}
var activeActions = elems.length;
if(activeActions === 0) {
callback(null);
}
elems.forEach(function(elem) {
iterator(elem, function(err) {
if(isError) return;
else if(err) {
callback(err);
isError = true;
return;
}
activeActions--;
if(activeActions === 0) startNextSetOfActions();
});
});
}
startNextSetOfActions();
}
function* testData() {
while(...) {
yield new Data(...);
}
}
eachLimit(testData(), 10, function(item, callback) {
var someData = JSON.stringify(item.data);
redisClient.hset('item:'+item.key, 'hashKey', someData, function(err, reply) {
if(err) callback(err);
else {
console.log("Item was persisted. Result: " +reply);
callback();
}
});
}, function(err) {
if (err) {
console.error(err);
} else {
console.log.info("Items have been persisted to Redis.");
}
});

Understanding control flow in Node.js applications

I am trying to understand control flow in Node.js applications. Specifically does control returns to the original function once callback method completes (like a callback stack in recursive calls). I wrote a simple program that make a GET call and return the data. Here is the program:
Code:
var async = require('async');
var http = require('http');
function getGoogleData(url, callback) {
http.get(url, function(response) {
if (response.statusCode == 200) {
var googleInfo = '';
response.on('data', function(chunk) {
console.log("receiving data... ");
googleInfo += chunk;
return;
});
response.on('end', function() {
console.log("End of data receive... ");
response.setEncoding('utf8');
return callback(null, googleInfo);
});
}
console.log("I am here but why!");
//callback(new Error("GET called failed status_code=" + response.statusCode));
});
console.log("Return from get google data");
}
async.waterfall([
function(callback) {
console.log("In func 1");
getGoogleData("http://www.google.com", callback);
},
function(data, callback) {
console.log("In func 2");
callback(data);
}],
function (err, res) {
console.log("In err fn");
});
Here is output of the program:
Output:
In func 1
Return from get google data
I am here but why!
receiving data...
receiving data...
End of data receive...
In func 2
In err fn
Can someone help me understand why 'I am here but why!' line gets printed as the second output line in console log even after returning from 'data' event emitter? What is the overall control flow here?
The reason you're seeing that message logged first is that all that the code inside the if block is doing is adding event handlers. Those events are emitted some time in the future, after your console.log has already executed.
It's a similar reason why "Return from get google data" gets printed before the request finishes, because the http request is asynchronous.

Using Async waterfall in node.js

I have 2 functions that I'm running asynchronously. I'd like to write them using waterfall model. The thing is, I don't know how..
Here is my code :
var fs = require('fs');
function updateJson(ticker, value) {
//var stocksJson = JSON.parse(fs.readFileSync("stocktest.json"));
fs.readFile('stocktest.json', function(error, file) {
var stocksJson = JSON.parse(file);
if (stocksJson[ticker]!=null) {
console.log(ticker+" price : " + stocksJson[ticker].price);
console.log("changing the value...")
stocksJson[ticker].price = value;
console.log("Price after the change has been made -- " + stocksJson[ticker].price);
console.log("printing the the Json.stringify")
console.log(JSON.stringify(stocksJson, null, 4));
fs.writeFile('stocktest.json', JSON.stringify(stocksJson, null, 4), function(err) {
if(!err) {
console.log("File successfully written");
}
if (err) {
console.error(err);
}
}); //end of writeFile
} else {
console.log(ticker + " doesn't exist on the json");
}
});
} // end of updateJson
Any idea how can I write it using waterfall, so i'll be able to control this? Please write me some examples because I'm new to node.js
First identify the steps and write them as asynchronous functions (taking a callback argument)
read the file
function readFile(readFileCallback) {
fs.readFile('stocktest.json', function (error, file) {
if (error) {
readFileCallback(error);
} else {
readFileCallback(null, file);
}
});
}
process the file (I removed most of the console.log in the examples)
function processFile(file, processFileCallback) {
var stocksJson = JSON.parse(file);
if (stocksJson[ticker] != null) {
stocksJson[ticker].price = value;
fs.writeFile('stocktest.json', JSON.stringify(stocksJson, null, 4), function (error) {
if (err) {
processFileCallback(error);
} else {
console.log("File successfully written");
processFileCallback(null);
}
});
}
else {
console.log(ticker + " doesn't exist on the json");
processFileCallback(null); //callback should always be called once (and only one time)
}
}
Note that I did no specific error handling here, I'll take benefit of async.waterfall to centralize error handling at the same place.
Also be careful that if you have (if/else/switch/...) branches in an asynchronous function, it always call the callback one (and only one) time.
Plug everything with async.waterfall
async.waterfall([
readFile,
processFile
], function (error) {
if (error) {
//handle readFile error or processFile error here
}
});
Clean example
The previous code was excessively verbose to make the explanations clearer. Here is a full cleaned example:
async.waterfall([
function readFile(readFileCallback) {
fs.readFile('stocktest.json', readFileCallback);
},
function processFile(file, processFileCallback) {
var stocksJson = JSON.parse(file);
if (stocksJson[ticker] != null) {
stocksJson[ticker].price = value;
fs.writeFile('stocktest.json', JSON.stringify(stocksJson, null, 4), function (error) {
if (!err) {
console.log("File successfully written");
}
processFileCallback(err);
});
}
else {
console.log(ticker + " doesn't exist on the json");
processFileCallback(null);
}
}
], function (error) {
if (error) {
//handle readFile error or processFile error here
}
});
I left the function names because it helps readability and helps debugging with tools like chrome debugger.
If you use underscore (on npm), you can also replace the first function with _.partial(fs.readFile, 'stocktest.json')
First and foremost, make sure you read the documentation regarding async.waterfall.
Now, there are couple key parts about the waterfall control flow:
The control flow is specified by an array of functions for invocation as the first argument, and a "complete" callback when the flow is finished as the second argument.
The array of functions are invoked in series (as opposed to parallel).
If an error (usually named err) is encountered at any operation in the flow array, it will short-circuit and immediately invoke the "complete"/"finish"/"done" callback.
Arguments from the previously executed function are applied to the next function in the control flow, in order, and an "intermediate" callback is supplied as the last argument. Note: The first function only has this "intermediate" callback, and the "complete" callback will have the arguments of the last invoked function in the control flow (with consideration to any errors) but with an err argument prepended instead of an "intermediate" callback that is appended.
The callbacks for each individual operation (I call this cbAsync in my examples) should be invoked when you're ready to move on: The first parameter will be an error, if any, and the second (third, fourth... etc.) parameter will be any data you want to pass to the subsequent operation.
The first goal is to get your code working almost verbatim alongside the introduction of async.waterfall. I decided to remove all your console.log statements and simplified your error handling. Here is the first iteration (untested code):
var fs = require('fs'),
async = require('async');
function updateJson(ticker,value) {
async.waterfall([ // the series operation list of `async.waterfall`
// waterfall operation 1, invoke cbAsync when done
function getTicker(cbAsync) {
fs.readFile('stocktest.json',function(err,file) {
if ( err ) {
// if there was an error, let async know and bail
cbAsync(err);
return; // bail
}
var stocksJson = JSON.parse(file);
if ( stocksJson[ticker] === null ) {
// if we don't have the ticker, let "complete" know and bail
cbAsync(new Error('Missing ticker property in JSON.'));
return; // bail
}
stocksJson[ticker] = value;
// err = null (no error), jsonString = JSON.stringify(...)
cbAsync(null,JSON.stringify(stocksJson,null,4));
});
},
function writeTicker(jsonString,cbAsync) {
fs.writeFile('stocktest.json',jsonString,function(err) {
cbAsync(err); // err will be null if the operation was successful
});
}
],function asyncComplete(err) { // the "complete" callback of `async.waterfall`
if ( err ) { // there was an error with either `getTicker` or `writeTicker`
console.warn('Error updating stock ticker JSON.',err);
} else {
console.info('Successfully completed operation.');
}
});
}
The second iteration divides up the operation flow a bit more. It puts it into smaller single-operation oriented chunks of code. I'm not going to comment it, it speaks for itself (again, untested):
var fs = require('fs'),
async = require('async');
function updateJson(ticker,value,callback) { // introduced a main callback
var stockTestFile = 'stocktest.json';
async.waterfall([
function getTicker(cbAsync) {
fs.readFile(stockTestFile,function(err,file) {
cbAsync(err,file);
});
},
function parseAndPrepareStockTicker(file,cbAsync) {
var stocksJson = JSON.parse(file);
if ( stocksJson[ticker] === null ) {
cbAsync(new Error('Missing ticker property in JSON.'));
return;
}
stocksJson[ticker] = value;
cbAsync(null,JSON.stringify(stocksJson,null,4));
},
function writeTicker(jsonString,cbAsync) {
fs.writeFile('stocktest.json',jsonString,,function(err) {
cbAsync(err);
});
}
],function asyncComplete(err) {
if ( err ) {
console.warn('Error updating stock ticker JSON.',err);
}
callback(err);
});
}
The last iteration short-hands a lot of this with the use of some bind tricks to decrease the call stack and increase readability (IMO), also untested:
var fs = require('fs'),
async = require('async');
function updateJson(ticker,value,callback) {
var stockTestFile = 'stocktest.json';
async.waterfall([
fs.readFile.bind(fs,stockTestFile),
function parseStockTicker(file,cbAsync) {
var stocksJson = JSON.parse(file);
if ( stocksJson[ticker] === null ) {
cbAsync(new Error('Missing ticker property in JSON.'));
return;
}
cbAsync(null,stocksJson);
},
function prepareStockTicker(stocksJson,cbAsync) {
stocksJson[ticker] = value;
cbAsync(null,JSON.stringify(stocksJson,null,4));
},
fs.writeFile.bind(fs,stockTestFile)
],function asyncComplete(err) {
if ( err ) {
console.warn('Error updating stock ticker JSON.',err);
}
callback(err);
});
}
Basically nodejs (and more generally javascript) functions that require some time to execute (be it for I/O or cpu processing) are typically asynchronous, so the event loop (to make it simple is a loop that continuously checks for tasks to be executed) can invoke the function right below the first one, without getting blocked for a response. If you are familiar with other languages like C or Java, you can think an asynchronous function as a function that runs on another thread (it's not necessarily true in javascript, but the programmer shouldn't care about it) and when the execution terminates this thread notifies the main one (the event loop one) that the job is done and it has the results.
As said once the first function has ended its job it must be able to notify that its job is finished and it does so invoking the callback function you pass to it. to make an example:
var callback = function(data,err)
{
if(!err)
{
do something with the received data
}
else
something went wrong
}
asyncFunction1(someparams, callback);
asyncFunction2(someotherparams);
the execution flow would call: asyncFunction1, asyncFunction2 and every function below until asyncFunction1 ends, then the callback function which is passed as the last parameter to asyncFunction1 is called to do something with data if no errors occurred.
So, to make 2 or more asynchronous functions execute one after another only when they ended you have to call them inside their callback functions:
function asyncTask1(data, function(result1, err)
{
if(!err)
asyncTask2(data, function(result2, err2)
{
if(!err2)
//call maybe a third async function
else
console.log(err2);
});
else
console.log(err);
});
result1 is the return value from asyncTask1 and result2 is the return value for asyncTask2. You can this way nest how many asynchronous functions you want.
In your case if you want another function to be called after updateJson() you must call it after this line:
console.log("File successfully written");

Node.js unexpected stack overflow with multiple get requests

I have a function that GETs a JSON object from a remote server, or from a local cache on-disk.
In a use-case, i have to call this function several thousand times with varying arguments, but when i do so, i get max stack overflow errors. I must be making a recursive call somewhere, but i can't see where it could be as my process.nextTick function calls seem to be in the right place.
I get none of my log.error readouts in the console, which would be evident if any of the recursive calls to retry the request were made.
The console output shows a repeated occurrence of
(node) warning: Recursive process.nextTick detected. This will break in the next version of node. Please use setImmediate for recursive deferral.
then...
RangeError: Maximum call stack size exceeded
Then the program exits.
Can anyone offer any help regarding what i may be doing wrong? I'm completely stumped.
Below is the function that invokes the problematic function "tf2inv.loadInventory()"
function refreshInventories(accounts, force, callback) {
//job executes download function, then pushes to inventories object
var inventories = {};
var Qinv = async.queue(function (task, invCallback) {
tf2inv.loadInventory(
task.force,
task.steamid,
function(inv, alias) {
inventories[alias] = inv;
process.nextTick(invCallback);
}
);
}, 100)
//when all queue jobs have finished, callback with populated inventories object
Qinv.drain = function (err) {
log.info('All inventories downloaded');
callback(inventories);
}
//adding jobs to the queue
for (var i = accounts.length - 1; i >= 0; i--) {
Qinv.push({
force: force,
steamid: accounts[i]
});
};
}
Shown here is the function that either parses from the cache, or requests from the remote server.
//tf2inv
var loadInventory = function(force, sid, callback) {
var invLoc = invFolder+sid
if(force) {
if(fs.existsSync(invLoc)) {
fs.unlinkSync(invLoc);
}
}
if(fs.existsSync(invLoc)) {
var body = fs.readFileSync(invLoc);
try {
var inventory = JSON.parse(body);
} catch (e) {
fs.unlinkSync(invLoc);
log.error("parsing " + sid+"'s inventory");
loadInventory(true, sid, invFolder, callback);
return;
}
process.nextTick(function() { callback(inventory, sid) })
return;
} else {
var urlPre = "http://api.steampowered.com/IEconItems_440/GetPlayerItems/v0001/?key=";
var urlSidPre = "&steamid=";
var urlInvSuf = "&inventory=yes";
var URL = urlPre+steam_API+urlSidPre+sid+urlInvSuf;
http.get(URL, function (res) {
var body = '';
res.on('data', function (data) {
body+=data;
fs.appendFile(invLoc, data);
});
res.on('end', function() {
try {
inventory = JSON.parse(body);
} catch (e) {
if(fs.existsSync(invLoc)) {
fs.unlinkSync(invLoc);
}
log.error("parsing " + sid+"'s downloaded inventory");
loadInventory(force, sid, invFolder, callback)
return;
}
process.nextTick(function() { callback(inventory, sid) })
return;
});
res.on('error', function (e, socket) {
log.error(sid + " inventory error")
if(fs.existsSync(invLoc)) {
fs.unlinkSync(invLoc);
}
log.debug('Retrying inventory')
loadInventory(force, sid, invFolder, callback);
return;
})
res.on('close', function () {res.emit('end'); log.error('connection closed')})
})
.on('error', function(e) {
log.error(JSON.stringify(e));
if(fs.existsSync(invLoc)) {
fs.unlinkSync(invLoc);
}
log.debug('Retrying inventory')
loadInventory(force, sid, invFolder, callback)
return;
})
}
};
It is likely to be failing to parse the body coming back from the server. It then immediately calls itself again, failing again, infinitely looping and causing a stack overflow.
I suggest you do not retry automatically on a failed parse - if it fails once, it is likely to fail again. It would be best to call back with the error, and let the part of your programming calling this handle the error, or passing it back to the point where it can let the user know that something is wrong.

Categories

Resources