Array filled in the wrong order - javascript

I have a strange problem: when I push my result into my array, the result doesn't end up at the right position (for example, a result that should be at index 1 ends up at index 3), and when I re-run my module the results change position randomly in the array.
// Requests every link and passes an extracted substring of each page to `callback`.
// NOTE(review): `callback` is invoked once per finished response (not once at the end),
// and on failure it receives a string built from the error as its only argument.
var cote = function(links, callback) {
var http = require('http');
var bl = require('bl');
// NOTE(review): declared but never used below; results are pushed onto `myresult` instead.
var coteArgus = [];
// NOTE(review): `i` is assigned without a declaration, so it is not local to this function.
// The loop starts every request immediately; it does not wait for a response before continuing.
for (i = 0; i < links.length; i ++) {
http.get('http://www.website.com/' + links[i], function(response) {
response.pipe(bl(function(err, data) {
if (err) {
callback(err + " erreur");
return;
}
var data = data.toString()
// Cuts the text between fixed offsets from the two markers.
// NOTE(review): `newcoteArgus` is also assigned without a declaration.
newcoteArgus = data.substring(data.indexOf('<div class="tx12">') + 85, data.indexOf(';</span>') - 5);
// Values are appended whenever a response finishes, so the array follows completion
// order rather than the order of `links`.
// NOTE(review): `myresult` is not declared anywhere in this snippet.
myresult.push(newcoteArgus);
callback(myresult);
}));
});
}
};
exports.cote = cote;

The problem is that although the for loop is synchronous, http.get and the pipe operation are not (I/O is asynchronous in Node.js), so the order of the array depends on which request and pipe finish first, which is unpredictable.
Try to avoid starting async operations in a plain loop; instead, use a library such as async for flow control.

I think this can be done in the right order, using async map
Here a sample with map and using request module.
// There's no need to make requires inside the function,
// is better just one time outside the function.
var request = require("request");
var async = require("async");
/**
 * Fetches every link and extracts one value from each page.
 *
 * The requests are handed to `async.map`, which delivers the results in the
 * same order as `links`, regardless of which request finishes first.
 *
 * @param {string[]} links - Paths appended to "http://www.website.com/".
 * @param {function(Error|null, string[]=)} callback - Called once: with the
 *   first error, or with `null` and the extracted values.
 */
var cote = function(links, callback) {
  async.map(links, function(link, nextLink) {
    request("http://www.website.com/" + link, function(err, response, body) {
      if (err) {
        // Report the failure; the final callback below receives it.
        return nextLink(err);
      }
      // The marker must match the page markup exactly (double quotes inside),
      // otherwise indexOf returns -1 and the offsets point at the wrong text.
      var newcoteArgus = body.substring(
        body.indexOf('<div class="tx12">') + 85,
        body.indexOf(";</span>") - 5
      );
      // Hand this link's value over; it becomes one entry of the final result.
      nextLink(null, newcoteArgus);
    });
  },
  function(err, results) {
    // An error from any link is forwarded as the first argument.
    if (err) return callback(err);
    // No errors: the results are passed as the second argument.
    callback(null, results);
  });
};
exports.cote = cote;
One more thing, i'm not sure, really what you are doing in the part where you search html content in the responses but there's a really good library to work with JQuery selectors from server side maybe can be useful for you.
Here's how you should call the function
// Call function sample.
// Example: fetch two pages and print either the error or the collected values.
var thelinks = ["features", "how-it-works"];
cote(thelinks, function onCoteDone(err, data) {
  if (err) {
    return console.log("Error: ", err);
  }
  console.log("data --> ", data);
});

Related

How to do sequencial HTTP calls?

I have a couple of APIs I need to call to collect and merge information.
I make the first API call and, based on the result, I make several calls to the second one (in a loop).
Since HTTP requests are asynchronous, I'm losing the information. By the time the second step is finished, the server (Node.js) has already sent the response back to the client.
I've already tried to use callback functions. This managed to keep the response to the client waiting, but the information from the second call was still lost. I guess the variables are somehow not being synchronized.
I also did a quick test with async/await, but my JavaScript mojo was not enough to make it run without errors.
/* pseudo code */
// Loads the first API's result list and starts one second-step lookup per item.
// `callback` receives the array of item objects.
function getData(var1, callback){
// NOTE(review): `url` is assigned without a declaration.
url= "http://test.server/bla?param="+var1;
request.get(url, function (error, response, body){
var results = [];
for(var item of JSON.parse(body).entity.resultArray) {
// NOTE(review): `var` is function-scoped, so every callback created in this loop
// refers to the same `o` variable.
var o = {};
o['data1'] = item.data1;
o['data2'] = item.data2;
o['data3'] = item.data3;
// Only starts the second-step request; the function passed here runs later.
getSecondStep(o, function(secondStepData){
//console.log("Callback object");
//console.log(o);
o['secondStepData'] = secondStepData;
});
// Pushed right away, before the second-step callback has filled in `secondStepData`.
results.push(o);
}
// Runs as soon as the loop ends, i.e. without waiting for any second-step callback.
callback(results);
});
}
// Queries the second API for `object.data1` and passes a list of {data4, data5}
// objects to `callback`.
function getSecondStep(object, callback){
// NOTE(review): `url` is assigned without a declaration.
url = "http://othertest.server/foobar?param=" + object.data1;
request.get(url, function (error, response, body){
var results = [];
// NOTE(review): `error` is not checked, and when the status is not 200 the callback
// is never invoked, so a caller waiting on it would wait forever.
if(response.statusCode == 200){
for(var item of JSON.parse(body).object.array) {
var o = {}
o['data4'] = item.data4;
o['data5'] = item.data5;
results.push(o);
}
callback(results);
}
});
}
What I would like is to be able to collect all the information into one JSON object to return it back to the client.
The client will then be responsible for rendering it in a nice way.
I recommend using the async / await pattern with the request-promise-native library.
This makes API calls really easy to make and the code is cleaner when using this pattern.
In the example below I'm just calling a httpbin API to generate a UUID but the principle applies for any API.
const rp = require('request-promise-native');
/**
 * Calls the first API once, then the second API several times in parallel.
 *
 * @returns {Promise<{firstAPIResponse: *, secondAPIResponses: Array}>} Both
 *   sets of responses combined into one object.
 */
async function callAPIs() {
  const uuidUrl = "https://httpbin.org/uuid";
  const firstAPIResponse = await rp(uuidUrl, { json: true });
  console.log("First API response: ", firstAPIResponse);

  // Call several times, we can switch on the first API response if we like.
  const callCount = 3;
  const pendingCalls = Array.from({ length: callCount }, () => rp(uuidUrl, { json: true }));
  // Promise.all keeps the responses in the order the calls were started.
  const secondAPIResponses = await Promise.all(pendingCalls);

  return { firstAPIResponse, secondAPIResponses };
}
// Runs callAPIs once and prints the merged result.
async function testAPIs() {
  const merged = await callAPIs();
  console.log("Combined response: " , merged);
}
testAPIs();
In this simple example we get a combined response like so:
{
firstAPIResponse: { uuid: '640858f8-2e69-4c2b-8f2e-da8c68795f21' },
secondAPIResponses: [
{ uuid: '202f9618-f646-49a2-8d30-4fe153e3c78a' },
{ uuid: '381b57db-2b7f-424a-9899-7e2f543867a8' },
{ uuid: '50facc6e-1d7c-41c6-aa0e-095915ae3070' }
]
}
I suggest you go over to a library that supports promises (eg: https://github.com/request/request-promise) as the code becomes much easier to deal with than the callback method.
Your code would look something like:
/**
 * Loads the first API's result list and enriches every item with the data
 * returned by the second API for that item.
 *
 * Assumes `request.get(url)` returns a promise that resolves with the raw
 * response body (the default behaviour of request-promise).
 *
 * @param {string} var1 - Value of the `param` query parameter of the first API.
 * @returns {Promise<Array<{data1: *, data2: *, data3: *, secondStepData: Array}>>}
 *   One object per item of the first result, in the original order.
 */
function getData(var1) {
  const url = "http://test.server/bla?param=" + var1;
  return request.get(url).then((firstBody) => {
    const items = JSON.parse(firstBody).entity.resultArray;
    // One second-step request per item; Promise.all keeps the input order.
    return Promise.all(
      items.map((item) =>
        request
          .get("http://othertest.server/foobar?param=" + item.data1)
          .then((secondBody) => ({
            data1: item.data1,
            data2: item.data2,
            data3: item.data3,
            secondStepData: JSON.parse(secondBody).object.array.map((x) => ({
              data4: x.data4,
              data5: x.data5,
            })),
          }))
      )
    );
  });
}
And usage would be
getData("SomeVar1").then(result => ... );
The problem is that you are calling the callback while you still have async calls in flight. Several approaches are possible, such as using async/await or switching to Promises (which is what I would probably do in your case).
Or you can, well, call the callback only when you have all the information available. Pseudo code follows:
/**
 * Loads the first API's result list, enriches each item with its second-step
 * data, and calls `callback` once every second-step lookup has answered.
 *
 * Items are appended as their second-step lookups complete, so the result
 * follows completion order. Errors from `request.get` are not handled here.
 *
 * @param {string} var1 - Value of the `param` query parameter.
 * @param {function(Array<Object>)} callback - Receives the enriched items.
 */
function getData(var1, callback) {
  const url = "http://test.server/bla?param=" + var1;
  request.get(url, function (error, response, body) {
    const items = JSON.parse(body).entity.resultArray;
    const results = [];
    let done = 0;

    // Nothing to wait for: answer immediately instead of never calling back.
    if (items.length === 0) {
      callback(results);
      return;
    }

    for (const item of items) {
      // Block-scoped, so each callback below keeps its own object.
      const o = {
        data1: item.data1,
        data2: item.data2,
        data3: item.data3,
      };
      getSecondStep(o, function (secondStepData) {
        o.secondStepData = secondStepData;
        results.push(o);
        done += 1;
        // The last lookup to finish delivers the complete result.
        if (done === items.length) callback(results);
      });
    }
  });
}
(note that since this is pseudo code, I am not checking for errors or handling a possible empty result from request.get(...))
You need to call the callback of the first function only after all the second-step callbacks have been called. Try these changes:
/**
 * Loads the first API's result list, enriches each item with its second-step
 * data, and calls `callback` once every second-step lookup has answered.
 *
 * Each item is stored at the index it had in the first result, so the output
 * order matches the input order no matter which lookup finishes first.
 * Errors from `request.get` are not handled here.
 *
 * @param {string} var1 - Value of the `param` query parameter.
 * @param {function(Array<Object>)} callback - Receives the enriched items.
 */
function getData(var1, callback) {
  const url = "http://test.server/bla?param=" + var1;
  request.get(url, function (error, response, body) {
    const arr = JSON.parse(body).entity.resultArray;
    const results = [];
    let count = 0;

    // Nothing to wait for: answer immediately instead of never calling back.
    if (arr.length === 0) {
      callback(results);
      return;
    }

    for (const [index, value] of arr.entries()) {
      // Block-scoped, so each callback below keeps its own object.
      const o = {
        data1: value.data1,
        data2: value.data2,
        data3: value.data3,
      };
      getSecondStep(o, function (secondStepData) {
        o.secondStepData = secondStepData;
        results[index] = o;
        count += 1;
        if (count === arr.length) {
          callback(results);
        }
      });
    }
  });
}

Array returning Undefined because of asynchrony

I am accessing the API Trello, but I came across the following problem:
Trello access the information, getting the id of each existing row, the code is as follows:
// Holds the list currently being processed by the loop below.
var x;
// Filled by findNumberCards, which stores each count under the list's name.
var numberCardsByList = [];
// Loads all lists of the board and starts one card lookup per list.
trello.get("/1/boards/[idBoard]/lists/all", function(err, data) {
if (err) throw err;
console.log("Number of list: " + data.length);
for(var i=0; i<data.length; i++){
x = data[i];
findNumberCards(x);
}
});
As you can see, after getting the number of lists, I walk through all of them: within the loop, I assign each list to the variable x and call a function that gets the number of cards in that list. The code that counts the cards is as follows:
// Requests the cards of list `x`, logs how many there are, and records the
// count in numberCardsByList under the list's name.
function findNumberCards(x) {
  const cardsUrl = "/1/lists/" + x.id + "/cards";
  trello.get(cardsUrl, (err, dados) => {
    if (err) {
      throw err;
    }
    const cardCount = dados.length;
    console.log("Name List: " + x.name + " have " + cardCount + " cards");
    numberCardsByList[x.name] = cardCount;
  });
}
Up to this point everything works, but when I try to access the numberCardsByList array after the search in Trello, it returns undefined:
// Holds the list currently being processed by the loop below.
var x;
// Filled by findNumberCards, which stores each count under the list's name.
var numberCardsByList = [];
// Loads all lists of the board and starts one card lookup per list.
trello.get("/1/boards/[idBoard]/lists/all", function(err, data) {
if (err) throw err;
console.log("Quantidade de Filas: " + data.length);
for(var i=0; i<data.length; i++){
x = data[i];
findNumberCards(x);
}
});
// This statement comes right after trello.get is called; nothing here waits for
// the callback above or for the card lookups it starts.
console.log(numberCardsByList);
I am aware that this is because of asynchrony; however, I cannot solve it.
The problem you're facing has been solved many times before. If you want to know more, search for the keyword "Promise". If you're familiar with jQuery, try and look up: $.whenAll, $.ajax().done, $.ajax().always, etc.
If you want to come up with a light weight solution yourself, here's a pointer:
By the time you get to your console.log(numberCardsByList), your requests triggered by findNumberCards haven't yet completed, making the Array empty. You'll need to make sure you know when all findNumberCards requests have completed and then log them. Alternatively, you could log them every time one of them completes.
There are roughly two approaches:
Keep track of your open requests and call a function when a request is handled.
Observe your numberCardsByList object and call a function whenever items are added (you won't know if they were added async or synchronously)
I'd suggest going with the first approach. Check out this example code and the comments:
// Maps each list name to the number of cards it holds.
var numberCardsByList = {};
// This array stores the URL of every request that has not completed yet.
var openRequests = [];

// Removes the first occurrence of `url` from the open-request list, if present.
var removeRequest = function(url) {
  var index = openRequests.indexOf(url);
  if (index === -1) return;
  openRequests.splice(index, 1);
};

// Called when there are no open requests left.
var onAllComplete = function() {
  console.log(numberCardsByList);
};

// Called whenever one request completes.
var onComplete = function(url) {
  removeRequest(url);
  // When all have completed, we can call our callback.
  if (openRequests.length === 0) {
    onAllComplete();
  }
};

trello.get("/1/boards/[idBoard]/lists/all", function(err, data) {
  if (err) throw err;
  console.log("Number of list: " + data.length);
  // A board without lists starts no requests, so report completion right away.
  if (data.length === 0) {
    onAllComplete();
    return;
  }
  for (const list of data) {
    findNumberCards(list);
  }
});

// Starts the card lookup for list `x` and records its card count when it answers.
function findNumberCards(x) {
  var url = "/1/lists/" + x.id + "/cards";
  // Before we make the request, we register it:
  openRequests.push(url);
  trello.get(url, function(err, dados) {
    if (err) {
      // A failed lookup must still be counted as finished, otherwise the
      // open-request list never empties.
      console.error("Could not load cards for list " + x.name + ": ", err);
    } else {
      numberCardsByList[x.name] = dados.length;
    }
    // When it is completed, we call onComplete
    onComplete(url);
  });
}
Note that this onAllComplete isn't 100% safe: it might be called multiple times if a request finishes before the next one is started.
Concluding:
If you can, I'd use a library to handle promises. If you want to try and build something yourself, you could try and keep track of the requests and execute a callback when they've all completed.
Keep in mind that the code below most likely won't work for you as-is, since I don't know what else is going on in your code; treat it as an example and explanation of how to deal with your problem.
Since you are unfamiliar with async operations, I will assume you have no prior knowledge of promises and therefore give you a less optimal solution. However, promises are a lot better and you should definitely learn them.
You need to execute sequence procedures inside the result of the async code.
First you'll create a function for the second operation for example:
function numberCardsByList (param1,param2){.....}
You will then change findNumberCards to also accept a callback:
/**
 * Requests the cards of list `x`, records the count in numberCardsByList, and
 * then runs `callback`.
 *
 * @param {{id: string, name: string}} x - The list to count cards for.
 * @param {function()} callback - Next step; runs only after the count is stored.
 */
function findNumberCards(x, callback) {
  trello.get("/1/lists/" + x.id + "/cards", function(err, dados) {
    if (err) throw err;
    console.log("Name List: " + x.name + " have " + dados.length + " cards");
    numberCardsByList[x.name] = dados.length;
    // Must be called here, inside the response handler: calling it after
    // trello.get(...) returns would run it before the data has arrived.
    // pass in any params you need.
    callback();
  });
}
And then you will pass the newly created function numberCardsByList to findNumberCards or wherever you want it.
// Loads every list of the board and starts a card lookup for each one.
trello.get("/1/boards/[idBoard]/lists/all", function onLists(err, data) {
  if (err) {
    throw err;
  }
  console.log("Number of list: " + data.length);
  let index = 0;
  while (index < data.length) {
    // `x` is declared outside this snippet and is reassigned for each list.
    x = data[index];
    // and here we are calling findNumberCards and passing in the callback..
    findNumberCards(x, numberCardsByList);
    index += 1;
  }
});
That is generally how you will deal with async operation, you will pass a callback for the next operation to be executed.
update
here is an example of how this is done with another scenario just to demonstrate the point farther.
we start by getting user
// Look the user up, then continue with the picture lookup.
service.getUser(userName, (err, user) => {
  if (!user) {
    return;
  }
  // we get user picture passing getPictureSize as callback
  getUserPicture(user.picture, getPictureSize)
});
we get the pictureURL
// Resolves the URL of picture `picName` and passes it to `cb`.
// `cb` is not called when no URL comes back.
function getUserPicture(picName, cb) {
  service.getPictureURL(picName, (err, pictureURL) => {
    if (!pictureURL) {
      return;
    }
    // we then call the callback - the next async operation we want.
    cb(pictureURL);
  });
}
we get picture size - which is the last operation
/**
 * Looks up the size of the picture at `pictureURL` and applies the URL and
 * dimensions to the `.picName` element(s).
 *
 * @param {string} pictureURL - URL of the picture to display.
 */
function getPictureSize(pictureURL) {
  service.getPictureSize(pictureURL, function(err, pictureSize) {
    // Without a size there is nothing to apply.
    if (!pictureSize) {
      return;
    }
    var picture = $('.picName');
    // The only picture reference in scope here is the URL parameter.
    picture.attr('src', pictureURL);
    picture.width(pictureSize.width);
    picture.height(pictureSize.height);
  });
}
I hope that clarify things a little.

Waiting for all requests in a loop to finish with node request

I use the node request package. I have a loop, and in every iteration it makes a request to my server.
// realItems needs the complete value of items assigned
// Declared here but never assigned in this snippet.
var realItems;
var items = [];
// Starts one request per entry of the parsed body.
_.forEach(JSON.parse(body), (value, key) => {
// NOTE(review): `id` is not defined in this snippet; `value` and `key` are unused.
request('myurl/' + id, (error, response, body) => {
// Each response replaces `items` entirely, so only the response handled last remains.
items = JSON.parse(body)
});
});
How can I bundle all the requests made with the request package, so that I can assign the value of the items variable to realItems at the end?
// edit:
I use React, so in this case realItems is a state value, and I can't set it in every loop iteration, because every setState triggers a render.
There are a number of ways to approach this. Here's a brute force method that does not preserve the order of the results:
// Collected response bodies, in the order the responses arrive.
var items = [];
// Number of requests that have been started but have not answered yet.
var cnt = 0;
_.forEach(JSON.parse(body), (value) => {
  cnt += 1;
  request('myurl/' + value.id, (error, response, responseBody) => {
    items.push(JSON.parse(responseBody));
    cnt -= 1;
    // if all requests are done
    if (cnt === 0) {
      // process items here as all results are done now
    }
  });
});
Here's a version that uses Bluebird promises:
var Promise = require('bluebird');
var request = Promise.promisify(require("request"));
Promise.promisifyAll(request);
// One promise per entry of the parsed body, created in iteration order.
var promises = _.map(JSON.parse(body), (value) => request('myurl/' + value.id));
Promise.all(promises).then((results) => {
  // all requests are done, data from all requests is in the results array
  // and are in the order that the requests were originally made
});
And, here's a little bit simpler Bluebird promises method that uses a Bluebird iterator:
var Promise = require('bluebird');
var request = Promise.promisify(require("request"));
Promise.promisifyAll(request);
// Start one request per entry and wait for all of them.
Promise.map(JSON.parse(body), (value) => request('myurl/' + value.id))
  .then((results) => {
    // all requests are done, data is in the results array
  });
Is it a requirement that you use the request package? I use async which is similar and comes with a parallel method which does exactly what you're asking -
https://github.com/caolan/async#parallel
example:
// Builds a task that reports `value` to its callback after `delayMs` milliseconds.
const delayedTask = (value, delayMs) => (callback) => {
  setTimeout(() => {
    callback(null, value);
  }, delayMs);
};

async.parallel(
  [delayedTask('one', 200), delayedTask('two', 100)],
  // optional callback
  (err, results) => {
    // the results array will equal ['one','two'] even though
    // the second function had a shorter timeout.
  }
);

Asynchronous add to array in Nodejs

// Module-level array: the same array is used by every call of the handler below.
var veh = [];
// Route that is meant to answer with the logged-in user's vehicles as JSON.
app.get('/updateProf', isLoggedIn, function(req, res) {
// Starts one lookup per vehicle id; each callback runs later, when its lookup answers.
for (var i = 0; i < req.user.local.vehicles.length; i++){
Vehicles.findById(req.user.local.vehicles[i], function(err, vehicle) {
veh.push(vehicle);
console.log("GET Json: " + veh);
});
}
// These statements run right after the loop has started the lookups, without
// waiting for any of the callbacks above.
console.log(veh);
res.json(veh);
veh.length = 0;
});
So I am doing a GET request to obtain all the vehicles that a user owns and return them as JSON. It works fine after a refresh, but when I first go to the page it shows an empty array on the initial load; if I refresh the page, the array is populated. I think the issue has something to do with the code being asynchronous, but I'm having a hard time thinking this way and need some advice on how to tackle this.
Yes!!
You will have to wait for all of the callbacks to finish before returning the JSON.
A solution is to keep a count of how many callbacks have been executed and when all of them have been executed you can return the JSON.
var veh = [];
// Answers with the logged-in user's vehicles as a JSON array, in the order of
// the user's vehicle ids. Lookups that fail are left out of the response.
app.get('/updateProf', isLoggedIn, function(req, res) {
  const vehicleIds = req.user.local.vehicles;
  const total = vehicleIds.length;
  // Per-request storage: a module-level array would be shared by (and mixed
  // between) requests that are handled at the same time.
  const found = new Array(total);
  let processed = 0;

  // No lookups means no callback would ever send the response.
  if (total === 0) {
    res.json([]);
    return;
  }

  vehicleIds.forEach(function(vehicleId, index) {
    Vehicles.findById(vehicleId, function(err, vehicle) {
      if (!err) {
        found[index] = vehicle;
      }
      processed += 1;
      // The last lookup to answer sends the response.
      if (processed === total) {
        // filter() skips never-assigned slots, which drops the failed lookups.
        const vehicles = found.filter(() => true);
        console.log("GET JSON: " + vehicles);
        res.json(vehicles);
      }
    });
  });
});
If you are using a more recent version of Mongoose, then you can directly use Promises that are returned by Mongoose for each query.
For example, your query can be simplified to
// Load all of the user's vehicles with one query instead of one query per id.
Vehicles.find({ _id: { '$in': req.user.local.vehicles } })
  .exec()
  .then(function(vehicleArr) {
    res.json(vehicleArr);
  })
  .catch(function(err) {
    // Without a rejection handler a failed query would leave the request unanswered.
    console.error(err);
    res.status(500).json({ error: 'Failed to load vehicles' });
  });
Note that I use the $in operator to directly translate your loop into an IN condition, which takes an array of what you want to compare (in this case, it's an array of IDs)
The then() function just executes on completion of the query.
Async is a utility library to deal with this.
var async = require('async');
// Answers with the logged-in user's vehicles as a JSON array; async.map keeps
// the results in the order of the user's vehicle ids.
app.get('/updateProf', isLoggedIn, function(req, res) {
  async.map(req.user.local.vehicles, function(vehicleId, cb) {
    Vehicles.findById(vehicleId, function(err, vehicle) {
      if (err) {
        // Return here: the callback must be invoked exactly once per item.
        return cb(err);
      }
      console.log('GET Json: ' + vehicle);
      cb(null, vehicle);
    });
  }, function(err, results) {
    if (err) {
      console.error(err);
      res.status(500).json({ error: 'Failed to load vehicles' });
      return;
    }
    console.log(results);
    res.json(results);
  });
});

Asynchronously Write Large Array of Objects to Redis with Node.js

I created a Node.js script that creates a large array of randomly generated test data and I want to write it to a Redis DB. I am using the redis client library and the async library. Initially, I tried executing a redisClient.hset(...) command within the for loop that generates my test data, but after some Googling, I learned the Redis method is asynchronous while the for loop is synchronous. After seeing some questions on StackOverflow, I can't get it to work the way I want.
I can write to Redis without a problem with a small array or larger, such as one with 100,000 items. However, it does not work well when I have an array of 5,000,000 items. I end up not having enough memory because the redis commands seem to be queueing up, but aren't executed until after async.each(...) is complete and the node process does not exit. How do I get the Redis client to actually execute the commands, as I call redisClient.hset(...)?
Here a fragment of the code I am working with.
var redis = require('redis');
var async = require('async');
var redisClient = redis.createClient(6379, '192.168.1.150');
var testData = generateTestData();
// Issues one hset per generated item.
async.each(testData, function(item, callback) {
var someData = JSON.stringify(item.data);
redisClient.hset('item:'+item.key, 'hashKey', someData, function(err, reply) {
// NOTE(review): `err` is not checked before logging the reply.
console.log("Item was persisted. Result: " +reply);
});
// Called right after hset has been issued, not from inside the reply handler above,
// so async.each is told the item is finished before Redis has answered.
callback();
}, function(err) {
if (err) {
console.error(err);
} else {
// NOTE(review): console.log is a function and has no `info` method, so this call
// would throw; `console.info(...)` or `console.log(...)` was probably intended.
console.log.info("Items have been persisted to Redis.");
}
});
You could call eachLimit to ensure you are not executing too many redisClient.hset calls at the same time.
To avoid overflowing the call stack you could do setTimeout(callback, 0); instead of calling the callback directly.
edit:
Forget what I said about setTimeout. All you need to do is call the callback at the right place. Like so:
redisClient.hset('item:' + item.key, 'hashKey', someData, function(err, reply) {
  if (err) {
    // Forward the failure so the final callback of the iteration sees it.
    callback(err);
    return;
  }
  console.log("Item was persisted. Result: " + reply);
  // Signal completion only now, after Redis has answered.
  callback();
});
You may still want to use eachLimit and try out which limit works best.
By the way - async.each is supposed to be used only on code that schedules the invocation of the callback in the javascript event queue (e.g. timer, network, etc) . Never use it on code that calls the callback immediately as was the case in your original code.
edit:
You can implement your own eachLimit function that takes a generator instead of an array as its first argument. Then you write a generator function to create the test data. For that to work, node needs to be run with "node --harmony code.js".
/**
 * Runs `iterator` over the values of `generator` in batches of at most `limit`.
 *
 * A new batch is pulled from the generator only after every item of the
 * current batch has reported completion. `callback` is called with `null`
 * once the generator is exhausted, or with the first error reported by an
 * item; after an error no further batches are started.
 *
 * @param {Iterator} generator - Source of items (anything with `next()`).
 * @param {number} limit - Maximum number of items per batch.
 * @param {function(*, function(*=))} iterator - Worker; must call its
 *   callback when the item is done, passing an error if it failed.
 * @param {function(*)} callback - Final callback.
 */
function eachLimit(generator, limit, iterator, callback) {
  let failed = false;

  // Pulls up to `limit` values from the generator.
  const takeBatch = () => {
    const batch = [];
    while (batch.length < limit) {
      const step = generator.next();
      if (step.done) break;
      batch.push(step.value);
    }
    return batch;
  };

  const runBatch = () => {
    const batch = takeBatch();
    let remaining = batch.length;
    if (remaining === 0) {
      callback(null);
    }
    for (const elem of batch) {
      iterator(elem, (err) => {
        if (failed) return;
        if (err) {
          callback(err);
          failed = true;
          return;
        }
        remaining -= 1;
        // The last item of the batch to finish starts the next batch.
        if (remaining === 0) runBatch();
      });
    }
  };

  runBatch();
}
// Generator skeleton that yields the test items one at a time.
// The `...` parts are placeholders to be filled in; as written this is not runnable code.
function* testData() {
while(...) {
yield new Data(...);
}
}
// Persist every generated item to Redis, at most 10 writes per batch.
eachLimit(testData(), 10, function(item, callback) {
  var someData = JSON.stringify(item.data);
  redisClient.hset('item:' + item.key, 'hashKey', someData, function(err, reply) {
    if (err) {
      callback(err);
      return;
    }
    console.log("Item was persisted. Result: " + reply);
    callback();
  });
}, function(err) {
  if (err) {
    console.error(err);
  } else {
    // console.log is a plain function with no `info` method, so log directly.
    console.log("Items have been persisted to Redis.");
  }
});

Categories

Resources