Identify Closure Memory Leak - javascript

I'm currently writing a simple api where you post an array (length = 200) and since each element in the array needs to do 1-2 look up requests, I'm using the async library to control the flow of things. I'm using node 0.12.5 & Express.
// POST /data: enriches every event in the request body with user data and
// bulk-inserts the enriched documents through db.cloudant.use('events').
router.post('/data', function(req, res, next) {
  var cloudDB = db.cloudant.use('events');
  var tempStorage = {"docs": []};

  // Runs the lookup pipeline for a single event. Completion is always
  // signalled WITHOUT an error, so one failing event is logged and skipped
  // instead of aborting the whole batch.
  function processEvent(singleEvent, loopCallback) {
    var steps = [
      // Step 1: ask the database for the user behind this event.
      function lookupUser(callback) {
        db.getUserInfo(singleEvent.email, function (error, dbResponse) {
          if (error) {
            return callback(error);
          }
          return callback(null, dbResponse);
        });
      },
      // Step 2: use the direct hit if there is one, otherwise run a search.
      function resolveUser(dbResponse, callback) {
        if (dbResponse) {
          return callback(null, JSON.parse(dbResponse));
        }
        db.searchForUser(singleEvent.email, function (err, searchResponse) {
          if (err) {
            return callback(err);
          }
          return callback(null, JSON.parse(searchResponse));
        });
      },
      // Step 3: merge the event with the user record.
      function mergeEventWithUser(userInfo, callback) {
        callback(null, combineEventAndUserData(singleEvent, userInfo));
      }
    ];

    async.waterfall(steps, function (err, result) {
      if (err) {
        console.log(err);
      } else {
        // Queue the processed event for the bulk insert.
        tempStorage.docs.push(result);
      }
      loopCallback(); // this event is done, successful or not
    });
  }

  // Called once every event has been processed: submits the collected docs
  // and answers the HTTP request.
  function submitBatch(err) {
    console.log("Finished each");
    if (err) {
      res.status(500).send(err);
      return;
    }
    cloudDB.bulk(tempStorage, function(bulkErr, body) {
      if (bulkErr) {
        res.status(500).send(bulkErr);
      } else {
        res.status(200).send(body);
      }
    });
  }

  async.each(req.body, processEvent, submitBatch);
});
So, this code works! However... (sniff sniff), I seem to have created a memory leak. I have taken a look at both memwatch-next and heapdump, and all I've been able to tell was that 'arrays' keep growing when I look at the heap dump.
I don't know why, but I have a suspicion that this might have something to do with closures and how I'm storing the items generated from each of the waterfalls and perhaps the tempStorage.docs is not being released? Am I storing the tempStorage in the correct way? Or should I change how I do that?

Related

Nodejs, close mongo db connection via callback

I have the problem with callbacks, async thinking etc.
Execution program:
Connect to mongoDb.
Create url - https://example.com + add part from locArray.
Send get request (for each).
Save data to mongo db.
Close connection.
Problem:
If the connection is closed on the last line of jsonDataFromApi, I get "server instance pool was destroyed", because the connection is closed before the data from every request has been saved to the db.
So I passed callback(db) on to another function, closeMongoDb,
but then a different error appeared:
"Cannot read property 'close' of undefined".
I think the problem is with the async code, how the callbacks are passed, etc.
const MongoClient = require('mongodb').MongoClient;
const Array = require('node-array');
const request = require('request');
// Locations whose data is requested from the API.
var locationArray = ['location1', 'location2', 'location3', 'location4'];
// Requests https://example.com/<loc> as JSON and reports the outcome through
// callback: callback('Error connection to url.') on a request error,
// callback(undefined, body.result) otherwise.
var dataFromLocApi = (loc, callback) => {
  const options = {
    url: `https://example.com/${loc}`,
    json: true
  };
  request(options, (error, response, body) => {
    if (!error) {
      callback(undefined, body.result);
      return;
    }
    callback('Error connection to url.');
  });
};
// Connects to MongoDB at urldb, requests the data for every entry of
// locationArray via dataFromLocApi and inserts each result into
// 'testCollection'.
// NOTE(review): callback(db) at the bottom is invoked right after the
// iteration has been started, not from the per-item or completion callbacks,
// and it passes db as the FIRST argument.
var jsonDataFromApi = (urldb, callback) => {
MongoClient.connect(urldb, (err, db) => {
if (err) {
// Only logs: there is no return, so the code below still runs after a
// failed connection.
console.log('MongoDb connection error.');
}
console.log('MongoDb - connected.');
// forEachAsync is not a built-in array method; presumably it is provided by
// the 'node-array' module required at the top of the script (TODO confirm).
locationArray.forEachAsync(function(loc, index, arr) {
dataFromLocApi(loc, (errorMessage, results) => {
if (errorMessage) {
console.log(errorMessage);
} else {
console.log(JSON.stringify(results, undefined, 2));
db.collection('testCollection').insert(results, function(error, record) {
// An insert error is thrown from inside this callback rather than being
// passed to the caller.
if (error)
throw error;
console.log("data saved");
});
}
});
}, function() {
// Second callback given to forEachAsync; it only logs.
console.log('complete');
});
// Hands the db object to the caller without waiting for the callbacks above.
callback(db);
});
}
// Runs jsonDataFromApi for urldb and closes the db it hands back.
// callback is invoked only on failure, with 'Close connection - failure'.
var closeMongoDb = (urldb, callback) => {
  const onDataFetched = (error, db) => {
    if (!error) {
      db.close();
      console.log('MongoDb connections was closed.');
      return;
    }
    callback('Close connection - failure');
  };
  jsonDataFromApi(urldb, onDataFetched);
};
// Script entry point: process the local test database and log when the
// callback fires.
closeMongoDb('mongodb://127.0.0.1:27017/testDb', function (err, db) {
  console.log('DONE');
});
There is definitely a problem with asynchrony there.
You're not waiting for the items to be processed before calling db.close().
Also, the functions that you have defined have unclear semantics. For example, the function closeMongoDb should basically close the DB and that's it. But here it does another job as well: it fetches the data and closes the DB afterwards.
Also, I'd probably use the async module instead of node-array, as the latter seems to solve a different problem.
I've refactored the code. Please read my comments. I tried to make it as clear as possible.
const MongoClient = require("mongodb").MongoClient;
const request = require("request");
// We are going to use the async module
// This is a classical module to handle async behavior.
const async = require("async");
// Opens a connection to the MongoDB instance at urldb.
// Follows the usual callback convention: callback(err) when connecting
// fails (after logging it), callback(undefined, db) when it succeeds.
const connectToDb = function(urldb, callback) {
  const onConnected = (err, db) => {
    if (!err) {
      // Everything is OK: hand the db object to the caller.
      callback(undefined, db);
      return;
    }
    console.log("MongoDb connection error.");
    callback(err);
  };
  MongoClient.connect(urldb, onConnected);
};
// Fetches the data for a single location from https://example.com/<loc>.
// Same callback convention as the other helpers:
// callback("Error connection to url.") on a request error,
// callback(undefined, body.result) on success.
const getData = (loc, callback) => {
  const options = {
    url: `https://example.com/${loc}`,
    json: true
  };
  const onResponse = (error, response, body) => {
    if (!error) {
      callback(undefined, body.result);
      return;
    }
    callback("Error connection to url.");
  };
  request(options, onResponse);
};
// Goes over every location, pulls its data with getData and inserts it into
// the "testCollection" collection of db.
// allDataFetchedCb is called once: with (undefined, db) after ALL locations
// have been processed, or with (err) as soon as async.each reports an error.
const saveDataFromLocations = function(locations, db, allDataFetchedCb) {
  // Processes one location; locProcessedCb tells async.each that this item
  // is finished (with an error if it could not be processed).
  const processLocation = (loc, locProcessedCb) => {
    getData(loc, (apiErr, results) => {
      if (apiErr) {
        console.log(apiErr);
        // The item could not be fetched: pass the error up.
        locProcessedCb(apiErr);
        return;
      }
      const prettyResults = JSON.stringify(results, undefined, 2);
      console.log(`Obtained the data from the api: ${prettyResults}`);
      db.collection("testCollection").insert(results, function(dbError) {
        if (dbError) {
          // The item could not be stored: pass the error up.
          locProcessedCb(dbError);
        } else {
          // Item processed without errors.
          locProcessedCb();
        }
      });
    });
  };

  // Runs after all items have been processed or after the first error.
  const onAllProcessed = err => {
    if (!err) {
      console.log("All the locations have been processed.");
      // All good: pass the db object up.
      allDataFetchedCb(undefined, db);
      return;
    }
    allDataFetchedCb(err);
  };

  async.each(locations, processLocation, onAllProcessed);
};
// Entry point: connects to the database at urldb, saves the data for every
// location and then closes the connection.
//
// callback is invoked exactly once:
//   - callback(err) if connecting, saving or closing failed;
//   - callback(<falsy>) when everything succeeded.
// The connection is closed on the failure path as well, so a failed save does
// not leave an open connection behind.
const getDataAndCloseDb = function(urldb, locations, callback) {
  // Well, let's connect.
  connectToDb(urldb, (connectErr, db) => {
    if (connectErr) {
      callback(connectErr);
      return;
    }
    // Now let's get everything.
    saveDataFromLocations(locations, db, saveErr => {
      // If somehow there is no db object, or no close method, we want to know
      // about it. Returning here is essential: without it db.close would
      // still be called below and callback would fire a second time (or the
      // call would throw).
      if (!db || typeof db.close !== "function") {
        callback(saveErr || new Error("Unable to close the DB Connection."));
        return;
      }
      // Closing the DB, whether or not saving succeeded.
      db.close(closeErr => {
        // A save error takes precedence; otherwise report the close result
        // (undefined or null when closing went fine).
        callback(saveErr || closeErr);
      });
    });
  });
};
// Locations to fetch.
const locationArray = ["location1", "location2", "location3", "location4"];
// Finally run the whole pipeline against the local test database and report
// how it went.
getDataAndCloseDb("mongodb://127.0.0.1:27017/testDb", locationArray, err => {
  if (!err) {
    console.log("Done successfully.");
    return;
  }
  console.error(
    `Unable to fetch the data due to the following reason: ${err}`
  );
});
I didn't run this code as I don't have the URL etc. So please try it yourself and debug if needed.

How can I execute one query after another and process that data?

I have an app.get that will return customer data and customer purchases. Inside this app.get I need to run two mysql calls and build an array to pass back.
How can I execute one query after another and process that data?
// GET /customer: intended to run one query, then one follow-up query per
// returned row, and send the combined result as JSON.
// NOTE(review): both connection.query calls only register callbacks; the for
// loop and the final res.send below are reached without waiting for them.
app.get('/customer', function (req,res) {
var response1 = [];
var response2 = [];
var processedData = [];
connection.query('QUERY HERE', function(err, rows, fields) {
if (!err){
// NOTE(review): 'response' is not declared in this handler; the arrays
// declared above are response1 and response2.
response.push({rows});
} else {
res.status(400).send(err);
}
});
//for loop 'response' results and perform another query
for (var i = 0; i < response1.length; i++) {
var row = response1[i];
connection.query('QUERY HERE FOR row.customerid', function(err, rows, fields) {
if (!err){
// 'row' is a function-scoped var shared by every iteration of the loop.
processedData.push({'Customer Name:' : row.customername, 'purchases' : rows});
} else {
res.status(400).send(err);
}
});
}
//Send json back
res.setHeader('Content-Type', 'application/json');
res.status(200).send(JSON.stringify(processedData));
});
There is a very convenient module called async.js that provides a bunch of functions for doing complex async operations. Particularly,
async.waterfall() is great when you need to pass down results from one async operation/task to another.
async.mapSeries() is great when you need to create a new array with results from an array of async operation/tasks.
Let's use both.
If I understood your code correctly, the code would look something similar to
// GET /customer: loads the customers, then the purchases of each customer
// (one query after another), and responds with the combined list as JSON.
// Responds with status 400 and the error if any query fails.
app.get('/customer', function (req, res) {
  async.waterfall([
    // Each task receives a callback as its last argument and MUST call it
    // exactly once. Passing an error as the first argument stops the
    // waterfall and jumps to the final callback.
    function task1 (cb1) {
      // connection.query calls cb1(err, rows, fields); on success rows and
      // fields are handed to the next task.
      connection.query('QUERY HERE', cb1);
    },
    function task2 (rows, fields, cb2) {
      // Run one query per row, strictly in series, collecting one result
      // object per customer. mapSeries calls cb2(err) on the first error or
      // cb2(null, customers) when every row has been processed.
      async.mapSeries(rows, function getPurchases (row, cb3) {
        connection.query('QUERY HERE FOR row.customerid', function (err, purchases) {
          if (err) return cb3(err); // stop mapSeries() if an error occurred
          cb3(null, { 'Customer Name': row.customername, 'purchases': purchases });
        });
      }, cb2);
    }
  ], function (err, customers) {
    // Reached when all tasks are done OR as soon as one of them failed.
    // Without a response here the HTTP request would never complete.
    if (err) {
      res.status(400).send(err);
      return;
    }
    res.setHeader('Content-Type', 'application/json');
    res.status(200).send(JSON.stringify(customers));
  });
});

Node js array dashboard

I'm doing the backend of my app with node js. In this case i'm trying to get the typical dashboard like facebook, instagram,....
For a given user, I'm trying to get the users that he follows. Once I have the array of followed users, I find the "recetas" that each of them has (one user can have more than one). Finally, I add all of these recetas to an array, but the problem is that the array comes back empty.
// Dashboard handler: looks up the user given by req.params.id and collects
// the "recetas" of every user that this user follows.
// Responds with 404 'User not found' when the user does not exist.
//
// NOTE: res.send(myarray) is executed right after the Receta.find calls have
// been started. If Receta.find invokes its callback asynchronously, the
// response is sent before any receta has been pushed, i.e. the array is still
// empty. That is the behaviour discussed in the surrounding text.
getDashboard = function (req, res) {
  var myarray = new Array();
  User.findById(req.params.id, function (err, user) {
    if (!user) {
      res.send(404, 'User not found');
    }
    else {
      var a = user.following;
      a.forEach(function (current_value) {
        Receta.find({ "user_id": current_value._id }, function (err, recetas) {
          if (!err) {
            recetas.forEach(function (receta) {
              myarray.push(receta);
            }); // the original snippet was missing this ");" and did not parse
          } else {
            console.log('Error: ' + err);
          }
        });
      })
      res.send(myarray);
    }
  });
};
You are dealing with a common async issue. Receta.find is asynchronous, it is not a blocking operation, so res.send is called before all of your Receta.find calls have completed. You can get around this issue by using Promises, assuming they are available in your version of Node:
// Replacement for the a.forEach(...) / res.send(myarray) part of the handler:
// start one Receta.find per followed user, wait for all of them, then respond.
var a = user.following;
var promises = a.map(function (followed) {
  // Adapt the callback-style Receta.find to a Promise.
  return new Promise(function (resolve, reject) {
    Receta.find({ "user_id": followed._id }, function (err, recetas) {
      if (err) {
        reject(err);
      } else {
        resolve(recetas);
      }
    });
  });
});
Promise.all(promises).then(function (allData) {
  // allData holds one array of recetas per followed user; flatten it so the
  // response is a single list of recetas, like the array the original loop
  // was building.
  var myarray = Array.prototype.concat.apply([], allData);
  res.send(myarray);
}).catch(function (error) {
  // Report failures with an error status instead of a 200 response.
  res.status(500).send(error);
});
If native Promises aren't available, you can use a library like Q or bluebird
res.send(myarray); is being called before the Receta.find callbacks inside a.forEach have run, because Receta.find performs I/O and is asynchronous.
Call res.send only once the loop has finished and all the recetas have been returned.

Node.js/Express - Code inside callback is running after completing the first function

I'm making an app for camps where users can come and create their camping experiences and comment on them. I try to first remove any camps that are in mongodb, then create 3 dummy camps and then associate comments with them. But it seems that all 3 camps are always created first and the comments only afterwards, and because of that the comments can't be associated with them.
// Seed script: removes all campgrounds, then creates one campground per entry
// of campdata and attaches one comment to each created campground.
Campground.remove({}, function (err) {
if (err) {
// Only logs; the seeding below still runs after a failed remove.
console.log('some error in campground');
}
// forEach starts one Campground.create per seed and does not wait for the
// callbacks passed to create.
campdata.forEach(function (seed) {
Campground.create(seed, function (err, createdData) {
if (err) {
console.log('camps not created');
} else {
// create comments
Comment.create({
description: 'this is the best place but wish if there is internet',
author: 'satty'
}, function (err, commentdata) {
if (err) {
console.log(err);
} else {
// Attach the new comment to the campground and persist it; save() is
// called without a callback, so its outcome is not checked here.
createdData.comments.push(commentdata);
createdData.save();
console.log(commentdata);
}
});
// Executed right after Comment.create has been called, not from inside its
// callback.
console.log(createdData);
} //else completed
}); // campground create completed
}); // for each
// Executed right after the forEach loop has returned.
console.log('removed campgrounds');
}); // campground remove
Remember that Node is asynchronous. forEach runs synchronously, but the functions within are asynchronous — meaning that they are still executing after the forEach loop completes. This is a problem for you because the iterator on forEach has already reached the last element in the array long before the asynchronous comment-adding function executes.
One way to solve this is to use async:
(Removed superfluous code for brevity)
let async = require('async')
// Seed script: removes all campgrounds, then for every entry of campdata
// creates a campground, creates a comment and attaches it to that campground.
// async.each is told that an item is finished only after the campground has
// been saved, and every error is propagated instead of being ignored.
Campground.remove({}, function(err) {
  if (err) {
    // Do not seed on top of data that could not be removed.
    console.log(err)
    return
  }
  async.each(campdata, function(seed, callback) {
    Campground.create(seed, function(err, createdData) {
      // Without this check createdData would be undefined below.
      if (err) return callback(err)
      let comment = {
        description: 'this is the best place but wish if there is internet',
        author: 'satty'
      }
      Comment.create(comment, function(err, commentdata) {
        if (err) return callback(err)
        createdData.comments.push(commentdata)
        // Signal completion only once the save has finished (or failed).
        createdData.save(callback)
      })
    })
  }, function(err) {
    // all done! (or the first error occurred)
    if (err) {
      console.log(err)
    }
  })
})

Mongodb find() returns undefined (node.js)

I've been playing around with mongodb in node.js. I have made a basic collection with some data (I know it's there, I've checked). When I try to run a find() on the collection it returns undefined. I don't know why this is. The code is below:
// Connects to MongoDB and prints every document of the 'accounts' collection.
// NOTE(review): db.close() sits after the if/else, so it runs in both
// branches and without waiting for the toArray callback.
function get_accounts(){
var MongoClient = mongodb.MongoClient;
var url = "url";
MongoClient.connect(url, function (err, db) {
if (err) {
console.log('Unable to connect to the mongoDB server. Error:', err);
} else {
//HURRAY!! We are connected. :)
console.log('Connection established to database');
var collection = db.collection('accounts');
// The err argument of this callback is not checked before docs is used.
collection.find().toArray(function(err, docs) {
console.log("Printing docs from Array")
docs.forEach(function(doc) {
console.log("Doc from Array ");
console.dir(doc);
});
});
// Logged right after find().toArray() has been called, not from inside its
// callback.
console.log("mission complete");
}
db.close();
}
);
}
If you know why this is happening i would like to hear your thoughts. thanks! The database is a mongolab hosted database if that makes any difference.
You are getting an undefined value because of the asynchronous nature of node.js, nowhere in your code exists logic that tells the console.log statement to wait until the find() statement finishes before it prints out the documents. You have to understand the concept of callbacks in Node.js. There are a few problems here, though, that you could fix. A lot of people getting started with node have the tendency to nest lots of anonymous functions, creating the dreaded "pyramid of doom" or callback hell. By breaking out some functions and naming them, you can make it a lot cleaner and easier to follow:
var MongoClient = require("mongodb").MongoClient
// Connecting to mongo lives in its own function to avoid the "pyramid of
// doom". Calls cb(err) when connecting fails, otherwise cb(null, collection)
// with the "accounts" collection.
function getConnection(cb) {
  function onConnect(err, db) {
    if (err) return cb(err);
    cb(null, db.collection("accounts"));
  }
  MongoClient.connect("your-mongo-url", onConnect);
}
// Lists all documents of the collection by passing an empty selector to
// find(); the outcome is delivered to cb.
function readAll(collection, cb) {
  var matchEverything = {};
  collection.find(matchEverything, cb);
}
// Logs a single account.
// When no account is given, only the "not found" message is logged.
function printAccount(account) {
  // make sure you found your account!
  if (!account) {
    console.log("Couldn't find the account you asked for!");
    return;
  }
  // Pass the account as its own argument so console.log prints its contents;
  // string concatenation would print "[object Object]" for a document.
  console.log("Account from Array", account);
}
// Walks through the result set with the cursor's each() method and prints
// every account.
// The callback given to each() runs once per result, and every run may carry
// an error; a null account marks the end of the result set.
// cb is called with the error if one occurs, or without arguments once the
// whole result set has been printed.
function printAccounts(accounts, cb) {
  accounts.each(function(err, account) {
    if (err) return cb(err);
    // End of the result set: report completion instead of printing "null".
    if (account == null) return cb();
    printAccount(account);
  });
}
// Connects, reads every account, prints each one and closes the database.
// cb is called with the error if anything fails, or without arguments once
// all accounts have been printed.
function get_accounts(cb) {
  getConnection(function(err, collection) {
    if (err) return cb(err);
    // Need to make sure to close the database, otherwise the process won't
    // stop. Used on the success path and on every error path after the
    // connection has been opened.
    // NOTE(review): relies on the collection exposing its database as
    // collection.db, as the original code did; confirm for the driver
    // version in use.
    function finish(err) {
      collection.db.close();
      if (err) return cb(err);
      cb();
    }
    function processAccounts(err, accounts) {
      if (err) return finish(err);
      // the callback to each is called for every result,
      // once it returns a null, you know
      // the result set is done
      accounts.each(function(err, account) {
        if (err) return finish(err);
        if (account) {
          printAccount(account);
        } else {
          finish();
        }
      });
    }
    readAll(collection, processAccounts);
  });
}
// Run get_accounts; on failure log the error and exit with status 1.
get_accounts(function onAccountsDone(err) {
  if (!err) {
    return;
  }
  console.log("had an error!", err);
  process.exit(1);
});
You might have to add an empty JSON object inside the find.
collection.find({})
Documentation can be found here.
You must put this code inside an async function, and then you will be fine. Here data is your desired value; use promises (async/await) so that your code does not look messy.
// Must run inside an async function. data receives the array of documents;
// a failed query rejects, so the error surfaces in the surrounding async
// function instead of being stored in data.
var accountCollection = db.collection('accounts');
let data = await accountCollection.find().toArray();

Categories

Resources