Beginning with Express and Mongoose, I often need to do some batch operations on collections.
However, this usually involves callbacks, which is a pain given how concurrency is coded in Node.js.
So, basically:
// Given a collection C whose items expose callAsync(callback):
// process the items one at a time, then call done().
var i = 0;
var doRecursive = function (i) {
  if (i < C.length) {
    // The continuation has to be passed to callAsync as a function.
    C[i].callAsync(function (err, result) {
      // Advance to the next index (`i=+1` would assign the constant 1).
      return doRecursive(i + 1);
    });
  } else {
    return done();
  }
};
doRecursive(i);
Now, I don't remember what the maximum stack depth is before I get a stack overflow with Node, but I guess that with 10,000 elements it won't do.
I wonder if there are other ways to handle this; if so, what are they?
Thanks.
If the goal is to iterate a collection asynchronously, there are numerous control flow libraries available.
A good example is async and its reduce function:
// Fold the asynchronous result of every item in C into a running total,
// starting from 0. The first error aborts the reduction.
async.reduce(C, 0, function (total, entry, step) {
  entry.callAsync(function (err, result) {
    if (!err) {
      step(null, total + result);
    } else {
      step(err);
    }
  });
}, function (err, result) {
  // ...
});
Note: It's not entirely clear what value you wanted to get from doRecursive, so this just uses addition as an example.
I think you can simply self-iterate instead of using true recursion, since you're not drilling into a deep object:
/**
 * Call callAsync on each element of C in order, one element at a time.
 * When every element has been processed, done() is called with no arguments.
 * If an element reports an error, iteration stops and done(err) is called.
 *
 * @param {Array} C - items exposing callAsync(callback), where callback is
 *   invoked as callback(err, result).
 * @param {number} [i] - index to start from; defaults to 0.
 */
function doRecursive (C, i){
  i = i || 0;
  if (i < C.length) {
    // callAsync takes a single node-style callback; `err` is a parameter of
    // that callback, not an argument we pass in.
    C[i].callAsync(function (err, result) {
      if (err) {
        done(err);
        return;
      }
      doRecursive(C, ++i);
    });
  } else {
    done();
  }
}
doRecursive(C);
This does not create a tall stack if the code functions as labeled (that is, if callAsync really invokes its callback asynchronously).
I localized C so that it executes faster and is potentially reusable on other collections.
The pattern also makes it easy to defer it for long-running operations, just by changing
doRecursive(C, ++i);
to
setTimeout( doRecursive.bind(this, C, ++i), 50 );
Related
I want to save 1 million records to mongodb using javascript like this:
// Start one save per record, 1 million in total. The loop does not wait for
// a save to complete before starting the next one.
for (var i = 0; i < 1000000; i++) {
  var model = buildModel(i);
  db.save(model, function (err, done) {
    console.log('cool');
  });
}
I tried it; it saved ~160 records, then hung for 2 minutes, then exited. Why?
It blew up because you are not waiting for an asynchronous call to complete before moving on to the next iteration. What this means is that you are building a "stack" of unresolved operations until this causes a problem. What is the name of this site again? Get the picture?
So this is not the best way to proceed with "Bulk" insertions. Fortunately the underlying MongoDB driver has already thought about this, aside from the callback issue mentioned earlier. There is in fact a "Bulk API" available to make this a whole lot better. This assumes you have already pulled in the native driver as the db object, but I prefer just using the .collection accessor from the model, and the "async" module to make everything clear:
// Insert 1,000,000 documents through the Bulk Operations API, sending one
// batch to the server for every 1000 queued inserts.
var bulk = Model.collection.initializeOrderedBulkOp();
var counter = 0;

// NOTE(review): this uses a synchronous test function for async.whilst;
// async v3 expects the test to take a callback - confirm the version in use.
async.whilst(
  // Iterator condition
  function() { return counter < 1000000; },
  // Do this in the iterator
  function(callback) {
    counter++;
    var model = buildModel(counter);
    bulk.insert(model);
    if ( counter % 1000 === 0 ) {
      // Flush the full batch, then start a fresh one for the next inserts.
      bulk.execute(function(err,result) {
        bulk = Model.collection.initializeOrderedBulkOp();
        callback(err);
      });
    } else {
      callback();
    }
  },
  // When all is done
  function(err) {
    if (err) {
      console.error(err);
      return;
    }
    if ( counter % 1000 !== 0 ) {
      // A partial batch is still queued; report completion only once it
      // has actually been written.
      bulk.execute(function(err,result) {
        if (err) {
          console.error(err);
          return;
        }
        console.log( "inserted some more" );
        console.log( "I'm finished now" );
      });
    } else {
      console.log( "I'm finished now" );
    }
  }
);
The difference there is using both "asynchronous" callback methods on completion rather than just building up a stack, but also employing the "Bulk Operations API" in order to mitigate the asynchronous write calls by submitting everything in batch update statements of 1000 entries.
This not only avoids "building up a stack" of function execution like your own example code, but also performs efficient "wire" transactions by not sending everything in individual statements, instead breaking the work up into manageable "batches" for server commitment.
You should probably use something like Async's eachLimit:
// Create an array of the numbers 0-999999
var models = new Array(1000000);
// Fill indices 0..length-1 only; starting at models.length would append an
// extra element and grow the array to 1,000,001 entries.
for (var i = 0; i < models.length; i++) {
  models[i] = i;
}
// Iterate over the array performing a MongoDB save operation for each item
// while never performing more than 20 parallel saves at the same time
async.eachLimit(models, 20, function iterator(model, next){
  // Build a model and save it to the DB, call next when finished
  db.save(buildModel(model), next);
}, function done(err){
  // eachLimit's final callback only receives an error, not a results list,
  // so the saved count is taken from the input array.
  if (err) { // When an error has occurred while trying to save any model to the DB
    console.error(err);
  } else { // When all models have been saved to the DB
    console.log('Successfully saved ' + models.length + ' models to MongoDB.');
  }
});
I want to create a dashboard where I'll be showing simple stats like count of users, count of comments etc.
I am counting my collections using something like
// Counts are chained: each count is started from inside the previous
// count's callback. A failed count is recorded as -1.
User.count(function(err, num){
if (err)
userCount=-1;
else
userCount = num;
// Started only after the user count has called back.
Comment.count(function(err, num){
if (err)
commentCount=-1;
else
commentCount = num;
// Placeholder for the further nested counts.
SomethingElse.count(...)
})
})
which is a bit ugly I think. Is there any other way to do it without nesting 4 counts?
You can take advantage of a module like async to do what you want in a more readable way. The module is globalized by Sails by default, meaning it's available in all of your custom code. Using async.auto, you would rewrite the above as:
async.auto({
  // count is a method: call it to build the query, then run it with exec(cb).
  user: function(cb) {User.count().exec(cb);},
  comment: function(cb) {Comment.count().exec(cb);},
  somethingElse: function(cb) {SomethingElse.count().exec(cb);},
  anotherThing: function(cb) {AnotherThing.count().exec(cb);}
},
  // The above 4 methods will run in parallel, and if any encounters an error
  // it will short-circuit to the function below. Otherwise the function
  // below will run when all 4 are finished, with "results" containing the
  // data that was sent to each of their callbacks.
  function(err, results) {
    if (err) {return res.serverError(err);}
    // results.user contains the user count, results.comment contains
    // comments count, etc.
    return res.json(results);
  }
);
I'm new to js.
I am using express for node js, and mongoose as a mongo orm.
// Starts one Question.find per tag and collects each result in final_results.
function direct_tags_search_in_db(tags){
var final_results = [];
for (var i=0; i<tags.length; ++i) {
var tag = tags[i];
Question.find({tags: tag}).exec(function(err, questions) {
final_results.push(questions);
// `i` is the loop's single shared `var`. The `return` below only leaves this
// callback; the outer function itself has no return statement.
if (i == tags.length -1 ){
return final_results;
}
});
}
};
I get empty results because of the asynchronous nature of find, but I don't know what the best approach for this is.
I'd appreciate a little help, thanks.
You will often find that methods such as Question.find().exec that accept a function as an argument are async. It is especially common for methods that perform network requests or file system operations. These are most commonly referred to as a callback. That being the case, if you would like something to occur when the async task(s) complete, you need to also implement a callback.
Also, it is possible that your reference to tag is being changed in a way that is likely undesired. There are a number of solutions, here is a simple one.
/**
 * Look up the questions for every tag and report them through a node-style
 * callback once all lookups have finished.
 *
 * @param {Array} tags - tags to search for.
 * @param {function} callback - called once as callback(err) on the first
 *   failure, or as callback(null, final_results) where final_results[n]
 *   holds the questions found for tags[n].
 */
function direct_tags_search_in_db(tags, callback){
  var final_results = [];
  // Number of lookups that have not called back yet.
  var pending = tags.length;
  var failed = false;

  // Nothing to look up: report an empty result instead of never calling back.
  if (pending === 0) {
    callback(null, final_results);
    return;
  }

  // Array.forEach is able to retain the appropriate `tag` reference
  tags.forEach(function(tag, index){
    Question.find({tags: tag}).exec(function(err, questions) {
      // An earlier lookup already reported an error; do not call back again.
      if (failed) {
        return;
      }
      // We should be making sure to handle errors
      if (err) {
        // Return errors to the requester
        failed = true;
        callback(err);
        return;
      }
      // Store by index so results line up with `tags` regardless of the
      // order in which the lookups complete.
      final_results[index] = questions;
      pending -= 1;
      if (pending === 0) {
        // All done, return the results
        callback(null, final_results);
      }
    });
  });
}
You will notice that when we implement our own callback, that we follow the same common pattern as the callback for Question.find().exec(function(err, result){}); -- first argument a potential error, second argument the result. That is why when we return the results, we provide null as the first argument callback(null, final_results);
Quick example of calling this function:
// Example call: search three tags, then log either the error or the results.
direct_tags_search_in_db([1, 2, 3], function (err, results) {
  if (err) {
    console.error('Error!');
    console.error(err);
    return;
  }
  console.log('Final results');
  console.log(results);
});
Another option for solving various async goals is the async module, promises, or otherwise.
I have a frustrating problem with learning to work with the callback style of programming in Node.js. I have a query to a MongoDB database. If I pass in a function to execute on the result it works, but I'd rather flatten it out and have it return the value. Any help or direction on how to do this correctly is appreciated. Here's my code:
// Opens the database, gets the 'lots' collection, runs find() on it and
// passes the resulting array of items to the `response` callback.
// The `err` argument of each nested callback is not checked.
var getLots = function(response){
db.open(function(err, db){
db.collection('lots', function(err, collection){
collection.find(function(err, cursor){
cursor.toArray(function(err, items){
response(items);
})
})
})
})
}
I want something more like this:
// Attempted flattened version. Each step is written as a call expression
// such as openCollection(err, db), so it is evaluated immediately and its
// return value, not the function itself, is what gets passed as the callback.
lots = function(){
console.log("Getting lots")
// `err` is not declared in this scope when openCollection(err, db) is evaluated.
return db.open(openCollection(err, db));
}
openCollection = function(err, db){
console.log("Connected to lots");
// `collection` is not declared in this scope when findLots(...) is evaluated.
return (db.collection('lots',findLots(err, collection))
);
}
findLots = function(err, collection){
console.log("querying 2");
// `cursor` is not declared in this scope when getLots(...) is evaluated.
return collection.find(getLots(err, cursor));
}
getLots = function(err, cursor) {
console.log("Getting lots");
return cursor.toArray();
}
Where the final set of data would bubble back up through the function calls.
The problem is that I get an error from Node.js saying that err is not defined or that the collection is not defined. For some reason when I nest the callbacks the correct object is getting passed down. When I try going to this flattened style it complains that things are not defined. I don't know how to get it to pass the necessary objects.
What you need is one of the many control flow libraries available for node via npm and catalogued on the Node.js wiki. My specific recommendation is caolan/async, and you would use the async.waterfall function to accomplish this type of flow where each async operation must be executed in order and each requires the results from the previous operation.
Pseudocode example:
// Each step receives the previous step's result plus a node-style callback
// and forwards that callback to the next asynchronous operation.
function getLots(db, callback) {
  db.collection("lots", callback);
}
function findLots(collection, callback) {
  collection.find(callback);
}
function toArray(cursor, callback) {
  cursor.toArray(callback);
}
// db.open is bound so it keeps `db` as its receiver when waterfall calls it,
// and the steps are referenced by the names they are declared with above.
async.waterfall([db.open.bind(db), getLots, findLots, toArray], function (err, items) {
  //items is the array of results
  //Do whatever you need here
  // NOTE(review): err is not inspected here; on failure items will be undefined.
  response(items);
});
async is a good flow control library. Frame.js offers some specific advantages like better debugging, and better arrangement for synchronous function execution. (though it is not currently in npm like async is)
Here is what it would look like in Frame:
// The same open -> collection -> find -> toArray flow, written as a series
// of Frame steps. Each step hands `next` to the async call as its callback;
// presumably Frame passes that call's (err, value) on to the following step
// and runs the steps in order once Frame.init() is called - confirm against
// the Frame.js documentation.
Frame(function(next){
db.open(next);
});
Frame(function(next, err, db){
db.collection('lots', next);
});
Frame(function(next, err, collection){
collection.find(next);
});
Frame(function(next, err, cursor){
cursor.toArray(next);
});
Frame(function(next, err, items){
// Hand the final array to the caller, then signal that this step is done.
response(items);
next();
});
Frame.init();
I have an interesting case where I need to do a few queries in MongoDB using Mongoose, but the response is returning before I can complete all of them.
I have two document types, list and item. In one particular call, I need to get all of the lists for a particular user, then iterate over each of them and fetch all of the items and append them to the appropriate list before returning.
// Fetch the user's lists, then start one Item.find per list to attach its items.
List.find({'user_id': req.params.user_id}, function(err, docs){
if (!err) {
if (docs) {
var results = [];
_und.each(docs, function(value, key) {
var list = value.toObject();
list.items = [];
// This callback fills list.items and pushes the list onto results.
Item.find({'list_id': value._id}, function(err, docs) {
if (!err) {
_und.each(docs, function(value, key) { list.items.push(value.toObject()); });
results.push(list);
}
else {
console.log(err);
}
});
});
// Called right after the loop has started the queries, without waiting for
// the Item.find callbacks above.
res.send(results);
(_und is how I've imported underscore.js)
Obviously the issue is the callbacks, and since there are multiple loops I can't return within a callback.
Perhaps this is a case where I would need to get the count in advance and check it on every iteration to decide when to return the results. This doesn't seem elegant though.
Code solution
First of all, the issue is with the code. You're sending the results before the Item.find queries finish. You can fix this quite easily:
// Reference counting: one pending tick per list, plus one extra tick that is
// consumed immediately so an empty `docs` still reaches finish().
var count = docs.length + 1;
next();
_und.each(docs, function(value, key) {
  var list = value.toObject();
  list.items = [];
  Item.find({
    'list_id': value._id
  }, function(err, docs) {
    if (!err) {
      _und.each(docs, function(value, key) {
        list.items.push(value.toObject());
      });
      // push asynchronous
      results.push(list);
    }
    else {
      console.log(err);
    }
    // Count this query as finished whether it succeeded or failed; otherwise
    // a single failed query would leave count above zero and the response
    // would never be sent.
    next();
  });
});
// Decrement the pending count and send the response once it reaches zero.
function next() {
  if (--count === 0) {
    finish();
  }
}
// Send the collected lists back to the client.
function finish() {
  res.send(results);
}
The easiest way is reference counting: you default count to the number of documents. Then every time you're finished getting an item, you call next and decrement the count by one.
Once you're finished getting all items, your count should be zero. Note that we do .length + 1 and call next immediately. This guards against the case where there are no documents, which would otherwise do nothing.
Database solution
The best solution is to use mongo correctly. You should not be doing what is effectively a join in your code, it's slow and inefficient as hell. You should have a nested document and denormalize your list.
so list.items = [Item, Item, ...]
As a further aside, avoid mongoose, it's inefficient, use the native mongo driver.
I use this module for that:
https://github.com/caolan/async