Ideally, I would like to be able to compare data from the cursor object and the following cursor.next() object in order to perform other functions.
The following code results in the nextDoc (or nextState) being undefined.
var data = db.collection('data');
var myCursor = data.find({});
myCursor.sort({'State': 1, 'Temperature': -1});
var nextDoc = myCursor.nextObject( function (err, doc) {
    if (err) throw err;
});

myCursor.each( function (err, doc) {
    if (err) throw err;
    if (doc == null) {
        return db.close();
    }
    var currState = doc.State;
    var nextState = nextDoc;
    console.log("currState: " + currState + " nextState: " + nextState);
});
Well, if you think about it, asking for the "next" document does not make much sense: soon enough in your execution you are going to hit the end of the query results, and there just will not be a "next".
Therefore the better approach is to keep the last result and compare it to the current item. The logic is quite simple as well: the very first document has nothing to compare against, so there is little point there, but everything else works fine.
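For illustration, that pattern looks like this with the same .each() iteration used in the question (a minimal sketch reusing the cursor set up above):

var prevState = null;

myCursor.each(function (err, doc) {
    if (err) throw err;
    if (doc == null) {
        return db.close();
    }
    // the first document has nothing to compare against
    if (prevState != null) {
        console.log("prevState: %s currState: %s", prevState, doc.State);
    }
    // remember the current value for the next iteration
    prevState = doc.State;
});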
A preference here, though, is to use the stream interface, which the latest driver presents natively (otherwise call .stream()), as it gives a bit more control in case you want to do other things such as an .update() or another async operation:
var data = db.collection('data');
var myCursor = data.find({});
myCursor.sort({'State': 1, 'Temperature': -1});
var prevState = null;
myCursor.on("err",function(err) {
throw err;
});
myCursor.on("end",function() {
db.close();
});
myCursor.on("data",function(data) {
myCursor.pause(); // stops other events emitting
if ( prevState != null ) {
console.log("prevState: %s currState %s",prevState,data.State);
}
prevState = data.State;
myCursor.resume(); // Restart when finsihed with document
});
The .pause() and .resume() methods make sure the variable outside the handler's scope is respected on each iteration, as no more "data" events are emitted until the stream is resumed.
Of course, if you do perform an asynchronous operation, then .resume() should be called within its callback function to ensure the "loop" does not continue until that processing is complete.
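For example, an asynchronous .update() per document might look like this (a sketch; the 'summary' collection and the per-state counting are purely illustrative):

myCursor.on("data", function(data) {
    myCursor.pause();
    // illustrative async operation: count the documents seen per state
    db.collection('summary').update(
        { _id: data.State },
        { $inc: { count: 1 } },
        { upsert: true },
        function(err) {
            if (err) throw err;
            myCursor.resume(); // only continue once the update has completed
        }
    );
});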
So it is just a matter of looking at the problem the other way around, and looking back rather than forward, which is something cursors and streams happily do.
I'm creating a YouTube upload notification bot for a Discord server I am in, using the YouTube RSS feed, and am having problems with it: the bot sends the same video twice even though I've tried everything to fix it. The bot cycles through different users in a for loop and compares each user's latest video ID with the one stored in a JSON file. If they do not match, it sends a message and updates the JSON. Here is my current code:
function update(videoId, n) {
    var u = JSON.parse(fs.readFileSync("./jsons/uploads.json"))
    u[n].id = videoId
    fs.writeFile("./jsons/uploads.json", JSON.stringify(u, null, 2), (err) => {
        if (err) throw err;
        // client.channels.cache.get("776895633033396284").send()
        console.log('Hey, Listen! ' + n + ' just released a new video! Go watch it: https://youtu.be/' + videoId + "\n\n")
    });
}

async function uploadHandler() {
    try {
        var u = require('./jsons/uploads.json');
        var users = require('./jsons/users.json');
        for (i = 0; i < Object.keys(users).length; i++) {
            // sleep(1000)
            setTimeout(function(i) {
                var username = Object.keys(users)[i]
                let xml = f("https://www.youtube.com/feeds/videos.xml?channel_id=" + users[username]).text()
                parseString(xml, function(err, result) {
                    if (err) {} else {
                        let videoId = result.feed.entry[0]["yt:videoId"][0]
                        let isMatch = u[username].id == videoId ? true : false
                        if (isMatch) {} else {
                            if (!isMatch) {
                                u[username] = videoId
                                update(videoId, username)
                            }
                        }
                    }
                });
            }, i * 1000, i)
        }
    } catch (e) {
        console.log(e)
    }
}
My code is rather simple, but I've had the same issue with other code that uses this method, so what would be the best way to accomplish this? Any advice is appreciated.
There are a few issues with your code that I would call out right off the bat:
Empty blocks. You use these especially with your if statements, e.g. if (condition) {} else { // Do the thing }. Instead, you should negate the condition, e.g. if (!condition) { // Do the thing }.
You declare the function uploadHandler as async, but you never await anything inside it. I suspect that f is the asynchronous, Promise-returning call you're trying to handle.
You've linked the duration of the timeout to your incrementing variable, so in the first run of your for block, the timeout will wait zero seconds (i is 0, times 1000), then one second, then two seconds, then three...
Here's a swag at a refactor, with some notes in there that I hope are helpful:
// Only require these values once
const u = require('./jsons/uploads.json');
const users = require('./jsons/users.json');
// This just makes the code a little more readable, I think
const URL_BASE = 'https://www.youtube.com/feeds/videos.xml?channel_id=';
function uploadHandler() {
    Object.keys(users).forEach(username => {
        // We will run this code once for each username that we find in users.
        // I am assuming `f` returns a Promise; when it resolves, we'll have the xml
        // available to us in the .then method.
        // Note: users[username] holds the channel id, as in your original code.
        f(`${URL_BASE}${users[username]}`).then(xml => {
            parseString(xml, (err, result) => {
                if (!err) {
                    const [videoId] = result.feed.entry[0]['yt:videoId']; // We can use destructuring to get element 0 from this nested value
                    if (videoId !== u[username].id) {
                        // Update the in-memory value for this user's most recent video
                        u[username].id = videoId;
                        // Console.log the update
                        console.log(`Hey listen! ${username} just released a new video! Go watch it: https://youtu.be/${videoId}\n\n`);
                        // Attempt to update the json file; this won't affect the u object in memory,
                        // but will keep your app up to date when you restart it in the future.
                        fs.writeFile('./jsons/uploads.json', JSON.stringify(u, null, 2), err => {
                            if (err) {
                                console.error(`There was a problem updating uploads.json with the new videoId ${videoId} for user ${username}`);
                            }
                        });
                    }
                }
            });
        })
        // This .catch method will run if the call made by `f` fails for any reason
        .catch(err => console.error(err));
    });
}
// I am assuming that what you want is to check for updates once every second.
setInterval(uploadHandler, 1000);
I'm trying to retrieve my comments on a given post for my website but I am unable to build up the nested comments because of the asynchronous nature of node.js.
getBlock([], function(){});

function getBlock(comments, callback) {
    comments.forEach(function(comment) {
        getChildComments(comment, function(err, children) {
            if (children) {
                getBlock(children, callback);
            }
            comment.Comments = children;
            /* not sure how to decide when to be done? */
            callback(null, comments);
        });
    });
}
The above code works fine for synchronous code, but it does not work asynchronously because I cannot tell when comments contains all of the data to return to the browser.
I tried to keep track of the recursive calls and end when there were 0 calls left but that was buggy and sometimes would return early depending on the tree structure.
You could keep a count of the work still to do, and when it reaches zero, call the caller's callback function. Each executing instance of the function in the recursion tree defines its own callback, so that only the call made by the top-level instance invokes the callback passed in the first statement (outside of the function body):
function getBlock(comments, callback) {
    if (!comments || !comments.length) {
        // Nothing to do, call back synchronously
        callback(comments);
        return;
    }
    var leftOver = comments.length;
    comments.forEach(function(comment) {
        getChildComments(comment, function(err, children) {
            comment.Comments = children;
            // provide custom callback:
            getBlock(children, function () {
                // only call parent's callback when all is done here:
                if (--leftOver === 0) callback(comments);
            });
        });
    });
}
Unlike your example code, the above must not be called with an empty array, but with an array of the comment objects whose sub-hierarchy you want to retrieve. To get everything, you would pass an array with one dummy comment object, which would have an undefined id (to be matched with the parentId references of comments that have no parents). Something like this:
getBlock([container], function(){
    console.log(container);
});
Below is a working implementation, which uses mock data and setTimeout to simulate the asynchronous getChildComments:
function Comment(id, text, parentId) {
    this.id = id;
    this.text = text;
    this.parentId = parentId;
}

var mockData = [
    new Comment(1, "Michael Jackson died today"),
    new Comment(2, "How did he die?", 1),
    new Comment(3, "His doctor gave him too much of the white stuff", 2),
    new Comment(4, "He died in his sleep", 2),
    new Comment(5, "Oh my god, this can't be true!?", 1),
    new Comment(6, "He will be greatly missed", 1),
    new Comment(7, "I am working in my garden"),
    new Comment(8, "Happy birthday, friend!"),
    new Comment(9, "Thank you!", 8),
];

function getChildComments(parentComment, callback) {
    // Mock asynchronous implementation, for testing the rest of the code
    setTimeout(function () {
        var children = mockData.filter(function (comment) {
            return comment.parentId === parentComment.id;
        });
        callback(null, children);
    }, 0);
}

var container = new Comment(); // dummy node to collect the complete hierarchy into

getBlock([container], function(){
    console.log(container);
});

function getBlock(comments, callback) {
    if (!comments || !comments.length) {
        // Nothing to do, call back synchronously
        callback(comments);
        return;
    }
    var leftOver = comments.length;
    comments.forEach(function(comment) {
        getChildComments(comment, function(err, children) {
            comment.Comments = children;
            // provide custom callback:
            getBlock(children, function () {
                // only call parent's callback when all is done here:
                if (--leftOver === 0) callback(comments);
            });
        });
    });
}
Performance considerations
The above is a direct answer to "How can I recursively and asynchronously build a tree of unknown size", but it might not be the most efficient way to get the final result.
You get your data from a Postgres database, and probably perform a query for each call to getChildComments: this might take a relatively long time to complete, and puts quite a load on your database engine.
It might be more efficient to perform a single query to retrieve the whole hierarchy of comments.
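For illustration, once all the comments for a post are in memory, the hierarchy can be assembled with two synchronous passes; this is a sketch reusing the mock data above, and with a single database query you would fetch the rows first and then run the same logic:

function buildTree(comments) {
    var byId = {};
    // first pass: index every comment by id and give it an empty Comments array
    comments.forEach(function (c) {
        c.Comments = [];
        byId[c.id] = c;
    });
    // second pass: attach each comment to its parent, or treat it as a root
    var roots = [];
    comments.forEach(function (c) {
        if (c.parentId != null && byId[c.parentId]) {
            byId[c.parentId].Comments.push(c);
        } else {
            roots.push(c);
        }
    });
    return roots;
}

console.log(buildTree(mockData));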
Is there any way to duplicate a collection through the nodejs mongodb driver?
i.e. collection.copyTo("duplicate_collection");
You can eval copyTo() server-side, though it will block the entire mongod process and won't create indexes on the new collection.
var copyTo = "function() { db['source'].copyTo('target') };"
db.eval(copyTo, [], function(err, result) {
console.log(err);
});
Also note the field type warning.
"When using db.collection.copyTo() check field types to ensure that the operation does not remove type information from documents during the translation from BSON to JSON. Consider using cloneCollection() to maintain type fidelity."
Try to avoid .eval() if this is something you want to do regularly on a production system. It's fast, but there are problems.
A better approach would be to use the "Bulk" operations API, with a little help from the "async" library:
db.collection("target",function(err,target) {
var batch = target.initializeOrderedBulkOp();
counter = 0;
var cursor = db.collection("source").find();
var current = null;
async.whilst(
function() {
cursor.nextObject(function(err,doc) {
if (err) throw err;
// .nextObject() returns null when the cursor is depleted
if ( doc != null ) {
current = doc;
return true;
} else {
return false;
}
})
},
function(callback) {
batch.insert(current);
counter++;
if ( counter % 1000 == 0 ) {
batch.execute(function(err,result) {
if (err) throw err;
var batch = target.initializeOrderedBulkOp();
callback();
});
}
},
function(err) {
if (err) throw err;
if ( counter % 1000 != 0 )
batch.execute(function(err,result) {
if (err) throw err;
// job done
});
}
);
});
It's fast, not as fast as .eval(), but it does not block either the application or the server.
Batch operations will generally take as many operations as you throw at them, but using a modulo as a limiter allows a little more control and essentially avoids loading an unreasonable number of documents into memory at a time. Keep in mind that, whatever the case, the batch size that is sent cannot exceed 16MB between executions.
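With a more recent driver, the same modulo-style batching can be written with async/await and insertMany; this is a sketch, with 'source' and 'target' as illustrative collection names:

async function copyCollection(db) {
    const source = db.collection('source');
    const target = db.collection('target');
    const cursor = source.find();
    let buffer = [];
    while (await cursor.hasNext()) {
        buffer.push(await cursor.next());
        // flush a full batch of 1000 before reading any further
        if (buffer.length === 1000) {
            await target.insertMany(buffer);
            buffer = [];
        }
    }
    // flush whatever is left over since the last full batch
    if (buffer.length > 0) {
        await target.insertMany(buffer);
    }
}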
Another option to duplicate a collection is to use the aggregate method on a collection with the $out stage. Here is an example inside of an async function:
const client = await MongoClient.connect("mongodb://alt_dev:aaaaa:27018/meteor");
const db = client.db('meteor');
const planPrice = db.collection('plan_price');
// planPriceUpdateCollection is a string holding the target collection's name;
// $match: {} selects every document and $out writes the result to that collection
const planPriceCopy = planPrice.aggregate([{$match: {}}, {$out: planPriceUpdateCollection}]);
await planPriceCopy.toArray();
This will create a copy of the original collection with all of its content.
I'm new to mongoose/mongodb and I am trying to do some sort of error handling with my document save.
I am trying to create a stub id to store in the db for easier data retrieval later on (and also to put into the url bar so people can send links to that particular page on my website more easily, like jsfiddle or codepen).
Basically I want to search for a document with a page_id and if it exists, I want to regenerate that page_id and search until it gets to one that's unused like this:
var done = false;
while (!done) {
    Model.findOne({'page_id': some_hex}, function (err, doc) {
        if (doc) {
            some_hex = generate_hex();
        } else {
            done = true;
        }
    });
}
model.page_id = some_hex;
model.save();
However, since mongoose is asynchronous, the while loop will pretty much run indefinitely while the find works in the background until it finds something. This will kill the resources on the server.
I'm looking for an efficient way to retry save() when it fails (with a change to page_id). Or to try and find an unused page_id. I have page_id marked as unique:true in my schema.
Retrying should be performed asynchronously:
var tryToSave = function(doc, callback) {
    var instance = new Model(doc);
    instance.page_id = generate_hex();

    instance.save(function(err) {
        if (err) {
            if (err.code === 11000) { // 'duplicate key error'
                // retry
                return tryToSave(doc, callback);
            } else {
                // another error
                return callback(err);
            }
        }
        // it worked!
        callback(null, instance);
    });
};

// And somewhere else:
tryToSave(doc, function(err, instance) {
    if (err) ...; // handle errors
    ...
});
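As a design note, if generate_hex keeps colliding, the recursion above never terminates. A variation with a retry cap might look like this (a sketch; the limit of 5 is arbitrary):

var tryToSave = function(doc, callback, attemptsLeft) {
    if (attemptsLeft === 0) {
        return callback(new Error('could not find an unused page_id'));
    }
    var instance = new Model(doc);
    instance.page_id = generate_hex();
    instance.save(function(err) {
        if (err) {
            if (err.code === 11000) {
                // duplicate key: retry with one fewer attempt left
                return tryToSave(doc, callback, attemptsLeft - 1);
            }
            return callback(err);
        }
        callback(null, instance);
    });
};

tryToSave(doc, function(err, instance) { /* ... */ }, 5);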
I have an interesting case where I need to do a few queries in MongoDB using Mongoose, but the response is returning before I can complete all of them.
I have two document types, list and item. In one particular call, I need to get all of the lists for a particular user, then iterate over each of them and fetch all of the items and append them to the appropriate list before returning.
List.find({'user_id': req.params.user_id}, function(err, docs){
    if (!err) {
        if (docs) {
            var results = [];
            _und.each(docs, function(value, key) {
                var list = value.toObject();
                list.items = [];
                Item.find({'list_id': value._id}, function(err, docs) {
                    if (!err) {
                        _und.each(docs, function(value, key) { list.items.push(value.toObject()); });
                        results.push(list);
                    } else {
                        console.log(err);
                    }
                });
            });
            res.send(results);
        }
    }
});
(_und is how I've imported underscore.js)
Obviously the issue is the callbacks, and since there are multiple loops I can't return within a callback.
Perhaps this is a case where I would need to get the count in advance and check it on every iteration to decide when to return the results. This doesn't seem elegant though.
Code solution
First of all, the issue is with the code: you're sending the results before the Item.find queries finish. You can fix this quite easily:
var count = docs.length + 1;
next();

_und.each(docs, function(value, key) {
    var list = value.toObject();
    list.items = [];
    Item.find({
        'list_id': value._id
    }, function(err, docs) {
        if (!err) {
            _und.each(docs, function(value, key) {
                list.items.push(value.toObject());
            });
            // this push happens asynchronously, after the outer loop has finished
            results.push(list);
            next();
        } else {
            console.log(err);
        }
    });
});

function next() {
    --count === 0 && finish();
}

function finish() {
    res.send(results);
}
The easiest way is reference counting: you default the count to the number of documents, then every time you finish getting an item you call next, which decrements the count by one.
Once you're finished getting all the items, the count should be zero. Note that we use .length + 1 and call next immediately. This guards against the case where there are no documents, which would otherwise never call finish.
Database solution
The best solution is to use mongo correctly. You should not be doing what is effectively a join in your code; it's slow and inefficient as hell. You should have a nested document and denormalize your list,
so list.items = [Item, Item, ...]
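For illustration, with items embedded in each list document, the whole response becomes a single query; this is a sketch using the same List model and field names as the question:

// with items embedded, one query returns everything
List.find({'user_id': req.params.user_id}, function (err, docs) {
    if (err) return console.log(err);
    // each list document already carries its items array, so no per-list Item.find is needed
    res.send(docs);
});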
As a further aside, avoid mongoose; it's inefficient. Use the native mongo driver.
I use this module:
https://github.com/caolan/async
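For example, async.each could replace the manual reference counting above; this is a sketch using the same List and Item models from the question:

var async = require('async');

List.find({'user_id': req.params.user_id}, function (err, docs) {
    if (err) return console.log(err);
    var results = [];
    // run the Item lookup for every list, then send once all of them are done
    async.each(docs, function (value, done) {
        var list = value.toObject();
        list.items = [];
        Item.find({'list_id': value._id}, function (err, items) {
            if (err) return done(err);
            items.forEach(function (item) {
                list.items.push(item.toObject());
            });
            results.push(list);
            done();
        });
    }, function (err) {
        if (err) return console.log(err);
        res.send(results);
    });
});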