How to duplicate a MongoDB collection with the Node.js driver?

Is there any way to duplicate a collection through the Node.js MongoDB driver?
i.e. collection.copyTo("duplicate_collection");

You can eval copyTo() server-side though it will block the entire mongod process and won't create indexes on the new collection.
var copyTo = "function() { db['source'].copyTo('target'); }";
db.eval(copyTo, [], function(err, result) {
  console.log(err);
});
Also note the field type warning.
"When using db.collection.copyTo() check field types to ensure that the operation does not remove type information from documents during the translation from BSON to JSON. Consider using cloneCollection() to maintain type fidelity."

Try to avoid .eval() if this is something you want to do regularly on a production system. It's fast, but there are problems.
A better approach would be to use the "Bulk" operations API, with a little help from the "async" library:
db.collection("target", function(err, target) {
  if (err) throw err;

  var batch = target.initializeOrderedBulkOp();
  var counter = 0;
  var cursor = db.collection("source").find();
  var current = null;

  async.whilst(
    // Truth test: fetch the next document and continue while one exists.
    // NOTE: an asynchronous test function like this needs async v3+
    // (older versions expose async.during for the same purpose).
    function(callback) {
      cursor.nextObject(function(err, doc) {
        if (err) return callback(err);
        // .nextObject() returns null when the cursor is depleted
        current = doc;
        callback(null, doc != null);
      });
    },
    function(callback) {
      batch.insert(current);
      counter++;
      if ( counter % 1000 == 0 ) {
        // Flush every 1000 documents and start a fresh batch
        batch.execute(function(err, result) {
          if (err) return callback(err);
          batch = target.initializeOrderedBulkOp();
          callback();
        });
      } else {
        callback();
      }
    },
    function(err) {
      if (err) throw err;
      // Flush whatever is left in the final, partial batch
      if ( counter % 1000 != 0 ) {
        batch.execute(function(err, result) {
          if (err) throw err;
          // job done
        });
      }
    }
  );
});
It's fast, though not as fast as .eval(), but it does not block either the application or the server.
Batch operations will generally take as many operations as you throw at them, but using a modulo as a limiter allows a little more control and essentially avoids loading an unreasonable number of documents into memory at a time. Keep in mind that, whatever the case, the batch that is sent cannot exceed 16MB between executions.

Another option to duplicate a collection is to use the aggregate method on a collection with the $out stage. Here is an example inside an async function:
const client = await MongoClient.connect("mongodb://alt_dev:aaaaa:27018/meteor");
const db = client.db('meteor');
const planPrice = db.collection('plan_price');
const planPriceUpdateCollection = 'plan_price_copy'; // illustrative name for the target collection
const planPriceCopy = planPrice.aggregate([{ $match: {} }, { $out: planPriceUpdateCollection }]);
await planPriceCopy.toArray(); // iterating the cursor triggers the $out stage
This will create a copy of the original collection with all of its content.
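If you want a quick sanity check afterwards, a minimal sketch (assuming the illustrative plan_price_copy target name defined above) is to compare document counts:
const sourceCount = await db.collection('plan_price').countDocuments();
const copyCount = await db.collection(planPriceUpdateCollection).countDocuments();
console.log(`source: ${sourceCount}, copy: ${copyCount}`);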

Related

Why do MySQL statements work normally in the Node REPL when typed line by line, but not in a function? [duplicate]

In the code
var stuff_i_want = '';
stuff_i_want = get_info(parm);
And the function get_info:
function get_info(data){
  var sql = "SELECT a from b where info = data";
  connection.query(sql, function(err, results){
    if (err){
      throw err;
    }
    console.log(results[0].objid); // good
    stuff_i_want = results[0].objid; // Scope is larger than function
    console.log(stuff_i_want); // Yep. Value assigned..
  });
}
In the larger scope, however:
stuff_i_want = null
What am I missing regarding returning MySQL data and assigning it to a variable?
============ New code per Alex suggestion
var parent_id = '';
function get_info(data, cb){
  var sql = "SELECT a from b where info = data";
  connection.query(sql, function(err, results){
    if (err){
      throw err;
    }
    return cb(results[0].objid); // Scope is larger than function
  });
}
==== New Code in Use
get_data(parent_recording, function(result){
  parent_id = result;
  console.log("Parent ID: " + parent_id); // Data is delivered
});
However
console.log("Parent ID: " + parent_id);
in the scope outside the function shows that parent_id is still null.
You're going to need to get your head around asynchronous calls and callbacks with JavaScript; this isn't C#, PHP, etc...
Here's an example using your code:
function get_info(data, callback){
  var sql = "SELECT a from b where info = data";
  connection.query(sql, function(err, results){
    if (err){
      throw err;
    }
    console.log(results[0].objid); // good
    stuff_i_want = results[0].objid; // Scope is larger than function
    return callback(results[0].objid);
  });
}
//usage
var stuff_i_want = '';
get_info(parm, function(result){
  stuff_i_want = result;
  //rest of your code goes in here
});
When you call get_info, this in turn calls connection.query, which takes a callback (that's what function(err, results) is).
The scope is then passed to this callback, and so on.
Welcome to JavaScript callback hell...
It's easy when you get the hang of it; it just takes a bit of getting used to, coming from something like C#.
I guess what you really want to do here is return a Promise object with the results. This way you can deal with the async operation of retrieving data from the DBMS: when you have the results, you use the Promise's resolve function to "return the value" / "resolve the promise".
Here's an example:
var getEmployeeNames = function(){
  return new Promise(function(resolve, reject){
    connection.query(
      "SELECT Name, Surname FROM Employee",
      function(err, rows){
        if(err){
          reject(err);
        }else if(rows === undefined){
          reject(new Error("Error rows is undefined"));
        }else{
          resolve(rows);
        }
      }
    );
  });
};
On the caller side, you use the then function to manage fulfillment, and the catch function to manage rejection.
Here's an example that makes use of the code above:
getEmployeeNames()
  .then(function(results){
    render(results);
  })
  .catch(function(err){
    console.log("Promise rejection error: " + err);
  });
At this point you can set up the view for your results (which are indeed returned as an array of objects):
var render = function(results){ for (var i in results) console.log(results[i].Name) }
Edit
I'm adding a basic example on how to return HTML content with the results, which is a more typical scenario for Node. Just use the then function of the promise to set the HTTP response, and open your browser at http://localhost:3001
require('http').createServer(function(req, res){
  if(req.method == 'GET'){
    if(req.url == '/'){
      res.setHeader('Content-type', 'text/html');
      getEmployeeNames()
        .then(function(results){
          var html = "<h2>" + results.length + " employees found</h2>";
          html += "<ul>";
          for (var i in results) html += "<li>" + results[i].Name + " " + results[i].Surname + "</li>";
          html += "</ul>";
          res.end(html);
        })
        .catch(function(err){
          console.log("Promise rejection error: " + err);
          res.end("<h1>ERROR</h1>");
        });
    }
  }
}).listen(3001);
Five years later, I understand asynchronous operations much better.
Also, with the newer async/await syntax, I refactored this particular piece of code:
const mysql = require('mysql2') // built-in promise functionality
const DB = process.env.DATABASE
const conn = mysql.createConnection(DB)

async function getInfo(data){
  const sql = "SELECT a from b where info = ?" // use a placeholder rather than concatenating data
  const [rows] = await conn.promise().query(sql, [data])
  return rows
}

module.exports = {
  getInfo
}
Then, wherever I need this data, I wrap it in an async function, invoke getInfo(data) and use the results as needed.
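For example, a hedged sketch of such a caller (the ./db module path is just an assumption about where the snippet above lives):
const { getInfo } = require('./db') // hypothetical path to the module above

async function printRows(name) {
  const rows = await getInfo(name)
  console.log(rows) // the rows returned by the query
}

printRows('some value')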
This was a situation where I was inserting new records into a child table and needed the parent record key, based only on a name.
It was a good example of understanding the asynchronous nature of Node.
I needed to wrap all of the code affecting the child records inside the call that finds the parent record id.
I was approaching this from a sequential (PHP, Java) perspective, which was all wrong.
It's easier if you send in a promise to be resolved, e.g.:
function get_info(data, promise){
  var sql = "SELECT a from b where info = data";
  connection.query(sql, function(err, results){
    if (err){
      throw err;
    }
    console.log(results[0].objid); // good
    stuff_i_want = results[0].objid; // Scope is larger than function
    promise.resolve(results[0].objid);
  });
}
This way Node.js stays fast, because it's busy doing other things while your promise is waiting to be resolved.
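To make that concrete, here is a minimal sketch of how the caller could build such a "promise to be resolved" (a deferred-style object exposing resolve/reject; this pattern is an assumption, not part of the original answer):
function deferred() {
  var d = {};
  d.promise = new Promise(function(resolve, reject) {
    d.resolve = resolve;
    d.reject = reject;
  });
  return d;
}

var d = deferred();
get_info(parm, d); // get_info calls d.resolve(objid) when the query returns
d.promise.then(function(objid) {
  console.log("objid: " + objid);
});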
I had been working on this goal for a few weeks without any result, and I finally found a way to assign the result of any MySQL query to a variable using await/async and promises.
You don't need to fully understand promises in order to use this; honestly, I don't really know how to use promises either.
I'm doing it with a Model class for my database, like this:
class DB {
  constructor(db) {
    this.db = db;
  }

  async getUsers() {
    let query = "SELECT * FROM asimov_users";
    return this.doQuery(query);
  }

  async getUserById(array) {
    let query = "SELECT * FROM asimov_users WHERE id = ?";
    return this.doQueryParams(query, array);
  }

  // CORE FUNCTIONS - DON'T TOUCH

  async doQuery(queryToDo) {
    let pro = new Promise((resolve, reject) => {
      let query = queryToDo;
      this.db.query(query, function (err, result) {
        if (err) throw err; // error handling
        resolve(result);
      });
    });

    return pro.then((val) => {
      return val;
    });
  }

  async doQueryParams(queryToDo, array) {
    let pro = new Promise((resolve, reject) => {
      let query = queryToDo;
      this.db.query(query, array, function (err, result) {
        if (err) throw err; // error handling
        resolve(result);
      });
    });

    return pro.then((val) => {
      return val;
    });
  }
}
Then you need to instantiate your class, passing the connection variable given by mysql to the constructor. After this, all you need to do is call one of your class methods with await in front of it. With this, you can chain queries without worrying about scope.
Example:
connection.connect(function(err) {
  if (err) throw err;
  let DBModel = new DB(connection);

  (async function() {
    let oneUser = await DBModel.getUserById([1]);
    let allUsers = await DBModel.getUsers();
    res.render("index.ejs", {oneUser : oneUser, allUsers : allUsers});
  })();
});
Notes:
If you need to do another query, just write a new method in your class and call it in your code with await inside an async function; copy/paste an existing method and modify it (see the sketch after these notes).
There are two "core functions" in the class, doQuery and doQueryParams. The first takes only a string as a parameter, which is basically your MySQL query. The second is used for parameterized queries and takes an array of values as well.
Note that the return value of your methods will always be an array of objects, which means you'll have to use yourVariable[0] if your query returns only one row. In the case of multiple rows, just loop over the array.
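For instance, a hypothetical extra method following that copy/paste pattern (the table and column names here are just illustrative):
// Added inside the DB class above; same shape as getUserById, only the SQL changes.
async getUsersByName(array) {
  let query = "SELECT * FROM asimov_users WHERE name = ?";
  return this.doQueryParams(query, array);
}
// usage, inside an async function: let users = await DBModel.getUsersByName(["alice"]);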

How to add many records to mongoDB from directory of JSON files?

I have about a million JSON files saved across many sub-directories of the directory "D:/njs/nodetest1/imports/source1/" and I want to import them into the collection "users" in my mongoDB database.
The following code correctly traverses the file system. As you can see, it reads each item in the directory, and if that item is a directory it reads each item in it. For each item that is not a directory, it performs some operations on it before passing a variable holding the resulting object to a function.
function traverseFS (path){
  var files = fs.readdirSync(path);
  for (var i in files){
    var currentFile = path + '/' + files[i];
    var stats = fs.statSync(currentFile);
    if (stats.isFile())
      runOnFile(currentFile);
    else
      traverseFS(currentFile);
  }
}
traverseFS("D:/njs/nodetest1/imports/source1/")
Next, I run a few operations on each file (see below). This reads the file, parses it into a JSON object, reads two attributes of that object into variables, creates an object in the variable "entry" and passes that variable to another function.
function runOnFile(currentFile){
  var fileText = fs.readFileSync(currentFile,'utf8');
  var generatedJSON = JSON.parse(fileText);
  var recordID = generatedJSON.recordID;
  var recordText = generatedJSON.recordTexts;
  var entry = {recordID:recordID, recordText:recordText};
  insertRecord(entry);
}
The final function should then insert the data into MongoDB. I think this is where things go wrong.
function insertRecord(entry){
  var MongoClient = mongodb.MongoClient;
  var MongoURL = 'mongodb://localhost:27017/my_database_name';
  MongoClient.connect(MongoURL, function (err, db) {
    var collection = db.collection('users');
    collection.insert([entry], function (err, result) {
      db.close();
    });
  });
}
I expected this to run through the file structure, reading the JSON files into objects and then inserting those objects into my mongoDB. Instead it reads the first file into the database and then stops/hangs.
Notes:
I don't want to use mongoimport because I don't want to insert all the data from these files into my MongoDB database. I however am not tied to any aspect of this approach. If some other solution exists I am open to it.
This connects to the database just fine. For each item in the directory it successfully creates an "entry" object and passes it to the insertRecord function. In other words, the problem must be occurring in the insertRecord section, but it obviously could be caused by something earlier in the process.
If I add error handling, no errors are produced. I have left the error handling out of this post because it clutters the readability of the code snippets.
As per the mongodb 2.2 driver documentation (the current latest), insert is deprecated:
DEPRECATED
Use insertOne, insertMany or bulkWrite
So the short answer is probably to change collection.insert([entry], ...) to collection.insertOne(entry, ...) and you're done.
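Applied to the insertRecord function from the question, that short-answer change looks like this (everything else left as it was):
function insertRecord(entry) {
  var MongoClient = mongodb.MongoClient;
  var MongoURL = 'mongodb://localhost:27017/my_database_name';
  MongoClient.connect(MongoURL, function (err, db) {
    var collection = db.collection('users');
    // insert([entry], ...) replaced with insertOne(entry, ...)
    collection.insertOne(entry, function (err, result) {
      db.close();
    });
  });
}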
Then, for the long answer: you say "about a million JSON files", which typically deserves a fully async approach with the least amount of overhead.
There are two (potential) bottlenecks in the sample code:
fs.readFileSync, this is a blocking operation
the connecting, inserting a record and closing the database connection
Both are executed about a million times. Granted, an import is not usually done over and over again, and (hopefully) not on a machine which needs its performance for other important tasks. Still, the sample code can easily be made more robust.
Consider using the glob module to obtain the list of JSON files.
glob('imports/**/*.json', function(error, files) {...})
This provides you with the full list of files easily in an async fashion.
Then consider connecting to the database just once, insert everything and close once.
Maintaining more or less the same steps you have in the sample, I'd suggest something like:
var glob = require('glob'),
    mongodb = require('mongodb'),
    fs = require('fs'),
    MongoClient = mongodb.MongoClient,
    mongoDSN = 'mongodb://localhost:27017/my_database_name',
    collection; // moved this to the "global" scope so we can do it only once

function insertRecord(json, done) {
  var recordID = json.recordID || null,
      recordText = json.recordText || null;

  // the question implies some kind of validation/sanitation/preparation..
  if (recordID && recordText) {
    // NOTE: insert was changed to insertOne
    return collection.insertOne({recordID: recordID, recordText: recordText}, done);
  }

  done('No recordID and/or recordText');
}

function runOnFile(file, done) {
  // moved to be async
  fs.readFile(file, function(error, data) {
    if (error) {
      return done(error);
    }

    // JSON.parse throws on invalid input, so guard it with try/catch
    var json;
    try {
      json = JSON.parse(data);
    } catch (parseError) {
      return done('Unable to parse JSON: ' + file);
    }

    insertRecord(json, done);
  });
}

function processFiles(files, done) {
  var next = files.length ? files.shift() : null;

  if (next) {
    return runOnFile(next, function(error) {
      if (error) {
        console.error(error);
        // you may or may not want to stop here by throwing an Error
      }
      processFiles(files, done);
    });
  }

  done();
}

MongoClient.connect(mongoDSN, function(error, db) {
  if (error) {
    throw new Error(error);
  }

  collection = db.collection('users');

  glob('imports/**/*.json', function(error, files) {
    if (error) {
      throw new Error(error);
    }

    processFiles(files, function() {
      console.log('all done');
      db.close();
    });
  });
});
NOTE: You can collect multiple "entry"-records to leverage the performance gain of multiple inserts using insertMany, though I have the feeling the inserted records are more complicated than described and it might give some memory issues if not handled correctly.
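A minimal sketch of that batching idea, reusing the collection handle from the code above (the batch size and helper names are just illustrative):
var buffer = [];
var BATCH_SIZE = 1000; // illustrative

function queueRecord(entry, done) {
  buffer.push(entry);
  if (buffer.length >= BATCH_SIZE) {
    var docs = buffer;
    buffer = [];
    // flush a full batch with a single insertMany call
    return collection.insertMany(docs, done);
  }
  done();
}

function flushRemainder(done) {
  if (!buffer.length) return done();
  collection.insertMany(buffer, done);
}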
Just structure your data into one big array of objects, then run db.collection.insertMany.
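A minimal sketch of that one-liner approach, assuming the entries already hold the recordID/recordText shape from the question and that db is an open connection:
var docs = entries.map(function(e) {
  return { recordID: e.recordID, recordText: e.recordText };
});

db.collection('users').insertMany(docs, function(err, result) {
  if (err) throw err;
  console.log(result.insertedCount + ' documents inserted');
});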
I suggest doing this using Promises:
const Bluebird = require('bluebird');
const glob = Bluebird.promisify(require('glob'));
const mongodb = require('mongodb');
const fs = Bluebird.promisifyAll(require('fs'));
const Path = require('path');
const MongoClient = mongodb.MongoClient;

const insertMillionsFromPath = Bluebird.coroutine(function *(path, mongoConnString) {
  const db = yield MongoClient.connect(mongoConnString);
  try {
    const collection = db.collection('users');
    const files = yield glob(Path.join(path, "*.json"));

    yield Bluebird.map(
      files,
      Bluebird.coroutine(function *(filename) {
        console.log("reading", filename);
        const fileContent = yield fs.readFileAsync(filename);
        const obj = JSON.parse(fileContent);
        console.log("inserting", filename);
        yield collection.insertOne(obj);
      }),
      {concurrency: 10} // You can increase concurrency here
    );
  } finally {
    yield db.close();
  }
});

insertMillionsFromPath("./myFiles", "mongodb://localhost:27017/database")
  .then(() => console.log("OK"))
  .catch((err) => console.log("ERROR", err));
In order to work, you will need to install the following packages:
npm install --save mongodb bluebird glob
and you will need Node.js version 6 or greater; otherwise you will need to transpile your JavaScript (due to the use of function *() generators).

Correct way to insert many records into Mongodb with Node.js

I was wondering what is the correct way to do bulk inserts into Mongodb (although could be any other database) with Node.js
I have written the following code as an example, although I believe it is flawed, as db.close() may run before all the asynchronous collection.insert calls have completed.
MongoClient.connect('mongodb://127.0.0.1:27017/test', function (err, db) {
  var i, collection;
  if (err) {
    throw err;
  }
  collection = db.collection('entries');
  for (i = 0; i < entries.length; i++) {
    collection.insert(entries[i].entry);
  }
  db.close();
});
If your MongoDB server is 2.6 or newer, it would be better to take advantage of the write commands Bulk API, which allows for the execution of bulk insert operations. These are simply abstractions on top of the server that make it easy to build bulk operations and thus get performance gains when working with large collections.
Sending the bulk insert operations in batches results in less traffic to the server and thus performs efficient wire transactions: rather than sending everything in individual statements, the work is broken up into manageable chunks for the server to commit. There is also less time spent waiting for responses in callbacks with this approach.
These bulk operations come mainly in two flavours:
Ordered bulk operations. These operations execute all the operations in order and error out on the first write error.
Unordered bulk operations. These operations execute all the operations in parallel and aggregate all the errors. Unordered bulk operations do not guarantee the order of execution. (A short sketch of the unordered flavour follows this list.)
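For instance, a minimal sketch of the unordered flavour, assuming a collection handle and entries array like those in the snippets below (this is an illustration, not part of the original answer's code):
var bulk = collection.initializeUnorderedBulkOp(); // unordered: operations may run in parallel
entries.forEach(function(obj) {
  bulk.insert(obj);
});
bulk.execute(function(err, result) {
  // err aggregates any write errors; result holds the insert counts
});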
Note that for servers older than 2.6, the API will down-convert the operations. However, it's not possible to down-convert 100%, so there might be some edge cases where it cannot correctly report the right numbers.
In your case, you could implement the Bulk API insert operation in batches of 1000 like this:
For MongoDB 3.2+ using bulkWrite
var MongoClient = require('mongodb').MongoClient;
var url = 'mongodb://localhost:27017/test';
var entries = [ ... ]; // a huge array containing the entry objects

var createNewEntries = function(db, entries, callback) {
  // Get the collection and bulk api artefacts
  var collection = db.collection('entries'),
      bulkUpdateOps = [];

  entries.forEach(function(doc) {
    bulkUpdateOps.push({ "insertOne": { "document": doc } });
    if (bulkUpdateOps.length === 1000) {
      collection.bulkWrite(bulkUpdateOps).then(function(r) {
        // do something with result
      });
      bulkUpdateOps = [];
    }
  });

  if (bulkUpdateOps.length > 0) {
    collection.bulkWrite(bulkUpdateOps).then(function(r) {
      // do something with result
    });
  }
};
For MongoDB <3.2
var MongoClient = require('mongodb').MongoClient;
var url = 'mongodb://localhost:27017/test';
var entries = [ ... ]; // a huge array containing the entry objects

var createNewEntries = function(db, entries, callback) {
  // Get the collection and bulk api artefacts
  var collection = db.collection('entries'),
      bulk = collection.initializeOrderedBulkOp(), // Initialize the Ordered Batch
      counter = 0;

  // Execute the forEach method, triggers for each entry in the array
  entries.forEach(function(obj) {
    bulk.insert(obj);
    counter++;

    if (counter % 1000 == 0) {
      // Execute the operation
      bulk.execute(function(err, result) {
        // re-initialise batch operation
        bulk = collection.initializeOrderedBulkOp();
        callback();
      });
    }
  });

  if (counter % 1000 != 0) {
    bulk.execute(function(err, result) {
      // do something with result
      callback();
    });
  }
};
Call the createNewEntries() function.
MongoClient.connect(url, function(err, db) {
  createNewEntries(db, entries, function() {
    db.close();
  });
});
You can use insertMany. It accepts an array of objects. Check the API.
New in version 3.2.
The db.collection.bulkWrite() method provides the ability to perform bulk insert, update, and remove operations. MongoDB also supports bulk insert through db.collection.insertMany().
bulkWrite supports only the insertOne, updateOne, updateMany, replaceOne, deleteOne and deleteMany operations.
In your case, to insert data using a single line of code, you can use the insertMany option:
MongoClient.connect('mongodb://127.0.0.1:27017/test', function (err, db) {
  var collection;
  if (err) {
    throw err;
  }
  collection = db.collection('entries');
  // close the connection only once the insert has completed
  collection.insertMany(entries, function (err, result) {
    if (err) throw err;
    db.close();
  });
});
var MongoClient = require('mongodb').MongoClient;
var url = 'mongodb://localhost:27017/test';

var data1 = {
  name: 'Data1',
  work: 'student',
  No: 4355453,
  Date_of_birth: new Date(1996, 10, 17)
};

var data2 = {
  name: 'Data2',
  work: 'student',
  No: 4355453,
  Date_of_birth: new Date(1996, 10, 17)
};

MongoClient.connect(url, function(err, db) {
  if (err != null) {
    return console.log(err.message);
  }

  // insertOne
  db.collection("App").insertOne(data1, function (err, data) {
    if (err != null) {
      return console.log(err);
    }
    console.log(data.ops[0]);
  });

  // insertMany
  var Data = [data1, data2];
  db.collection("App").insertMany(Data, { forceServerObjectId: true }, function (err, data) {
    if (err != null) {
      return console.log(err);
    }
    console.log(data.ops);
  });

  db.close();
});

How to find the next document in MongoDB using node.js

Ideally, I would like to be able to compare data from the cursor object and the following cursor.next() object in order to perform other functions.
The following code results in the nextDoc (or nextState) being undefined.
var data = db.collection('data');
var myCursor = data.find({});
myCursor.sort({'State': 1, 'Temperature': -1});

var nextDoc = myCursor.nextObject( function (err, doc) {
  if (err) throw err;
});

myCursor.each( function (err, doc) {
  if (err) throw err;
  if (doc == null) {
    return db.close();
  }
  var currState = doc.State;
  var nextState = nextDoc;
  console.log("currState:" + currState + " nextState:" + nextState);
});
Well, if you think about it, asking for the "next" document does not make that much sense: soon enough in your execution you will hit the "end" of the query results and there just will not be a "next".
Therefore the better approach is to "keep" the "last" result and compare it to the current item. The logic is also quite simple: the very first item has nothing to compare to, so there is little point, but everything else works fine.
A preference here is to use the stream interface, which is natively presented in the latest driver (otherwise call .stream()), as it gives a bit more control in case you want to do other things such as an .update() or another async operation:
var data = db.collection('data');
var myCursor = data.find({});
myCursor.sort({'State': 1, 'Temperature': -1});

var prevState = null;

// Note: the cursor stream emits "error", "end" and "data" events
myCursor.on("error", function(err) {
  throw err;
});

myCursor.on("end", function() {
  db.close();
});

myCursor.on("data", function(data) {
  myCursor.pause(); // stops other events emitting

  if ( prevState != null ) {
    console.log("prevState: %s currState: %s", prevState, data.State);
  }
  prevState = data.State;

  myCursor.resume(); // Restart when finished with this document
});
The .pause() and .resume() methods there make sure that the variable outside of the scope is respected with each iteration as no more "data" events are emitted until resumed.
Of course, if you did do an asynchronous process, then the .resume() should be called within its callback function to ensure the "loop" does not continue until that processing is complete.
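For example, a minimal sketch of that pattern, with an illustrative updateOne standing in for the asynchronous work (the field being set here is made up for the example):
myCursor.on("data", function(doc) {
  myCursor.pause(); // stop further "data" events while we work

  // illustrative async operation on the current document
  db.collection('data').updateOne(
    { _id: doc._id },
    { $set: { processed: true } },   // hypothetical field, for illustration only
    function(err) {
      if (err) throw err;
      prevState = doc.State;
      myCursor.resume();             // continue only once the async work is done
    }
  );
});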
So it is just a matter of looking at the problem the other way around: look back, not forward, which is something cursors and streams happily do.

How to use 64-bit long auto-increment counters in MongoDB

I'm implementing a logger database using MongoDB. The capped collection will contain log messages collected from several sources across the network. Since I want to do $lte/$gte queries on _id afterwards I need to have an _id that grows as a monotonic function.
To achieve that I've implemented the auto-incremented counter described in this article http://docs.mongodb.org/manual/tutorial/create-an-auto-incrementing-field/
My code looks like this:
var mongo = require("mongodb");
var Promise = require('es6-promise').Promise;

function connectToDB(mongo, uri) {
  return new Promise(function(resolve, reject) {
    mongo.MongoClient.connect(uri, function (err, db) {
      if (err) reject(err);
      else resolve(db);
    });
  });
}

function getNextSequenceNumber(db, counterName) {
  return new Promise(function(resolve, reject) {
    db.collection("counters", function (err, collection) {
      if (err) reject(err);
      else {
        var criteria = { _id: counterName };
        var sort = { $natural: 1 };
        var update = { $inc: { seq: 1 } };
        var options = { remove: false, new: true, upsert: true };
        collection.findAndModify(criteria, sort, update, options, function(err, res) {
          if (err) reject(err);
          else resolve(res.seq);
        });
      }
    });
  });
}
It works perfectly fine, but I've read that by default the number fields used in MongoDB are actually floats (doubles). The problem is that my database is a capped collection of log entries and it is going to have lots of entries. Moreover, since this is a capped collection, the old entries will be overwritten but the counter will keep growing. With the counter stored as a float, I cannot guarantee the system will keep working after a few years.
My question is: how can I force MongoDB to use a 64-bit counter in this particular case?
Please provide some code examples.
MongoDB (or rather BSON) has the NumberLong type, which is a 64-bit signed integer.
From Node.js you can use it in your update statement to create the seq property of that type:
var update = { $inc : { seq : mongo.Long(1) } };
This also seems to convert the seq property of existing documents to NumberLong.
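For reference, the same increment can also be written with the explicit Long factory method exposed by the driver (a sketch, reusing the mongo variable from the question's code):
// drop this into getNextSequenceNumber above in place of the original update
var update = { $inc: { seq: mongo.Long.fromInt(1) } };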
