Nodejs delay return for "require" - javascript

My setup is as follows:
Nodejs Server
server.js requires utils.js
utils.js loads data from mongodb into memory and exports it
server.js uses a variable that utils.js exports
The issue that I am worried about is the fact that the mongodb call is asynchronous. utils.js returns before the mongodb call is finished, meaning that server.js will use an undefined variable when it continues execution after the require.
What is the best way to address this issue? The only thing I could think of is wrapping my server.js code in a giant callback and passing that to the function that makes the mongodb call. That seems a bit messy to me; is there a better way to do it?
Code:
server.js
var utils = require("./modules/utils.js");
console.log(utils);
//Do whatever
utils.js
var mods = [];
var db = require("mongojs").connect("localhost", ["modules"]);
db.modules.find({}, function(err, modules){
    mods = modules;
});
module.exports = mods;

What you're referring to is called "callback hell". The easiest way to get out of that is to use a Promise library that simplifies it.
I used a node package called bluebird.
var mysql = require("mysql");
var hash = require("password-hash");
var Promise = require("bluebird");
var settings = require("../settings");
Promise.promisifyAll(require("mysql/lib/Connection").prototype);
Promise.promisifyAll(require("mysql/lib/Pool").prototype);
var db_config = {
    user: settings.db.user,
    password: settings.db.password,
    database: settings.db.database
};
var con = mysql.createPool(db_config);
function query(sql) {
    return con.getConnectionAsync().then(function(connection) {
        return connection.queryAsync(sql)
            .spread(function(rows, fields) {
                return rows;
            }).finally(function() {
                connection.release();
            });
    });
}
// exported as db.select to match the usage in the login example below
module.exports = {
    select: query
};
This is a very basic database module I wrote that uses bluebird to promisify the database object.
And here's how it's used. It returns a promise! The benefit here is that not only does it remove the clutter of callback hell, it also makes sure that your code runs asynchronously and the function does not return before things have finished processing, like in this case, a database query.
function login(user) {
    //check for player existence
    var query = 'SELECT p.name, p.password, p.id, pd.x, pd.y FROM player p INNER JOIN player_data pd ON p.id = pd.id WHERE p.name=' + mysql.escape(user);
    return db.select(query).then(function(rows) {
        if (!rows.length) return;
        return [
            rows[0]
        ];
    });
}
Notice how you return a promise, so you call the then or spread method to get the database values you just queried, without having to worry about whether rows will be undefined by the time you want to use it.
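Applied back to the original mongojs setup from the question, the same idea could look like this — a minimal sketch, assuming bluebird 3 (getMods is a name made up for illustration):
//utils.js - export a promise-returning function instead of a bare variable
var Promise = require("bluebird");
var db = require("mongojs").connect("localhost", ["modules"]);
// promisify the collection's find, keeping its original context
var findModules = Promise.promisify(db.modules.find, {context: db.modules});
module.exports.getMods = function () {
    return findModules({}); // resolves with the modules once the query finishes
};
//server.js - wait for the data before using it
var utils = require("./modules/utils.js");
utils.getMods().then(function (mods) {
    console.log(mods); // defined here, because the query has completed
});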

As you say, you need to wrap the entire server in a callback; Node.js works this way, it's asynchronous by nature. A server needs to pass through three stages: init, serve and deinit. In your case, the database code goes inside the init stage. You could write something like this:
//server.js
var utils = require("./modules/utils");
var init = function (cb){
    //init the utils module, the http server and whatever you need
    utils.init(function (error){
        if (error) return handleError(error);
        cb();
    });
};
var serve = function (){
    //Start listening to the http requests
};
var deinit = function (cb){
    //This is typically executed when a SIGINT is received, see link1
};
init(serve);
//utils.js
//You could write a wrapper for the db instance, see link2
var mongodb = require("mongojs");
var db;
module.exports.init = function (cb){
    db = mongodb.connect("localhost", ["modules"]);
    db.modules.find({}, function (err, modules){
        if (err) return cb(err);
        cb(null, modules);
    });
};
I don't recommend using promises; they are slower than raw callbacks, and you don't need them at all.
link1 - SIGINT
link2 - db wrapper
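For completeness, a minimal sketch of how the deinit stage is usually wired to SIGINT, using the standard process.on (the exit code is an assumption):
//somewhere in server.js
process.on("SIGINT", function (){
    deinit(function (){
        process.exit(0);
    });
});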

Related

How to add many records to mongoDB from directory of JSON files?

I have about a million JSON files saved across many sub-directories of the directory "D:/njs/nodetest1/imports/source1/" and I want to import them into the collection "users" in my mongoDB database.
The following code correctly traverses the file system. As you can see, it reads each item in the directory and, if that item is a directory, it reads each item in it. For each item that is not a directory it performs some operations on it before passing a variable holding an object to a function.
function traverseFS(path){
    var files = fs.readdirSync(path);
    for (var i in files){
        var currentFile = path + '/' + files[i];
        var stats = fs.statSync(currentFile);
        if (stats.isFile())
            runOnFile(currentFile);
        else
            traverseFS(currentFile);
    }
}
traverseFS("D:/njs/nodetest1/imports/source1/")
Next, I run a few operations on each file (see below). This reads the file, parses it into a JSON object, reads two attributes of that object into variables, creates an object in the variable "entry" and passes that variable to another function.
function runOnFile(currentFile){
    var fileText = fs.readFileSync(currentFile, 'utf8');
    var generatedJSON = JSON.parse(fileText);
    var recordID = generatedJSON.recordID;
    var recordText = generatedJSON.recordTexts;
    var entry = {recordID: recordID, recordText: recordText};
    insertRecord(entry);
}
The final function should then insert the data into mongoDB. I think this is where things go wrong.
function insertRecord(entry){
    var MongoClient = mongodb.MongoClient;
    var MongoURL = 'mongodb://localhost:27017/my_database_name';
    MongoClient.connect(MongoURL, function (err, db) {
        var collection = db.collection('users');
        collection.insert([entry], function (err, result) {
            db.close();
        });
    });
}
I expected this to run through the file structure, reading the JSON files into objects and then inserting those objects into my mongoDB. Instead it reads the first file into the database and then stops/hangs.
Notes:
I don't want to use mongoimport because I don't want to insert all the data from these files into my MongoDB database. I however am not tied to any aspect of this approach. If some other solution exists I am open to it.
This connects to the database just fine. For each item in the directory this successfully creates an "entry" object and passes it to the insertRecord function. In other words, the problem must be occurring in the insertRecord section. But it obviously could be caused by something earlier in the process.
If I add error handling, no errors are produced. I have left the error handling out of this post because it clutters the readability of the code snippets.
As per the mongodb 2.2 (current latest) driver documentation, insert is deprecated:
DEPRECATED
Use insertOne, insertMany or bulkWrite
So the short answer is probably to change collection.insert([entry], ...) to collection.insertOne(entry, ...) and you're done.
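That is, the only change to the insertRecord function from the question would be:
// before (deprecated)
collection.insert([entry], function (err, result) {
    db.close();
});
// after
collection.insertOne(entry, function (err, result) {
    db.close();
});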
Then for the long answer: you say "about a million JSON files", which typically deserves a fully async approach with the least amount of overhead.
There are two (potential) bottlenecks in the sample code:
fs.readFileSync, this is a blocking operation
the connecting, inserting a record and closing the database connection
Both are executed about a million times. Granted, an import is not usually done over and over again, and (hopefully) not on a machine which needs its performance for other important tasks. Still, the sample code can easily be made more robust.
Consider using the glob module to obtain the list of JSON files.
glob('imports/**/*.json', function(error, files) {...})
This provides you with the full list of files easily in an async fashion.
Then consider connecting to the database just once, insert everything and close once.
Maintaining more or less the same steps you have in the sample, I'd suggest something like:
var glob = require('glob'),
    mongodb = require('mongodb'),
    fs = require('fs'),
    MongoClient = mongodb.MongoClient,
    mongoDSN = 'mongodb://localhost:27017/my_database_name',
    collection; // moved this to the "global" scope so we can do it only once
function insertRecord(json, done) {
    var recordID = json.recordID || null,
        recordText = json.recordText || null;
    // the question implies some kind of validation/sanitation/preparation..
    if (recordID && recordText) {
        // NOTE: insert was changed to insertOne
        return collection.insertOne({recordID: recordID, recordText: recordText}, done);
    }
    done('No recordID and/or recordText');
}
function runOnFile(file, done) {
    // moved to be async
    fs.readFile(file, function(error, data) {
        if (error) {
            return done(error);
        }
        var json;
        try {
            json = JSON.parse(data);
        } catch (parseError) {
            // JSON.parse throws on invalid input rather than returning a falsy value
            return done('Unable to parse JSON: ' + file);
        }
        insertRecord(json, done);
    });
}
function processFiles(files, done) {
    var next = files.length ? files.shift() : null;
    if (next) {
        return runOnFile(next, function(error) {
            if (error) {
                console.error(error);
                // you may or may not want to stop here by throwing an Error
            }
            processFiles(files, done);
        });
    }
    done();
}
MongoClient.connect(mongoDSN, function(error, db) {
    if (error) {
        throw new Error(error);
    }
    collection = db.collection('users');
    glob('imports/**/*.json', function(error, files) {
        if (error) {
            throw new Error(error);
        }
        processFiles(files, function() {
            console.log('all done');
            db.close();
        });
    });
});
NOTE: You can collect multiple "entry" records to leverage the performance gain of batched inserts using insertMany, though I have the feeling the inserted records are more complicated than described, and batching might cause memory issues if not handled correctly.
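A rough sketch of that batching idea, layered on the code above (BATCH_SIZE is an arbitrary choice, and a final flush is needed after processFiles so a partial batch is not lost):
var batch = [],
    BATCH_SIZE = 1000; // arbitrary; tune for memory vs round-trips

function insertRecord(json, done) {
    var recordID = json.recordID || null,
        recordText = json.recordText || null;
    if (!recordID || !recordText) {
        return done('No recordID and/or recordText');
    }
    batch.push({recordID: recordID, recordText: recordText});
    // only hit the database once the batch is full
    if (batch.length >= BATCH_SIZE) {
        return flushBatch(done);
    }
    done();
}

function flushBatch(done) {
    if (!batch.length) return done();
    var docs = batch;
    batch = [];
    collection.insertMany(docs, done);
}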
Just structure your data into one big array of objects, then run db.collection.insertMany.
I suggest doing this using promises:
const Bluebird = require('bluebird');
const glob = Bluebird.promisify(require('glob'));
const mongodb = require('mongodb');
const fs = Bluebird.promisifyAll(require('fs'));
const Path = require('path');
const MongoClient = mongodb.MongoClient;
const insertMillionsFromPath = Bluebird.coroutine(function *(path, mongoConnString) {
    const db = yield MongoClient.connect(mongoConnString);
    try {
        const collection = db.collection('users');
        const files = yield glob(Path.join(path, "*.json"));
        yield Bluebird.map(
            files,
            Bluebird.coroutine(function *(filename) {
                console.log("reading", filename);
                const fileContent = yield fs.readFileAsync(filename);
                const obj = JSON.parse(fileContent);
                console.log("inserting", filename);
                yield collection.insertOne(obj);
            }),
            {concurrency: 10} // You can increase concurrency here
        );
    } finally {
        yield db.close();
    }
});
insertMillionsFromPath("./myFiles", "mongodb://localhost:27017/database")
    .then(() => console.log("OK"))
    .catch((err) => console.log("ERROR", err));
For this to work, you will need to install the following packages:
npm install --save mongodb bluebird glob
and you will need to use Node.js version 6 or greater; otherwise you will need to transpile your JavaScript (due to the function *() generator usage).
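On Node 8 or later, the same flow can also be written with async/await and util.promisify instead of Bluebird — a sketch under the same assumptions (same mongodb 2.x driver API as above; sequential here for simplicity, so without the concurrency: 10 of the original):
const util = require('util');
const glob = util.promisify(require('glob'));
const readFile = util.promisify(require('fs').readFile);
const Path = require('path');
const MongoClient = require('mongodb').MongoClient;

async function insertMillionsFromPath(path, mongoConnString) {
    const db = await MongoClient.connect(mongoConnString);
    try {
        const collection = db.collection('users');
        const files = await glob(Path.join(path, "*.json"));
        for (const filename of files) {
            // read and insert one file at a time
            const obj = JSON.parse(await readFile(filename));
            await collection.insertOne(obj);
        }
    } finally {
        await db.close();
    }
}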

NodeJS mysql2 Bluebird?

I've been testing mysql vs mysql2; it seems like mysql2 has made some improvements, however it's not an exact drop-in replacement. At the same time, Q is a good library which seems easier to integrate with, however bluebird seems to take less memory and run faster, so...
My current mysql-bluebird connector is as follows and allows for straightforward use of query('SELECT email FROM users.users WHERE id=?', id).then(function(res){var email = res[0][0];});
/* global module, require */
var conf = require('./conf.js').conf;
var mysql = require('mysql');
var Promise = require('bluebird');
var using = Promise.using;
Promise.promisifyAll(require('mysql/lib/Connection').prototype);
Promise.promisifyAll(require('mysql/lib/Pool').prototype);
var pool = mysql.createPool(conf.mysql);
var getConnection = function () {
    return pool.getConnectionAsync().disposer(function (connection) {
        return connection.release();
    });
};
var query = function (command) {
    return using(getConnection(), function (connection) {
        return connection.queryAsync(command);
    });
};
function queryWrapper(q, a){
    if(a){
        return query(mysql.format(q, a));
    }
    else{
        return query(mysql.format(q));
    }
}
module.exports = {
    query: queryWrapper
};
So far my attempts at doing this with mysql2 haven't panned out.
Does anyone have any insights on how to convert this?
Thanks, Jegsar
You can use mysql2-promise. It's a simple wrapper, using q, that promisifies mysql2. If you'd rather use Bluebird, you can look at how this wrapper was created and do it yourself.
node-mysql2 now has a Promise API built in, and you can choose which promise implementation you want to use:
var mysql = require('mysql2/promise');
mysql.createConnection({
    Promise: require('bluebird'), // if not set, global Promise is used
    user: 'foo',
    password: 'bar',
    database: 'baz'
})
    .then((conn) => conn.query('select 1+1 as test'))
    .then(([rows, fields]) => console.log(rows[0]))
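If you want to keep the same query(sql, values).then(...) shape as the connector above, a minimal sketch using mysql2's built-in promise API with a pool (reusing conf.mysql from your module) might look like this:
var mysql = require('mysql2/promise');
var conf = require('./conf.js').conf;
var pool = mysql.createPool(conf.mysql);

function query(q, a) {
    // pool.query acquires and releases a connection internally
    // and resolves with [rows, fields]
    return pool.query(q, a).then(function ([rows, fields]) {
        return rows;
    });
}

module.exports = {
    query: query
};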

variable not defined node.js

I'm trying to use Node.js to get a response from an API; I want to clean the API response and use the result.
So to access the first API I have the following code.
To store and use the result I believe I need to store the JSON output globally.
However, I can't work out how to do this.
Example -
var request = require('request');
request({url: 'https://www.car2go.com/api/v2.1/vehicles?loc=wien&oauth_consumer_key=car2gowebsite&format=json', json: true}, function(err, res, json) {
    if (err) {
        throw err;
    }
    car2go = json.placemarks;
    for (i = 0; i < car2go.length; i++) {
        delete car2go[i].address;
        delete car2go[i].charging;
        delete car2go[i].exterior;
        delete car2go[i].interior;
        delete car2go[i].smartPhoneRequired;
        delete car2go[i].vin;
        car2go[i].vendor = 'car2go';
        car2go[i].city = 'wien';
        car2go[i].carmake = 'Smart';
        car2go[i].carmodel = 'Fortwo';
    }
    console.log(car2go);
});
This prints the desired result; however, I know that this is because my variable is defined within the function.
I want to access the variable outside of the function.
To test if I could do this I changed the code to -
var request = require('request');
request({url: 'https://www.car2go.com/api/v2.1/vehicles?loc=wien&oauth_consumer_key=car2gowebsite&format=json', json: true}, function(err, res, json) {
    if (err) {
        throw err;
    }
    car2go = json.placemarks;
    for (i = 0; i < car2go.length; i++) {
        delete car2go[i].address;
        delete car2go[i].charging;
        delete car2go[i].exterior;
        delete car2go[i].interior;
        delete car2go[i].smartPhoneRequired;
        delete car2go[i].vin;
        car2go[i].vendor = 'car2go';
        car2go[i].city = 'wien';
        car2go[i].carmake = 'Smart';
        car2go[i].carmodel = 'Fortwo';
    }
});
console.log(car2go);
But if I do this I get
ReferenceError: car2go is not defined
I am running Node v0.12.2 on Mac OS Yosemite (10.10.3).
Admittedly I am very new to node and I am more familiar with R, Python and PL SQL.
There is no way to get a reference to it outside of the callback function, because the console.log line runs before the callback function is invoked. The reason you have to pass a callback function into the request API is that the request library needs to invoke that function when it's done making the request. Meanwhile, your app moves on and does other things (such as running that console.log line) while it waits for the callback function to fire.
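A tiny illustration of that ordering (the URL is just a placeholder):
var request = require('request');

console.log('1: before calling request');
request({url: 'https://example.com', json: true}, function (err, res, json) {
    console.log('3: inside the callback, once the response has arrived');
});
console.log('2: after calling request, before the callback fires');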
That said, there are a number of ways to deal with asynchronous code. My favorite way is with promises. I use a library called bluebird for handling promises.
var request = require('request');
var Promise = require('bluebird');
var requestP = Promise.promisify(request);
The call to Promise.promisify(request) returns a new function that doesn't take a callback function, but instead returns a promise.
requestP({ url: 'https://www.car2go.com/api/v2.1/vehicles?loc=wien&oauth_consumer_key=car2gowebsite&format=json', json: true })
    .spread(function(res, json) {
        var car2go = json.placemarks;
        for (i = 0; i < car2go.length; i++) {
            delete car2go[i].address;
            delete car2go[i].charging;
            delete car2go[i].exterior;
            delete car2go[i].interior;
            delete car2go[i].smartPhoneRequired;
            delete car2go[i].vin;
            car2go[i].vendor = 'car2go';
            car2go[i].city = 'wien';
            car2go[i].carmake = 'Smart';
            car2go[i].carmodel = 'Fortwo';
        }
        return car2go; // pass the cleaned array down the chain
    })
    .then(function (car2go) {
        console.log(car2go);
    })
    .catch(function (err) {
        console.error(err);
    });
Note: .spread is the same as .then, except that if the resolved value is an array (which it will be here, because the callback passed to the request library accepts two arguments, which bluebird translates into an array the promise resolves to), .spread splits the array back into multiple arguments passed to the function you give it.
Promise.resolve(['hi', 'there']).then(function (result) {
    console.log(result); // "['hi', 'there']"
});
Promise.resolve(['hi', 'there']).spread(function (str1, str2) {
    console.log(str1); // 'hi'
    console.log(str2); // 'there'
});
You're not going to be able to return that value all the way back out to the same context from which you began the asynchronous call, but you can at least write code that looks somewhat synchronous when using promises.
Without promises you'll be forced to call functions from within functions from within functions from within functions ;)
The response is asynchronous. That means the callback function gets called sometime LATER in the future so your console.log(car2go) is executing BEFORE the callback has even been called.
The only place you can reliably use the response is inside the callback or in a function called from the callback. You cannot use it the way you are trying to. Using asynchronous responses in Javascript requires programming in an asynchronous fashion which means processing results and using results IN the asynchronous callbacks only.
Here's where the console.log() should be:
var request = require('request');
request({url: 'https://www.car2go.com/api/v2.1/vehicles?loc=wien&oauth_consumer_key=car2gowebsite&format=json', json: true}, function (err, res, json) {
    if (err) {
        throw err;
    }
    car2go = json.placemarks;
    for (i = 0; i < car2go.length; i++) {
        delete car2go[i].address;
        delete car2go[i].charging;
        delete car2go[i].exterior;
        delete car2go[i].interior;
        delete car2go[i].smartPhoneRequired;
        delete car2go[i].vin;
        car2go[i].vendor = 'car2go';
        car2go[i].city = 'wien';
        car2go[i].carmake = 'Smart';
        car2go[i].carmodel = 'Fortwo';
    }
    // here is where the result is available
    console.log(car2go);
});

How do I wait for a promise to fulfill and then return a generator function?

I know this is wrong, but essentially I want to
connect to a db/orm via a promise
wait on that promise to fulfill and get the models (the return from the promise)
use the results to form a middleware generator function that places the models on the request
I suspect that this isn't the best approach, so essentially I have two questions:
Should I be rewriting my db/orm connect as a generator function? (I have a feeling that is more in line with koa style.)
Back to the original question (as I am sure I will not get a chance to rewrite all my business logic): how do I wait on a promise to fulfill and then return a generator function?
This is my poor attempt - which is not working, and honestly I didn't expect it to, but I wanted to start by writing code, to have something to work with to figure this out:
var connectImpl = function() {
    var qPromise = q.nbind(object.method, object);
    return qPromise;
}
var domainMiddlewareImpl = function() {
    let connectPromise = connectImpl()
    return connectPromise.then(function(models){
        return function *(next){
            this.request.models = models;
        }
    })
}
var app = koa()
app.use(domainMiddlewareImpl())
According to this, you can do the following:
var domainMiddlewareImpl = function() {
    return function *(){
        this.request.models = yield connectImpl();
    };
};
A context-sensitive answer based on the info provided by Hugo (thx):
var connectImpl = function() {
    var qPromise = q.nbind(object.method, object);
    return qPromise;
}
var domainMiddlewareImpl = function () {
    let models = null;
    return function *(next) {
        if(models == null){
            //////////////////////////////////////////////////////
            // we only want one "connection", if that sets up a
            // pool so be it
            //////////////////////////////////////////////////////
            models = yield connectImpl();
        }
        this.request.models = models.collections;
        this.request.connections = models.connections;
        yield next;
    };
};
In my example, connectImpl sets up domain models in an ORM (Waterline for now), connects to a database (pooled), and returns a promise for the ORM models and DB connections. I only want that to happen once, and then, for every request through my Koa middleware, the objects are added to the request.

Get DNS MX records in Node

I want to get MX records for the hostname www.example.com. Node has a function for it:
dns.resolveMx(domain, callback)
But I don't want that callback thingy. I want something like a sync call, e.g.
var records = dns.resolveMx(domain);
Is this possible?
(Note: I found that function in the Node documentation: http://nodejs.org/api/dns.html)
Update 2021
There is now a promises submodule under the dns module, if someone is looking for promise-based (await-style) calls.
Check more here
https://nodejs.org/api/dns.html#dns_class_dnspromises_resolver
const dnsPromises = require('dns').promises;
// note: await is only valid inside an async function (or an ES module with top-level await)
const demo1 = await dnsPromises.resolveMx("gmail.com");
Is there any reason you want to block your application with a network operation? The DNS resolvers are called at the C level by the c-ares library, which is asynchronous by design. Therefore, you can't make them synchronous. This is the code from the DNS module with the unneeded parts removed:
var cares = process.binding('cares_wrap');
function resolver(bindingName) {
    var binding = cares[bindingName];
    return function query(name, callback) {
        callback = makeAsync(callback);
        var req = {
            bindingName: bindingName,
            callback: callback,
            oncomplete: onresolve
        };
        var err = binding(req, name);
        if (err) throw errnoException(err, bindingName);
        callback.immediately = true;
        return req;
    };
}
var resolveMap = {};
exports.resolveMx = resolveMap.MX = resolver('queryMx');
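As a side note, on modern Node (8+) you can get a promise without blocking, and without the dns.promises submodule, by promisifying the callback version yourself — a small sketch using the standard util.promisify:
const util = require('util');
const dns = require('dns');
const resolveMx = util.promisify(dns.resolveMx);

resolveMx('example.com')
    .then((records) => console.log(records))
    .catch((err) => console.error(err));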
