Node.js Cluster: Choose Worker - javascript

I use the Node.js cluster module with 8 workers. Whenever I go to the application, I get connected to the same worker (which is normal, since one worker can handle multiple clients).
For testing purposes, I'd like to connect to different workers without having to siege the application. Is there a way to do that?
Ex: going to mywebsite.com/3 would connect to the 3rd worker.

Here is a port-based solution:
var cluster = require('cluster');
var http = require('http');

if (cluster.isMaster) {
  cluster.fork();
  cluster.fork();
  cluster.fork();
  return; // the master only forks; the rest of the file runs in workers
}

function app(req, res) {
  res.writeHead(200);
  res.end('hello from ' + cluster.worker.id);
}

// Shared port: the cluster module load-balances connections to port 8000
// across all workers.
http.createServer(app).listen(8000);
// Private port: each worker also listens on its own port (8001, 8002, ...),
// so a specific worker can be addressed directly.
http.createServer(app).listen(8000 + cluster.worker.id);
For example, to connect to worker 2, use port 8002.
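The question actually asks for path-based selection (mywebsite.com/3), which the snippet above doesn't give you by itself. One way to approximate it is a small router in front of the per-worker ports. The sketch below is an illustration on top of the answer, not part of it: it runs as a separate process, and port 9000 is chosen arbitrarily.
var http = require('http');

// forwards /N to the per-worker port 8000 + N opened above,
// so http://localhost:9000/3 reaches worker 3
http.createServer(function (req, res) {
  var match = req.url.match(/^\/(\d+)$/);
  var port = match ? 8000 + Number(match[1]) : 8000; // no /N -> shared port
  var upstream = http.request({ port: port, path: '/', method: req.method }, function (workerRes) {
    res.writeHead(workerRes.statusCode, workerRes.headers);
    workerRes.pipe(res);
  });
  upstream.on('error', function () {
    res.writeHead(502);
    res.end('no such worker');
  });
  req.pipe(upstream);
}).listen(9000);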

Related

Socket.io - Multiple connections when there should only be one per browser window

I've noticed that when my app loads, it creates multiple socket connections.
This bug seems quite intermittent; I've been looking around for a solution but nothing seems to be working so far.
Here's my server-side code:
var express = require('express');
var app = express();
var serv = require('http').Server(app);
var path = require('path');

app.get('/', function (req, res) {
  res.sendFile(__dirname + '/client/index.html');
});
app.use('/client', express.static(__dirname + '/client'));
app.use(express.static(path.join(__dirname, 'skin')));

serv.listen(8081);
console.log("Server started");

var io = require('socket.io')(serv, {});
var socketCount = 0;
io.sockets.on('connection', function (socket) {
  console.log('Socket ' + socketCount + ' connected');
  socketCount++;
  socket.emit('socketConnected', true);
  socket.on('disconnect', function () {
    console.log('Socket ' + socketCount + ' disconnected');
    socketCount--;
    // SOCKET_LIST and PLAYER_LIST are defined elsewhere in the file
    delete SOCKET_LIST[socket.id];
    delete PLAYER_LIST[socket.id];
  });
...
When refreshing the page, the disconnect function on the server fires, but only once, despite the fact that there are supposedly several sockets. I then get multiple sockets connecting again.
When putting a breakpoint on the frontend (listening for socketConnected), it triggers multiple times. Can anyone point out any flaws in the code above that may cause this issue?
Thanks.
New development after some further debugging:
When I emit socketConnected as seen above, I had assumed that I would be able to grab some further data from the frontend.
When sending data from the frontend back to the server, I run my calculations on the server with it and then run a second socket.emit(...) from the server to the frontend. This takes several attempts before actually hitting a breakpoint on the frontend, and it is during these attempts to emit the data from the server that the several other connections are made.
However, if I wrap the second server emit in a setTimeout function, it works fine and maintains the current connection without creating several more. Has anyone else encountered this issue? If so, is there a solution that avoids using setTimeout?
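For what it's worth, the usual way to avoid a setTimeout workaround is to defer the first emit until the socket's connect event has fired, so the handshake has finished before any data is exchanged. A minimal client-side sketch (the clientReady event name is made up for illustration):
var socket = io.connect();

// wait for the handshake to complete instead of guessing with setTimeout
socket.on('connect', function () {
  socket.emit('clientReady', { /* initial data for the server */ });
});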

Some problems when scaling Socket.IO to multiple Node.js processes using cluster

My Node.js server uses the cluster module in order to run on multiple processes.
When the server receives a request from a client over Socket.IO, it passes the data to another server with a Redis publish. It receives the refined data back with a Redis subscribe, and then it simply passes that data on to the client.
I use one node process to receive data with the Redis subscribe, and the other processes to send data to clients with socket.io.
The client connects to socket.io when the page has loaded.
Here is my problem:
The connect event fires repeatedly, even when the page is not being reloaded.
When the client connects, I save the socket.id from that socket and use it later when I want to send data to that client's socket. But because connect fires repeatedly, I think the socket the client uses has changed, so the first socket.id I remembered becomes useless and I can't send data via it. I stored auth information in the socket object, so the changed client socket is no help.
index.pug
$(document).ready(function(){
  var socket = io.connect();
  (...)
app.js
var cluster = require('cluster');
var express = require('express');
var socketio = require('socket.io');
var NRP = require('node-redis-pubsub');
// config and socketController are defined elsewhere in the project
var nrpForChat = new NRP(config.chatRedisConfig);
var nrpForCluster = new NRP(config.clusterRedisConfig);

var startExpressServer = function(){
  var app = express();
  var server = require('http').createServer(app);
  var io = socketio.listen(server);
  var redis = require('socket.io-redis');
  io.adapter(redis({ host: 'localhost', port: 6380 }));

  io.sockets.on('connection', function(socket){
    socketController.onConnect(io, socket, nrpForChat);
  });

  server.listen(config.port, function(){
    console.log('Server app listening on port ' + config.port);
  });

  nrpForCluster.on('to:others:proc', function(data){
    var socket = io.sockets.connected[data.target.sockid];
    if (socket) {
      if (data.event == '_net_auth') {
        if (data.data.res){
          socket.enterId = data.data.data.enterId;
          socket.memberKey = data.data.data.memberKey;
          socket.sid = data.data.data.sid;
          socket.emit(data.event, data.data);
        } else {
          console.log('auth failed.');
        }
      } else {
        // any other event is relayed straight to the client
        socket.emit(data.event, data.data);
      }
    }
  });

  module.exports = app;
}
var numCpus = require('os').cpus().length;

if (cluster.isMaster) {
  for (var i = 0; i < numCpus; i++) {
    cluster.fork();
  }
} else {
  if (cluster.worker.id == numCpus) {
    nrpForChat.on('chat:to:relay', function(data){
      nrpForCluster.emit('to:others:proc', data);
    });
    if (numCpus == 1) {
      startExpressServer();
    }
  } else {
    startExpressServer();
  }
}
By default, socket.io connects with several consecutive http requests. It essentially starts in HTTP polling mode and then after some initial data exchange, it switches to a webSocket transport.
Because of this, a cluster that does not have any sort of sticky load balancing will not work. Each of the initial consecutive http requests that are all supposed to go to the same server process will probably be sent to different server processes in the cluster and the initial connection will not work.
There are two solutions that I know of:
Implement some sort of sticky load balancing (in the clustering module) so that each client repeatedly goes to the same server process, and thus all the consecutive http requests at the beginning of a connection go to the same server process (a sticky-session sketch follows at the end of this answer).
Configure your clients to use the webSocket transport immediately and never use HTTP polling. The connection will still start with an http request (since that's how all webSocket connections start), but that exact same connection will be upgraded to webSocket, so there will only ever be one connection.
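A client-side sketch of that second option, assuming a socket.io 1.x client (transports is a documented client option):
// connect with the webSocket transport only, so no HTTP polling
// requests need to be routed to the same worker
var socket = io.connect({
  transports: ['websocket']
});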
FYI, you will also need to make sure that the reconnect logic in socket.io properly reconnects to the original server process that it was connected to.
socket.io has node.js clustering support in combination with redis. While the socket.io documentation site has been down for multiple days now, you can find some info in the question Scaling Socket.IO to multiple Node.js processes using cluster, and in a previously cached version of the socket.io doc for clustering.
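For the first option, the third-party sticky-session module is one common way to get sticky balancing with cluster. A rough sketch, not from the original answer (the module and port 3000 are assumptions):
var sticky = require('sticky-session');
var http = require('http');

var server = http.createServer(function (req, res) {
  res.end('handled by worker ' + process.pid);
});

if (!sticky.listen(server, 3000)) {
  // master process: sticky.listen() forks the workers itself
  server.once('listening', function () {
    console.log('server started on port 3000');
  });
} else {
  // worker process: requests from the same client IP always land here,
  // so this is where socket.io would be attached to `server`
}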

Phantomjs change port page.open()

I'm trying to do web scraping with PhantomJS, and to run multiple instances it occurred to me to open multiple servers and scrape web pages from there.
With this idea I'm putting together a pool of instances (without using external libraries).
My question is:
Will page.open() open the URL from the port 8888 I specified, or will it use the default ports for web connections (80, 8080, 443, etc.)?
If it uses the port I specified (in this case 8888), that would be very good, as it could make web scraping faster and more efficient than using a single port.
Thanks for your attention!
PS: Here is a simple example of the code I'm trying to use. It works, but I don't know how page.open() behaves with regard to ports.
var webPage = require('webpage');
var server = require('webserver').create();

function onRequest(request, response) {
  var page = webPage.create();
  // page.open() makes an outbound request like a normal browser; the port
  // given to server.listen() below only controls inbound connections
  page.open('http://www.google.com/', function(status) {
    console.log('Status: ' + status);
    console.log(page.content);
  });
}

var service = server.listen(8888, onRequest);
if (service) {
  console.log('Server OK');
} else {
  console.log('Server close');
  phantom.exit();
}

Page loads forever when using clusters with Nodejs / Expressjs

This is my application, app.js:
/** Express **/
var express = require('express');
/** Create express application **/
var app = express();
/** Set application port **/
app.set('port', process.env.PORT || 3000);
/** Set application view engine **/
var handlebars = require('express-handlebars').create({
  defaultLayout: 'main',
  helpers: {
    section: function(name, options){
      if(!this._sections) this._sections = {};
      this._sections[name] = options.fn(this);
      return null;
    },
    parrot: function(options){
      return options.fn(this) + ' <b> parrot </b>';
    }
  }
});
/** Cluster logger **/
app.use(function(req, res, next){
  var cluster = require('cluster');
  if(cluster.isWorker) console.log('CLUSTER: Worker %d received request.', cluster.worker.id);
  next();
});
/** Home page **/
app.get('/', function(req, res){
  res.send('Welcome !!');
});
/** About page **/
app.get('/about', function(req, res){
  res.send('About us!');
});
/** Contact page **/
app.get('/contact', function(req, res){
  res.send('contact us here');
});
// startServer in export/direct mode
function startServer(){
  app.listen(app.get('port'), function(){
    console.log('Parrot started in ' + app.get('env') + ' mode on http://localhost:' +
      app.get('port') +
      '; \n press Ctrl-C to terminate');
  });
}
if(require.main === module){
  startServer();
} else {
  module.exports = startServer;
}
And this is parrot.js (which includes cluster):
// import cluster
var cluster = require('cluster');

// startWorker
function startWorker(){
  var worker = cluster.fork();
  console.log('CLUSTER: Worker %d started', worker.id);
}

if(cluster.isMaster){
  // in case the cluster is master, fork one worker per CPU
  require('os').cpus().forEach(function(){
    startWorker();
  });
  cluster.on('disconnect', function(worker){
    console.log('CLUSTER: Worker %d disconnected from the cluster', worker.id);
  });
  cluster.on('exit', function(worker, code, signal){
    console.log('CLUSTER: Worker %d died with exit code %d (%s)', worker.id, code, signal);
    startWorker();
  });
} else {
  // in case cluster.isWorker (not master), run the app directly
  require('./app.js')();
}
The problem is that when I run node app.js, the app works just fine on http://localhost:3000 and the page renders great in the browser.
When I run it as a set of clusters (with node parrot.js), everything looks good in the console:
CLUSTER: Worker 1 started
CLUSTER: Worker 2 started
Parrot started in development mode on http://localhost:3000;
press Ctrl-C to terminate
Parrot started in development mode on http://localhost:3000;
press Ctrl-C to terminate
But the page loads forever and nothing shows in the browser. What is the problem here? Sorry for my language; I'm a Node.js newbie.
Thank you
I don't know exactly what the problem was, but when I tested on another computer (with a 32-bit OS), the example above worked without any problems.
This is my result now, when I visit a page in the browser:
CLUSTER: Worker 1 started
CLUSTER: Worker 2 started
Parrot started in development mode on http://localhost:3333;
press Ctrl-C to terminate
Parrot started in development mode on http://localhost:3333;
press Ctrl-C to terminate
CLUSTER: Worker 2 received request.
CLUSTER: Worker 2 received request.
In case clusters don't work for you, test on a different machine.
Another question I have: I don't know why all requests are served by cluster worker 2 (the last one started); it seems like worker 1 never receives any requests.
Thanks
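A possible explanation, offered here as a hedged note rather than a confirmed diagnosis: Node's cluster module distributes connections according to a scheduling policy, and with the non-round-robin policy (the default on Windows, and on all platforms before Node 0.12) one worker can win every accept() race. Forcing round-robin in parrot.js may spread the load:
var cluster = require('cluster');

// SCHED_RR makes the master accept connections itself and hand them
// out to workers round-robin; set it before calling cluster.fork()
cluster.schedulingPolicy = cluster.SCHED_RR;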

Scaling Socket.IO to multiple Node.js processes using cluster

Tearing my hair out with this one... has anyone managed to scale Socket.IO to multiple "worker" processes spawned by Node.js's cluster module?
Let's say I have the following on four worker processes (pseudo):
// on the server
var express = require('express');
var server = express();
var socket = require('socket.io');
var io = socket.listen(server);

// socket.io
io.set('store', new socket.RedisStore);

// set-up connections...
io.sockets.on('connection', function(socket) {
  socket.on('join', function(rooms) {
    rooms.forEach(function(room) {
      socket.join(room);
    });
  });
  socket.on('leave', function(rooms) {
    rooms.forEach(function(room) {
      socket.leave(room);
    });
  });
});

// Emit a message every second
function send() {
  io.sockets.in('room').emit('data', 'howdy');
}
setInterval(send, 1000);
And on the browser...
// on the client
socket = io.connect();
socket.emit('join', ['room']);
socket.on('data', function(data){
  console.log(data);
});
The problem: Every second, I'm receiving four messages, due to four separate worker processes sending the messages.
How do I ensure the message is only sent once?
Edit: In Socket.IO 1.0+, rather than setting a store with multiple Redis clients, a simpler Redis adapter module can now be used.
var io = require('socket.io')(3000);
var redis = require('socket.io-redis');
io.adapter(redis({ host: 'localhost', port: 6379 }));
The example shown below would look more like this:
var cluster = require('cluster');
var os = require('os');

if (cluster.isMaster) {
  // we create a HTTP server, but we do not use listen
  // that way, we have a socket.io server that doesn't accept connections
  var server = require('http').createServer();
  var io = require('socket.io').listen(server);
  var redis = require('socket.io-redis');

  io.adapter(redis({ host: 'localhost', port: 6379 }));

  setInterval(function() {
    // all workers will receive this in Redis, and emit
    io.emit('data', 'payload');
  }, 1000);

  for (var i = 0; i < os.cpus().length; i++) {
    cluster.fork();
  }

  cluster.on('exit', function(worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });
}

if (cluster.isWorker) {
  var express = require('express');
  var app = express();
  var http = require('http');
  var server = http.createServer(app);
  var io = require('socket.io').listen(server);
  var redis = require('socket.io-redis');

  io.adapter(redis({ host: 'localhost', port: 6379 }));
  io.on('connection', function(socket) {
    socket.emit('data', 'connected to worker: ' + cluster.worker.id);
  });

  // listen on the server that socket.io is attached to; app.listen()
  // would create a separate server without socket.io attached
  server.listen(80);
}
If you have a master node that needs to publish to other Socket.IO processes, but doesn't accept socket connections itself, use socket.io-emitter instead of socket.io-redis.
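A minimal sketch of the socket.io-emitter variant, with the host and port as assumptions: the master publishes through Redis without ever accepting socket connections itself.
var emitter = require('socket.io-emitter')({ host: 'localhost', port: 6379 });

// every worker's Redis adapter picks this up and emits it to its clients
setInterval(function () {
  emitter.emit('data', 'payload from master');
}, 1000);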
If you are having trouble scaling, run your Node applications with DEBUG=*. Socket.IO now implements debug which will also print out Redis adapter debug messages. Example output:
socket.io:server initializing namespace / +0ms
socket.io:server creating engine.io instance with opts {"path":"/socket.io"} +2ms
socket.io:server attaching client serving req handler +2ms
socket.io-parser encoding packet {"type":2,"data":["event","payload"],"nsp":"/"} +0ms
socket.io-parser encoded {"type":2,"data":["event","payload"],"nsp":"/"} as 2["event","payload"] +1ms
socket.io-redis ignore same uid +0ms
If both your master and child processes display the same parser messages, then your application is properly scaling.
There shouldn't be a problem with your setup if you are emitting from a single worker. What you're doing is emitting from all four workers, and due to Redis publish/subscribe, the messages aren't duplicated, but written four times, as you asked the application to do. Here's a simple diagram of what Redis does:
Client <-- Worker 1 emit --> Redis
Client <-- Worker 2 <----------|
Client <-- Worker 3 <----------|
Client <-- Worker 4 <----------|
As you can see, when you emit from a worker, it will publish the emit to Redis, and it will be mirrored by the other workers, which have subscribed to the Redis database. This also means you can use multiple socket servers connected to the same instance, and an emit on one server will be fired on all connected servers.
With cluster, when a client connects, it will connect to one of your four workers, not all four. That also means anything you emit from that worker will only be shown once to the client. So yes, the application is scaling, but the way you're doing it, you're emitting from all four workers, and the Redis database is making it as if you were calling it four times on a single worker. If a client actually connected to all four of your socket instances, they'd be receiving sixteen messages a second, not four.
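Concretely, the smallest change to the code in the question is to schedule the interval in a single process only. A sketch, assuming the worker file also requires cluster:
// only worker 1 schedules the broadcast; the Redis store still fans
// the message out to clients connected to the other workers
if (cluster.worker && cluster.worker.id === 1) {
  setInterval(send, 1000);
}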
The type of socket handling depends on the type of application you're going to have. If you're going to handle clients individually, then you should have no problem, because the connection event will only fire for one worker per client. If you need a global "heartbeat", then you could have a socket handler in your master process. Since workers die when the master process dies, you should keep the connection load off of the master process and let the children handle connections. Here's an example:
var cluster = require('cluster');
var os = require('os');

if (cluster.isMaster) {
  // we create a HTTP server, but we do not use listen
  // that way, we have a socket.io server that doesn't accept connections
  var server = require('http').createServer();
  var io = require('socket.io').listen(server);
  var RedisStore = require('socket.io/lib/stores/redis');
  var redis = require('socket.io/node_modules/redis');

  io.set('store', new RedisStore({
    redisPub: redis.createClient(),
    redisSub: redis.createClient(),
    redisClient: redis.createClient()
  }));

  setInterval(function() {
    // all workers will receive this in Redis, and emit
    io.sockets.emit('data', 'payload');
  }, 1000);

  for (var i = 0; i < os.cpus().length; i++) {
    cluster.fork();
  }

  cluster.on('exit', function(worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });
}

if (cluster.isWorker) {
  var express = require('express');
  var app = express();
  var http = require('http');
  var server = http.createServer(app);
  var io = require('socket.io').listen(server);
  var RedisStore = require('socket.io/lib/stores/redis');
  var redis = require('socket.io/node_modules/redis');

  io.set('store', new RedisStore({
    redisPub: redis.createClient(),
    redisSub: redis.createClient(),
    redisClient: redis.createClient()
  }));

  io.sockets.on('connection', function(socket) {
    socket.emit('data', 'connected to worker: ' + cluster.worker.id);
  });

  // listen on the server that socket.io is attached to; app.listen()
  // would create a separate server without socket.io attached
  server.listen(80);
}
In the example, there are five Socket.IO instances, one being the master, and four being the children. The master server never calls listen() so there is no connection overhead on that process. However, if you call an emit on the master process, it will be published to Redis, and the four worker processes will perform the emit on their clients. This offsets connection load to workers, and if a worker were to die, your main application logic would be untouched in the master.
Note that with Redis, all emits, even in a namespace or room, will be processed by the other worker processes as if you had triggered the emit from that process. In other words, if you have two Socket.IO instances with one Redis instance, calling emit() on a socket in the first worker will send the data to its clients, while worker two will do the same as if you had called the emit from that worker.
Let the master handle your heartbeat (example below), or start multiple processes on different ports internally and load balance them with nginx (which also supports websockets from v1.3 upwards).
Cluster with Master
// on the server
var express = require('express');
var server = express();
var socket = require('socket.io');
var io = socket.listen(server);
var cluster = require('cluster');
var numCPUs = require('os').cpus().length;

// socket.io
io.set('store', new socket.RedisStore);

// set-up connections...
io.sockets.on('connection', function(socket) {
  socket.on('join', function(rooms) {
    rooms.forEach(function(room) {
      socket.join(room);
    });
  });
  socket.on('leave', function(rooms) {
    rooms.forEach(function(room) {
      socket.leave(room);
    });
  });
});

if (cluster.isMaster) {
  // Fork workers.
  for (var i = 0; i < numCPUs; i++) {
    cluster.fork();
  }

  // Emit a message every second
  function send() {
    console.log('howdy');
    io.sockets.in('room').emit('data', 'howdy');
  }
  setInterval(send, 1000);

  cluster.on('exit', function(worker, code, signal) {
    console.log('worker ' + worker.process.pid + ' died');
  });
}
This actually looks like Socket.IO succeeding at scaling. You would expect a message from one server to go to all sockets in that room, regardless of which server they happen to be connected to.
Your best bet is to have one master process that sends a message each second. You can do this by only running it if cluster.isMaster, for example.
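A sketch of that suggestion, assuming the Redis store from the answers above is configured so the workers relay the master's emit to their clients:
if (cluster.isMaster) {
  // the master is the only process that schedules the broadcast
  setInterval(function () {
    io.sockets.in('room').emit('data', 'howdy');
  }, 1000);
}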
Inter-process communication is not enough to make socket.io 1.4.5 work with cluster. Forcing websocket mode is also a must. See WebSocket handshake in Node.JS, Socket.IO and Clusters not working.
