I'm trying to create a cluster that spreads hashing across CPUs; whichever worker finds the hash should send it back for me to use, and all the other workers should be killed as soon as the first one responds.
I started by just creating the workers and running the function, then added the "send" back to the master, and then tried adding the logic to kill all workers.
I used Killing node.js workers after function is done as a reference, but it doesn't seem to work: I can still see a Node process running in the background (on a 2-core machine) using a ton of CPU, and I'll even get console output after the main Node process has finished and I'm back at the bash prompt.
I can't for the life of me figure out where I'm going wrong, so any assistance would be appreciated.
My current code is:
const cluster = require('cluster');
const numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
    for (let i = 0; i < numCPUs; i++) {
        cluster.fork();
    }
    // When any worker exits, kill the rest and stop the master
    cluster.on('exit', function (worker, code, signal) {
        for (var id in cluster.workers) {
            cluster.workers[id].kill();
        }
        process.exit(0);
    });
    function messageHandler(msg) {
        console.log(msg);
        if (msg.hash.length > 1) {
            console.log(msg.hash);
        }
    }
    for (const id in cluster.workers) {
        cluster.workers[id].on('message', messageHandler);
    }
} else {
    console.log(`Worker ${process.pid} started and finished`);
    console.log(parseInt(cluster.worker.id));
    let difficulty = 5;
    i = cluster.worker.id;
    var start = new Date();
    var hrstart = process.hrtime();
    hash = computeHash(index, lasthash, timestamp, data, i);
    while (hash.substring(0, difficulty) !== Array(difficulty + 1).join("0")) {
        hash = computeHash(index, lasthash, timestamp, data, i);
        i = i + cluster.worker.id;
    }
    var end = new Date() - start,
        hrend = process.hrtime(hrstart);
    console.info('Execution time (hr): %ds %dms', hrend[0], hrend[1] / 1000000);
    console.log("Hash found from: " + i);
    process.send({
        hash: hash
    });
    process.exit(0);
}
Okay, so I managed to fix this in a fairly hacky way. I discovered that on Windows the answer provided elsewhere does work, i.e.:
for (var id in cluster.workers) {
    cluster.workers[id].kill();
}
However, on Linux the worker processes keep running even after you terminate the master. If, like me, you're running blocking code in the child, kill() won't do it (I also couldn't add a timeout into the function, for whatever reason).
The way I fixed this was to loop over the remaining workers as before, but this time call process.kill with each worker's pid, like so:
for (var id in cluster.workers) {
    console.log("Killing remaining processes");
    let process_id = cluster.workers[id].process.pid;
    process.kill(process_id);
}
This solution is hacky, but it works, and there are very few examples out there, so I hope this can help someone.
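For anyone who wants the pieces in one place, the master side I ended up with looks roughly like this - a sketch of my setup, with computeHash and its inputs living in the worker code shown above:

const cluster = require('cluster');
const numCPUs = require('os').cpus().length;

if (cluster.isMaster) {
    for (let i = 0; i < numCPUs; i++) {
        cluster.fork();
    }
    function messageHandler(msg) {
        if (msg.hash && msg.hash.length > 1) {
            console.log('Winning hash: ' + msg.hash);
            // Kill every remaining worker by pid - this works on Linux too
            for (const id in cluster.workers) {
                try {
                    process.kill(cluster.workers[id].process.pid);
                } catch (e) {
                    // the winning worker may have already exited
                }
            }
            process.exit(0);
        }
    }
    for (const id in cluster.workers) {
        cluster.workers[id].on('message', messageHandler);
    }
}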
I have a cordova app for iOS in which I'm using indexedDB to store significant amounts of data in separate stores in one database.
I want to inform the user of the amount of space the app is using, partly because the limit for indexedDB seems to be unclear/different on different devices and I'd like to see where the usage is at the point of failure, and partly as a way to warn the user that they need to manage the data they're storing offline before it becomes a problem (although I know I can capture this in the transaction abort event - I just have no idea what the limit is!).
In development I've been using the function below in the browser (I have the browser platform added, just for development) which has worked well:
function showIndexedDbSize(db_name) {
    "use strict";
    var this_db;
    var storesizes = [];

    function openDatabase() {
        return new Promise(function (resolve, reject) {
            var request = window.indexedDB.open(db_name);
            request.onsuccess = function (event) {
                this_db = event.target.result;
                resolve(this_db.objectStoreNames);
            };
            request.onerror = reject;
        });
    }

    function getObjectStoreData(storename) {
        return new Promise(function (resolve, reject) {
            // 'readonly' replaces the long-deprecated IDBTransaction.READ_ONLY constant
            var trans = this_db.transaction(storename, 'readonly');
            var store = trans.objectStore(storename);
            var items = [];
            trans.oncomplete = function (evt) {
                var szBytes = toSize(items);
                var szMBytes = (szBytes / 1024 / 1024).toFixed(2);
                storesizes.push({'Store Name': storename, 'Items': items.length, 'Size': szMBytes + 'MB (' + szBytes + ' bytes)'});
                resolve();
            };
            var cursorRequest = store.openCursor();
            cursorRequest.onerror = function (error) {
                reject(error);
            };
            cursorRequest.onsuccess = function (evt) {
                var cursor = evt.target.result;
                if (cursor) {
                    items.push(cursor.value);
                    cursor.continue();
                }
            };
        });
    }

    function toSize(items) {
        var size = 0;
        for (var i = 0; i < items.length; i++) {
            // JS strings are UTF-16, so roughly 2 bytes per character
            size += JSON.stringify(items[i]).length * 2;
        }
        return size;
    }

    openDatabase().then(function (stores) {
        var PromiseArray = [];
        for (var i = 0; i < stores.length; i++) {
            PromiseArray.push(getObjectStoreData(stores[i]));
        }
        Promise.all(PromiseArray).then(function () {
            this_db.close();
            console.table(storesizes);
        });
    });
}
It works well on the device too when the stores total <150MB or thereabouts (there isn't a clear threshold), but it uses JSON.stringify to serialize the objects in order to count the bytes, and as the database grows larger on the device that process forces the app to restart. I'm watching the memory usage in Xcode and it doesn't peak at all. Nothing. It hovers between 25 and 30MB whatever you do, not just this, which seems fine to me. The CPU is also <5%. The energy usage is high, but I'm not sure this would affect the app negatively, beyond draining the battery faster (unless I've misunderstood something). So I'm not sure why it forces an ugly restart.
In my endless googling I've learnt that JSON.parse and JSON.stringify are very hungry processes, which is why I switched to indexedDB in the first place: it allows the storage of objects directly, avoiding those processes entirely.
My questions are as follows:
1. Is there a way to amend the function to slow it down (it doesn't need to be fast, just reliable!) and prevent the restart? There's a sketch of what I mean below.
2. Why would the app restart if there is no discernible pressure on the memory in Xcode? Or is this not a very good way of detecting this sort of thing? Is there some hidden garbage-collection problem in the function? (I'm a noob when it comes to GC generally, but there don't seem to be any leaks in the app.)
3. Is there a better way to show the usage of the database that would avoid this problem? Everything I find relies on these JSON processes, and the navigator.storage Web API doesn't appear to be supported on the cordova iOS platform (which is a real shame, as it works amazingly in the browser! Gah!)
Any suggestions/thoughts massively appreciated!
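To illustrate question 1, this is the sort of batched, throttled read I have in mind: a fresh transaction per batch, so that pausing between batches doesn't invalidate the cursor (IndexedDB transactions auto-commit as soon as control returns to the event loop). Only a sketch - batchSize and pauseMs are knobs I'd have to tune:

function sizeStoreInBatches(db, storename, batchSize, pauseMs) {
    "use strict";
    return new Promise(function (resolve, reject) {
        var bytes = 0, count = 0;
        function readBatch(afterKey) {
            var trans = db.transaction(storename, 'readonly');
            var store = trans.objectStore(storename);
            // Resume strictly after the last key counted in the previous batch
            var range = (afterKey === undefined) ? null : IDBKeyRange.lowerBound(afterKey, true);
            var inBatch = 0, lastKey;
            var req = store.openCursor(range);
            req.onerror = reject;
            req.onsuccess = function (evt) {
                var cursor = evt.target.result;
                if (cursor && inBatch < batchSize) {
                    bytes += JSON.stringify(cursor.value).length * 2;
                    count++;
                    inBatch++;
                    lastKey = cursor.key;
                    cursor.continue();
                } else if (cursor) {
                    // More rows remain: let the app breathe, then start a new batch
                    setTimeout(function () { readBatch(lastKey); }, pauseMs);
                } else {
                    resolve({ items: count, bytes: bytes });
                }
            };
        }
        readBatch(undefined);
    });
}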
So my long-term goal here is to run a function from "main.ts" in parallel with 7 different inputs. There are no shared resources, just a pure function.
As a test, I spun up worker threads that did trivial math operations over and over, and it parallelized perfectly (code shown below).
However, when I so much as require (not run in any way) a function from the main portion of my app, the trivial math operations stop parallelizing.
I'm completely lost on how requiring a file can change thread behavior. Anyone have ideas? I've pasted as much info as I can below.
worker_thread.js
const process = require("process");
function heavyComputation(data){
console.time(data.toString())
let sum = 0;
for (let i = 0; i < 100000000; i++){
sum = Math.pow(sum,1.02) % 100000;
}
console.timeEnd(data.toString())
return data + 1000;
}
process.on("message", (message) => {
process.send({
result: heavyComputation(1),
});
});
worker_test.js
const child_process = require("child_process");
let workers = [];
const NUM_THREADS = 7;
let pendingResults = NUM_THREADS;
console.time("async");
function onMessage(message) {
// console.log("Received response message:", message.result);
pendingResults--;
if (pendingResults == 0){
console.timeEnd("async");
workers.forEach(x => x.kill());
}
}
for (let i = 0; i < NUM_THREADS; i++){
workers[i] = child_process.fork("src/server/worker_thread.js");
workers[i].addListener("message", onMessage);
}
for (let i = 0; i < NUM_THREADS; i++){
workers[i].send({ data: argsData });
}
The above two files work exactly as you'd expect:
before
1: 843.353ms
1: 837.07ms
1: 848.494ms
1: 844.644ms
1: 847.34ms
1: 855.917ms
1: 896.467ms
async: 976.024ms
However, when I add the following to worker_thread.js, it all breaks. What it's importing is the transpiled version of my main TypeScript file.
const mainApp = require("../../built/src/server/main.js");
after
1: 846.826ms
1: 864.081ms
1: 873.927ms
1: 874.493ms
1: 921.775ms
1: 927.178ms
1: 942.86ms
async: 2.205s
Notice how the whole operation takes significantly longer, despite each individual computation taking roughly the same amount of time as before.
The contents of main are far too large to post here, but here's some of the imports/config at the top of the built file:
"use strict";
var __assign = (this && this.__assign) || function () {
...
Object.defineProperty(exports, "__esModule", { value: true });
exports.addTapInfoToAiParams = exports.evaluateFirstPlacements = exports.getBestMove = void 0;
var evaluator = require("./evaluator");
var aiModeManager = require("./ai_mode_manager");
var boardHelper = require("./board_helper");
So it turns out that deep in the dependency tree of the file I was requiring, something was reading a large text file from the hard disk, which slowed the workers down significantly and undermined the multithreading.
Mystery solved. I doubt many other people will have this issue, but in case someone does, hopefully this saves you some time.
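If anyone hits the same thing, one workaround worth sketching is to defer the expensive require until the worker actually needs it, so the fork itself stays cheap (getBestMove is one of the exports visible in the built file above; treat this as a sketch rather than my exact code):

const process = require("process");

let mainApp = null; // loaded lazily, on first use

process.on("message", (message) => {
    // Requiring here means the big synchronous disk read happens once,
    // on the first message, instead of during every worker's startup.
    if (mainApp === null) {
        mainApp = require("../../built/src/server/main.js");
    }
    process.send({ result: mainApp.getBestMove(message.data) });
});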
I'm doing some scraping. How can I stay on a page and read its content to search for data every xx seconds, without refreshing the page? I use the approach below, but the PC crashes after some time. Any ideas on how to make it efficient? I'd like to achieve it without using while (true). The readOdds function doesn't always take the same amount of time.
//...
while (true) {
    const html = await page.content();
    cant = await readOdds(html); // some code with the html
    console.info('Waiting 5 seconds to read again...');
    await page.waitFor(5000);
}
This is a section of readOdds:
async function readOdds(htmlPage) {
    try {
        var savedat = functions.mysqlDateTime(new Date());
        var pageHtml = htmlPage.replace(/(\r\n|\n|\r)/gm, "");
        var exp_text_all = /<coupon-section(.*?)<\/coupon-section>/g;
        var leagueLinksMatches = pageHtml.match(exp_text_all);
        var cmarkets = 0;
        let reset = await mysqlfunctions.promise_updateMarketsCount(cmarkets, table_markets_count, site);
        console.log(reset);
        if (leagueLinksMatches == null) {
            return cmarkets;
        }
        for (let i = 0; i < leagueLinksMatches.length; i++) {
            const html = leagueLinksMatches[i];
            var expc = /class="title ellipsis-text">(.*?)<\/span/g;
            var nameChampionship = functions.getDataInHtmlCode(String(html).match(expc)[0]);
            var idChampionship = await mysqlfunctions.promise_db_insert_Championship(nameChampionship, gsport, table_championship);
            var exp_text = /<ui-event-line(.*?)<\/ui-event-line>/g;
            var text = html.match(exp_text);
            // console.info(text.length);
            for (let index = 0; index < text.length; index++) {
                const element = text[index];
                ....
Simple Solution with recursive callback
Before we get into that, though: you can have the function call itself instead of using a while loop, which runs forever without any proper control.
const readLoop = async () => {
    const html = await page.content();
    cant = await readOdds(html);
    return readLoop(); // run the loop again
};

// invoke it for infinite callbacks without any delays at all
await readLoop();
This will run the same block continuously, without any delay, for as long as your readOdds function keeps returning. You won't have to use page.waitFor or while.
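If you do still want the 5-second breather between reads, the same recursive shape takes a promise-based sleep nicely (a sketch reusing page and readOdds from the question):

const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

const readLoop = async () => {
    const html = await page.content();
    cant = await readOdds(html);
    console.info('Waiting 5 seconds to read again...');
    await sleep(5000); // pause without blocking the event loop
    return readLoop();
};

await readLoop();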
Memory leak prevention
For advanced cases where you need respawning over a period of time, a queue like bull and a process manager like PM2 come into play. Note, though, that a queue would void the "without refreshing the page?" part of your question.
You should definitely use pm2 either way.
The usage is as follows:
npm i -g pm2
pm2 start index.js --name=myawesomeapp  # or your app file
There are a few useful arguments:
--max-memory-restart 100M limits memory usage to 100M and restarts the app when it's exceeded.
--max-restarts 50 stops the app for good once it has restarted 50 times due to errors (or a memory leak).
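Combined, a typical invocation would look something like this (the file and app name are from the example above):

pm2 start index.js --name=myawesomeapp --max-memory-restart 100M --max-restarts 50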
You can check the logs using pm2 logs myawesomeapp as you set the name above.
I'm trying to list all the serial ports and select the one whose name begins with /dev/cu.usbmodem. For context: it's an Arduino hooked up to a Raspberry Pi running node. The Raspberry Pi has a habit of renaming the ports every time it is rebooted.
So far I have this:
com.list(function (err, ports) {
    ports.forEach(function (port) {
        var arduinoPort = port.comName;
        if (arduinoPort.substring(0, 16) == "/dev/cu.usbmodem") {
            var SERIALPORT_ID = arduinoPort;
        }
    });
});
This takes long enough that the next statement fails, because SERIALPORT_ID has yet to be assigned:
var serialPort = new com.SerialPort(SERIALPORT_ID, {
    baudrate: 57600,
    parser: com.parsers.readline('\r\n')
});
What callback or structuring technique will make the second statement wait for the first one to declare the variable before executing?
The function below assumes there is only one matching serial port in your results. I changed your ports.forEach to a standard for loop: the work inside the loop is synchronous, and while I believe forEach is synchronous too, I know a plain for loop is, and it lets us break out as soon as we find the right port. This logic can easily be changed if my assumption of "only one good port" is incorrect.
function getSerialPort(callback) {
    'use strict';
    com.list(function (err, ports) {
        // A standard for loop is definitely synchronous and lets us
        // stop as soon as the right port is found
        for (var i = 0; i < ports.length; i++) {
            var port = ports[i];
            var arduinoPort = port.comName;
            if (arduinoPort.substring(0, 16) === "/dev/cu.usbmodem") {
                var serialPort = new com.SerialPort(arduinoPort, {
                    baudrate: 57600,
                    parser: com.parsers.readline('\r\n')
                });
                callback(serialPort);
                return; // stop looking once we've handed off the port
            }
        }
    });
}
getSerialPort(function (serialPort) {
    'use strict';
    console.log('Serial Port: ' + serialPort);
});
The Raspberry Pi has a habit of renaming the ports every time it is rebooted.
Well, you could also create some udev rules for the USB hardware you are using, so that the Arduino is always mapped to the same serial port. Assuming you are running Debian:
vim /etc/udev/rules.d/98-usb-serial.rules
SUBSYSTEM=="tty", ATTRS{idVendor}=="2341", ATTRS{idProduct}=="0044", ATTRS{serial}=="64935343733351F072D0", SYMLINK+="arduinoUno"
SUBSYSTEM=="tty", ATTRS{idVendor}=="2341", ATTRS{idProduct}=="0043", ATTRS{serial}=="7523230313535121B0E1", SYMLINK+="arduinoMega"
To find out the vendor ID, product ID and serial number of a USB device, use:
dmesg
lsusb
Unplug the device in question, plug it back in and it should be mapped to:
/dev/arduinoUno
/dev/arduinoMega
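Your node code can then open the symlink directly, whatever ttyACM number the Pi hands out on boot (a sketch using the same serialport API as the question):

var serialPort = new com.SerialPort('/dev/arduinoUno', {
    baudrate: 57600,
    parser: com.parsers.readline('\r\n')
});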
Alternatively, you can scan the list of ports for the right one and connect automagically!
It works great on OS X and Ubuntu; I haven't tested it on the Raspberry Pi yet, but you get the idea.
Thanks to ChrisCM for the "for" :)
var myPort;

function getSerialPort(callback) {
    com.list(function (err, ports) {
        for (var i = 0; i < ports.length; i++) {
            var port = ports[i];
            // Look for "duino" (or "moti") somewhere in the port metadata
            if (port.pnpId.indexOf("duino") != -1 || port.manufacturer.indexOf("duino") != -1 || port.comName.indexOf('moti') != -1) {
                myPort = new SerialPort(port.comName, {
                    baudrate: 115200,
                    parser: serialport.parsers.readline("\r\n"),
                });
                callback(myPort);
                return; // stop at the first matching port
            }
        }
    });
}

getSerialPort(function (myPort) {
    console.log('Serial Port: ' + myPort);
});
You can also output all the port specs using:
console.log("pnpId: " + port.pnpId);
console.log("manufacturer: " + port.manufacturer);
console.log("comName: " + port.comName);
console.log("serialNumber: " + port.serialNumber);
console.log("vendorId: " + port.vendorId);
console.log("productId: " + port.productId);
to find a pattern you could use for automatic connection.
Hope it helps!
I'm trying to write a performance tool using node.js so I can automate it and store the results in MySQL. The tool is supposed to gather how long it takes the browser to load a particular webpage. I'm using HttpWatch to measure the performance, and the result is reported in seconds. The browser used is Firefox.
Below is a piece of script I'm using to run the performance test:
var MyUrls = [
    "http://google.com",
    "http://yahoo.com"
];

try {
    var win32ole = require('win32ole');
    var control = win32ole.client.Dispatch('HttpWatch.Controller');
    var plugin = control.Firefox.New();
    for (var i = 0; i < MyUrls.length; i++) {
        var url = MyUrls[i];
        console.log(url);
        for (var j = 0; j < 14; j++) {
            // Start recording HTTP traffic
            plugin.Log.EnableFilter(false);
            // Clear cache and cookies before each test
            plugin.ClearCache();
            plugin.ClearAllCookies();
            plugin.ClearSessionCookies();
            plugin.Record();
            // Go to the URL and wait for the page to be loaded
            plugin.GotoURL(url);
            control.Wait(plugin, -1);
            // Stop recording HTTP
            plugin.Stop();
            if (plugin.Log.Pages.Count != 0) {
                // Display summary statistics for the page
                var summary = plugin.Log.Pages(0).Entries.Summary;
                console.log(summary.Time);
            }
        }
    }
    plugin.CloseBrowser();
} catch (e) {
    console.log('*** exception caught ***\n' + e);
}
After the second iteration of the inner loop, I'm getting the following error:
C:\xampp\htdocs\test\browser-perf>node FF-load-navigation.js
http://localhost/NFC-performance/Bing.htm
[Number (VT_R8 or VT_I8 bug?)]
2.718
[Number (VT_R8 or VT_I8 bug?)]
2.718
OLE error: [EnableFilter] -2147352570 [EnableFilter] IDispatch::GetIDsOfNames AutoWrap() failed
Has anyone seen this before? Can you help me?
You have to remember that node is asynchronous.
That for loop can end up running alongside plugin.CloseBrowser(), which is obviously not what you want: the browser gets closed while the loop is still using it, which causes problems inside the loop.
You want CloseBrowser to run only after the for loop finishes.
Look at async for a simple way to do this.
async.each(MyUrls, function (url, callback) {
    ...
    callback();
}, function (err) {
    plugin.CloseBrowser();
});
The same has to be done for your inner for loop.
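For that inner loop, async.timesSeries keeps the 14 runs strictly sequential. A sketch of both loops together, reusing the HttpWatch calls from your code (callback-style async API assumed):

async.eachSeries(MyUrls, function (url, nextUrl) {
    async.timesSeries(14, function (n, nextRun) {
        plugin.Log.EnableFilter(false);
        plugin.ClearCache();
        plugin.ClearAllCookies();
        plugin.ClearSessionCookies();
        plugin.Record();
        plugin.GotoURL(url);
        control.Wait(plugin, -1);
        plugin.Stop();
        if (plugin.Log.Pages.Count != 0) {
            console.log(plugin.Log.Pages(0).Entries.Summary.Time);
        }
        nextRun();
    }, nextUrl);
}, function (err) {
    plugin.CloseBrowser(); // runs only after every URL has finished
});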