node.js crashing after loop iteration - javascript

I'm trying to write a performance tool using Node.js so I can automate it and store the results in MySQL. The tool is supposed to measure how long it took the browser to load a particular webpage. I'm using HttpWatch to measure the performance, and the result is displayed in seconds. The browser used is Firefox.
Below is a piece of script I'm using to run the performance test:
var MyUrls = [
    "http://google.com",
    "http://yahoo.com"
];

try {
    var win32ole = require('win32ole');
    var control = win32ole.client.Dispatch('HttpWatch.Controller');
    var plugin = control.Firefox.New();
    for (var i = 0; i < MyUrls.length; i++) {
        var url = MyUrls[i];
        console.log(url);
        for (var j = 0; j < 14; j++) {
            // Start recording HTTP traffic
            plugin.Log.EnableFilter(false);
            // Clear cache and cookies before each test
            plugin.ClearCache();
            plugin.ClearAllCookies();
            plugin.ClearSessionCookies();
            plugin.Record();
            // Go to the URL and wait for the page to be loaded
            plugin.GotoURL(url);
            control.Wait(plugin, -1);
            // Stop recording HTTP
            plugin.Stop();
            if (plugin.Log.Pages.Count != 0) {
                // Display summary statistics for the page
                var summary = plugin.Log.Pages(0).Entries.Summary;
                console.log(summary.Time);
            }
        }
    }
    plugin.CloseBrowser();
} catch (e) {
    console.log('*** exception caught ***\n' + e);
}
After the second iteration of the inner loop, I'm getting the following error:
C:\xampp\htdocs\test\browser-perf>node FF-load-navigation.js
http://localhost/NFC-performance/Bing.htm
[Number (VT_R8 or VT_I8 bug?)]
2.718
[Number (VT_R8 or VT_I8 bug?)]
2.718
OLE error: [EnableFilter] -2147352570 [EnableFilter] IDispatch::GetIDsOfNames AutoWrap() failed
Has anyone seen this before? Can you help me?

You have to remember that Node is asynchronous.
So that for loop runs at the same time as plugin.CloseBrowser();, which is obviously not what you want: the browser is being closed while the loop is still using it, which causes problems inside the loop.
Rather, you want that call to run after the for loop finishes.
Look at async for a simple way to do this.
async.each(MyUrls, function (url, callback) {
    ...
    callback();
}, function (err) {
    plugin.CloseBrowser();
});
The same has to be done for your inner for loop.
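Since all of the HttpWatch calls drive a single browser instance, the series variants of async (eachSeries/timesSeries) also keep the iterations from overlapping while still leaving CloseBrowser until the end. A rough sketch, assuming the async package and reusing the plugin and control objects from the question (untested):

var async = require('async');

async.eachSeries(MyUrls, function (url, urlDone) {
    console.log(url);
    // Run the 14 measurements for this URL one after another
    async.timesSeries(14, function (n, next) {
        plugin.Log.EnableFilter(false);
        plugin.ClearCache();
        plugin.ClearAllCookies();
        plugin.ClearSessionCookies();
        plugin.Record();
        plugin.GotoURL(url);
        control.Wait(plugin, -1);
        plugin.Stop();
        if (plugin.Log.Pages.Count != 0) {
            console.log(plugin.Log.Pages(0).Entries.Summary.Time);
        }
        next();
    }, urlDone);
}, function (err) {
    // Only close the browser once every URL has been measured
    plugin.CloseBrowser();
});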

Related

Kill all Node cluster workers once operation is complete

I'm trying to create a cluster to spread hashing across CPUs; the worker that finds the hash returns it for me to use, and all other workers should be killed as soon as the first one responds.
I started my code by just creating the clusters and running the function, then added the "send" back to the master, and then tried adding the logic to kill all workers.
I've read the documentation and used Killing node.js workers after function is done as a reference, but it doesn't seem to work: I can see a Node process still running in the background (on a 2-core machine) using a ton of CPU, and I still get some console output even after the Node process has finished and I'm back at the bash terminal.
I can't for the life of me figure out where I'm going wrong, so any assistance would be appreciated.
My current code is:
if (cluster.isMaster) {
    for (let i = 0; i < numCPUs; i++) {
        let worker = cluster.fork();
    }

    cluster.on('exit', function (worker, code, signal) {
        for (var id in cluster.workers) {
            cluster.workers[id].kill();
        }
        process.exit(0);
    });

    function messageHandler(msg) {
        console.log(msg);
        if (msg.hash.length > 1) {
            console.log(msg.hash);
        }
    }

    for (const id in cluster.workers) {
        cluster.workers[id].on('message', messageHandler);
    }
} else {
    console.log(`Worker ${process.pid} started and finished`);
    console.log(parseInt(cluster.worker.id));

    let difficulty = 5;
    i = cluster.worker.id;
    var start = new Date();
    var hrstart = process.hrtime();
    hash = computeHash(index, lasthash, timestamp, data, i);
    while (hash.substring(0, difficulty) !== Array(difficulty + 1).join("0")) {
        hash = computeHash(index, lasthash, timestamp, data, i);
        i = i + cluster.worker.id;
    }
    var end = new Date() - start,
        hrend = process.hrtime(hrstart);
    console.info('Execution time (hr): %ds %dms', hrend[0], hrend[1] / 1000000);
    console.log("Hash found from: " + i);
    process.send({
        hash: hash
    });
    process.exit(0);
}
Okay, so I managed to fix this in quite a hacky way. I discovered that on Windows the answer provided elsewhere does work, i.e.:
for (var id in cluster.workers) {
    cluster.workers[id].kill();
}
However, on Linux you'll still have the process running even if you terminate the master. If you're like me and running blocking code in your child, it won't work (I also couldn't add a timeout into the function, for whatever reason).
The way I fixed this was to get a list of all remaining workers and their pids inside the loop, as before, only this time using process.kill with the pid of each remaining worker, like so:
for (var id in cluster.workers) {
    console.log("Killing remaining processes");
    let process_id = cluster.workers[id].process.pid;
    process.kill(process_id);
}
This solution is hacky, but it works, and there are very few examples out there, so I hope this can help someone.
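For context, the goal in the question was to kill everything as soon as the first worker reports a hash. One way the per-pid loop above might be wired into the master's message handler (a sketch based on the question's code, not a tested drop-in):

function messageHandler(msg) {
    if (msg.hash && msg.hash.length > 1) {
        console.log("Hash received: " + msg.hash);
        // First worker answered: terminate every remaining worker by pid
        for (var id in cluster.workers) {
            var pid = cluster.workers[id].process.pid;
            try {
                process.kill(pid); // SIGTERM by default
            } catch (e) {
                // the worker may already have exited
            }
        }
        process.exit(0);
    }
}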

Scraping the same page forever using puppeteer

I'm doing some scraping. How can I stay on a page and read its content to search for data every xx seconds, without refreshing the page? I use the approach below, but the PC crashes after some time. Any ideas on how to make it efficient? I would like to achieve it without using while (true). The readOdds function does not always take the same amount of time.
//...
while (true) {
    const html = await page.content();
    cant = await readOdds(html); // some code with the html
    console.info('Waiting 5 seconds to read again...');
    await page.waitFor(5000);
}
This is a section of the readOdds function:
async function readOdds(htmlPage) {
    try {
        var savedat = functions.mysqlDateTime(new Date());
        var pageHtml = htmlPage.replace(/(\r\n|\n|\r)/gm, "");
        var exp_text_all = /<coupon-section(.*?)<\/coupon-section>/g;
        var leagueLinksMatches = pageHtml.match(exp_text_all);
        var cmarkets = 0;

        let reset = await mysqlfunctions.promise_updateMarketsCount(cmarkets, table_markets_count, site);
        console.log(reset);

        if (leagueLinksMatches == null) {
            return cmarkets;
        }

        for (let i = 0; i < leagueLinksMatches.length; i++) {
            const html = leagueLinksMatches[i];
            var expc = /class="title ellipsis-text">(.*?)<\/span/g;
            var nameChampionship = functions.getDataInHtmlCode(String(html).match(expc)[0]);
            var idChampionship = await mysqlfunctions.promise_db_insert_Championship(nameChampionship, gsport, table_championship);
            var exp_text = /<ui-event-line(.*?)<\/ui-event-line>/g;
            var text = html.match(exp_text);
            // console.info(text.length);

            for (let index = 0; index < text.length; index++) {
                const element = text[index];
                ....
Simple solution with a recursive function
However, before we go into that, you can run the function recursively instead of using while, which loops forever without any proper control.
const readLoop = async () => {
    const html = await page.content();
    cant = await readOdds(html);
    return readLoop(); // run the loop again
};

// invoke it for infinite callbacks without any delays at all
await readLoop();
This will run the same block of code continuously, without any delay, as long as your readOdds function keeps returning. You won't have to use page.waitFor and while.
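If you still want the pause from the question between reads, the same recursive pattern works with the wait kept in place. A sketch reusing the names from the original snippet:

const readLoop = async () => {
    const html = await page.content();
    cant = await readOdds(html);
    console.info('Waiting 5 seconds to read again...');
    await page.waitFor(5000); // pause between reads, as in the original snippet
    return readLoop(); // run the loop again
};

await readLoop();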
Memory leak prevention
For advanced cases where you need to respawn over a period of time, a queue like bull and a process manager like PM2 come into play. However, a queue would void the "without refreshing the page" part of your question.
You definitely should use pm2 though.
The usage is as follows:
npm i -g pm2
pm2 start index.js --name=myawesomeapp // or your app file
There are a few useful arguments:
--max-memory-restart 100M, which limits memory usage to 100M and restarts the app when that limit is reached.
--max-restarts 50, which stops restarting the app once it has restarted 50 times due to errors (or a memory leak).
You can check the logs using pm2 logs myawesomeapp as you set the name above.
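Putting those flags together with the start command above, a typical invocation might look like this (the app file and name are just the placeholders used earlier):

pm2 start index.js --name=myawesomeapp --max-memory-restart 100M --max-restarts 50
pm2 logs myawesomeapp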

setInterval refuses to run my function after recent edit to my code... why?

To give you a grasp of what I mean in my title, take a look at this code, which is from before setInterval stopped working.
var anime = function(){
    _.each(db.get('','animedb'), function(site){
        var ann = function(){
^ the function is in a var
            for (var epid in eps) {
                epid = parseInt(epid, 10);
                var eptime = (new Date(eps[epid].pubDate[0])*1000)/1000;
                if(eptime > site.lastbuilddate){
                    counter = counter+1;
                    if(counter < 6){
                        list.push(font(colors['normal'])+eps[epid].title[0] +' - ['+ utils.secondsToString((new Date() - (eptime+site.delay))/1000, 1)+' ago.]</f>');
                    }
                }
            }
        };
^ this is the part that breaks everything after it's been edited
        var run = setInterval(ann, site.interval*60000);
^ here is the setInterval; it's at the bottom of the each
anime();
^ here is the call for the whole function that calls the setInterval
The above code is part of an anime announcer for chat rooms owned by anime site owners, using their RSS feeds.
The above code works and, excuse me for saying this, but at this point I'm going to say "I have no idea why", because I really have no idea why setInterval picks and chooses when to work.
I talked to a friend who has more knowledge of JavaScript and time-based functions than me, and he said that there are no "conditions" required for setInterval to run.
for (var epid in eps) {
    epid = parseInt(epid, 10);
    var eptime = (new Date(eps[epid].pubDate[0])*1000)/1000;
    if(eptime > site.lastbuilddate){
        counter = counter+1;
        if(counter < 6){
            var url = eps[epid].link.split('//')[1];
            var keyword = '';
            var words = url.substr(0, url.length-1).split('/').join('-').split('-');
            for (var wid in words) {
                keyword += words[wid].charAt(0);
            }
            http.get({hostname:'dev.ilp.moe', port:80, path:'/surl/yourls-api.php?username=usernameremovedforsecurity&password=passwordremovedforsecurity&format=json&action=shorturl&url='+url+'&title='+ctitle+' - '+eps[epid].title[0]+'&keyword='+keyword}, function(r) {
                if(r.statusCode === 200) { // 200 is success
                    var b = '';
                    r.on('data', function(c) {
                        b += c;
                    });
                    r.on('end', function() {
                        list.push(font(colors['normal'])+eps[epid].title[0] +' - ['+ utils.secondsToString((new Date() - (eptime+site.delay))/1000, 1)+' ago.] - http://dev.ilp.moe/surl/'+keyword+'</f>');
                    });
                }
            });
        }
    }
};
The above code is the part that creates the short URLs.
Here is the JSON DB that is being loaded:
{"0":{"lastbuilddate":1426441081000,"delay":0,"host":"www.animerush.tv","path":"/rss.xml","chats":["animerushtv"],"interval":15},"1":{"lastbuilddate":1424068119000,"delay":28800000,"host":"dubbedanime.tv","path":"/feed/","chats":["dubbed-anime-tv"],"interval":15},"2":{"lastbuilddate":1426415086000,"delay":32400000,"host":"bestanimes.tv","path":"/feed/","chats":["bestanimestv"],"interval":15},"3":{"lastbuilddate":1426434866000,"delay":0,"host":"www.theanime.tv","path":"/feed/","chats":["timecapsule"],"interval":15}}
The recent edit to my code was supposed to implement shortened links for each episode released, using the links provided in the RSS feeds from the sites in the database.
The domain http://ilp.moe is my domain.
I have console logged everywhere and tested as much as I possibly could.
At this point I do not understand why the edit causes code that used to be executed by setInterval to no longer be executed.
The reason the code wasn't executed is that the functions were assigned to variables, so they weren't run until execution reached setInterval.
When they reach setInterval, the errors prevent setInterval from executing (depending on the severity of the error).
After taking the function and just running it, without putting it in a var or setInterval, and console logging for a bit, I found the error was caused by this line:
var url = eps[epid].link.split('//')[1];
In this case:
eps[epid].link; // === ["http://blah.com/animelolep1"]
My problem was that var url was trying to split an array and not a string.
Here is the fix:
var url = eps[epid].link[0].split('//')[1]; // grabs the first item from the list and then splits
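To illustrate the point above about functions stored in variables: the body never runs when it is assigned, so an error like this one only surfaces once setInterval fires the callback. A hypothetical, minimal reproduction:

var ann = function () {
    var link = ["http://blah.com/animelolep1"]; // an array, like eps[epid].link
    var url = link.split('//')[1];              // TypeError: link.split is not a function
};
// Nothing is thrown here - the function body has not executed yet.
setInterval(ann, 15 * 60000); // the TypeError only appears when the first interval fires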

Loading csv files content into dictionary of objects

I'm trying to load the content of several CSV files into a new array. The CSV files have a typical structure, with labels in the first row and values (both strings and real numbers) separated by commas. This part of the code is responsible for loading the data for future use with the Google Maps API (not a problem for now, since I'm stuck on just loading the data). I would like to have a structure in which I can access an element by its name; that's why var nodedata = {}; is created.
So the thing I totally don't get is why some part of the code is not being executed at all. console.log(nodedata); shows nothing, at least in my Firefox console.
That's my attempt at the problem; links to the CSV files are in the code.
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.js"></script>
<script src="http://jquery-csv.googlecode.com/files/jquery.csv-0.71.js"></script>
<script type="text/javascript">
    var nodes = {};
    var generation = {};
    var nodedata = {};

    $.get('https://dl.dropboxusercontent.com/u/25575808/energy/nodes.csv', function (response) {
        nodes = $.csv.toObjects(response);
        console.log(nodes);
    });

    $.get('https://dl.dropboxusercontent.com/u/25575808/energy/generation.csv', function (response) {
        generation = $.csv.toObjects(response);
        console.log(generation);
    });

    function getGeneration(nodename) {
        gen = 0;
        for (var i = 0; i < generation.length; i++) {
            if (generation[i].datetime == "2013-01-01 01:00") {
                if (generation[i].node == nodename) {
                    gen = gen + Number(generation[i]["output (MW)"]);
                }
            }
        }
        return gen;
    }

    for (var i = 0; i < nodes.length; i++) {
        nodedata[nodes[i].Node] = {
            center: new google.maps.LatLng(nodes[i].Latitude, nodes[i].Longitude),
            nodegen: getGeneration(nodes[i].Node)
        };
    }
    console.log(nodedata);
I believe the problem you're having is unrelated to the use of CSV data; rather, it is the fact that the data is being loaded asynchronously.
You are executing two $.get() requests to load the files, which take some time to download. The browser does not wait for them to finish before continuing through the rest of the code.
Therefore, it is possible for console.log(nodedata) to be executed before any data exists inside the nodes array.
An easy way to handle this is to stack your callback functions so that the first GET request completes -> run the 2nd GET request -> finally, run the processing code.
Check out this reorganization of the code: http://jsfiddle.net/Vr7sw/
(I removed the Google Maps line since I don't have the library loaded)
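In other words, something along these lines (a sketch of the nesting described above, reusing the URLs and the getGeneration helper from the question; not tested against the real files):

$.get('https://dl.dropboxusercontent.com/u/25575808/energy/nodes.csv', function (nodesResponse) {
    nodes = $.csv.toObjects(nodesResponse);
    // Only request the second file once the first one has arrived
    $.get('https://dl.dropboxusercontent.com/u/25575808/energy/generation.csv', function (genResponse) {
        generation = $.csv.toObjects(genResponse);
        // Both files are loaded - now it is safe to build nodedata
        for (var i = 0; i < nodes.length; i++) {
            nodedata[nodes[i].Node] = {
                center: new google.maps.LatLng(nodes[i].Latitude, nodes[i].Longitude),
                nodegen: getGeneration(nodes[i].Node)
            };
        }
        console.log(nodedata);
    });
});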
The problem is that the $.get requests are asynchronous (see the jQuery documentation). Try calling a function from inside your callback body, like this:
function nodesToJson(nodes) {
    for (var i = 0; i < nodes.length; i++) {
        nodedata[nodes[i].Node] = {
            center: new google.maps.LatLng(nodes[i].Latitude, nodes[i].Longitude),
            nodegen: getGeneration(nodes[i].Node)
        };
    }
    console.log(nodedata);
}

$.get('https://dl.dropboxusercontent.com/u/25575808/energy/nodes.csv', function (response) {
    nodes = $.csv.toObjects(response);
    // when the request is ready, process the nodes
    nodesToJson(nodes);
});

Looping Through Multiple JSON Requests (YouTube Data API)

Part of a website I am working on is a video page. I am pulling the videos from a YouTube account by accessing the YouTube Data API. Grabbing the videos in no particular order and unsorted works fine, but when I try to sort them into categories, I start running into trouble. Let's say there are three categories: Fruit, Vegetable, Pets. Instead of grabbing all the videos at once, I want to grab all the videos tagged with Fruit and append them to a <ul id="Fruit">, then request all videos tagged with Vegetable, and so on.
When starting out, I had the browser alert when it had finished the request and then append to the appropriate list. After I took out the alert, it still worked, but not the way I expected. Either the loop is advancing too quickly, or not advancing at all, but I can't seem to spot the mistake. What ends up happening is that all the videos get put into one list, <ul id="Vegetable">.
Please note: I am using a plugin called jGFeed which wraps the jQuery getJSON function, so I believe you can treat it as such.
var videoCategories = ['Fruit', 'Vegetable', 'Pets'];
for (var i = 0; i < videoCategories.length; i++) {
    var thisCategory = videoCategories[i];
    $.jGFeed('http://gdata.youtube.com/feeds/api/users/username/uploads?category='+thisCategory,
        // Do something with the returned data
        function(feeds) {
            // Check for errors
            if(!feeds) {
                return false;
            } else {
                for(var j = 0; j < feeds.entries.length; j++) {
                    var entry = feeds.entries[j];
                    var videoUrl = entry.link;
                    $('ul#'+thisCategory).append('<li>'+entry.title+'</li>');
                }
            }
        });
}
The problem is that you're using the thisCategory variable to set the category name, and the value of this variable changes while you're waiting for a response from the server.
You could try putting the whole request inside a function:
var videoCategories = ['Fruit', 'Vegetable', 'Pets'];
for (var i = 0; i < videoCategories.length; i++) {
    getCategory(videoCategories[i]);
}

function getCategory(thisCategory)
{
    $.jGFeed('http://gdata.youtube.com/feeds/api/users/username/uploads?category='+thisCategory,
        // Do something with the returned data
        function(feeds) {
            // Check for errors
            if(!feeds) {
                return false;
            } else {
                for(var j = 0; j < feeds.entries.length; j++) {
                    var entry = feeds.entries[j];
                    var videoUrl = entry.link;
                    $('ul#'+thisCategory).append('<li>'+entry.title+'</li>');
                }
            }
        });
}
I haven't tested this, so I'm not sure if it works.
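For reference, before let was available, another common way to capture the loop variable was an immediately-invoked function expression. The same idea without a named helper (a sketch, not tested against the real feed):

var videoCategories = ['Fruit', 'Vegetable', 'Pets'];
for (var i = 0; i < videoCategories.length; i++) {
    (function (thisCategory) {
        $.jGFeed('http://gdata.youtube.com/feeds/api/users/username/uploads?category=' + thisCategory,
            function (feeds) {
                if (!feeds) {
                    return false;
                }
                for (var j = 0; j < feeds.entries.length; j++) {
                    // thisCategory is fixed for this callback by the enclosing function
                    $('ul#' + thisCategory).append('<li>' + feeds.entries[j].title + '</li>');
                }
            });
    })(videoCategories[i]);
}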
