Can't run Selenium PhantomJS instances in parallell - javascript

I'm using Selenium's node.js API to run PhantomJS instances against a series of web pages. The code I use to execute the actions on the pages work fine, but it seems only one instance of Selenium/PhantomJS can run at a time. This function is called multiple times from the same module and steps through pages in a webshop where the pagination is handled client side (which is why I need the Selenium/PhantomJS environment - to extract data from each page).
Once again, the code in and of itself works fine, but it can't execute in parallell. What could be causing this?
module.exports = function (crawler, page, parsePage, done) {
"use strict";
var _ = require("lodash"),
format = require("util").format,
path = require("path"),
webdriver = require("selenium-webdriver"),
By = webdriver.By,
until = webdriver.until;
var phantomPath = path.resolve(__dirname, "../node_modules/.bin/phantomjs"),
isWin = process.platform === "win32";
var driver = new webdriver.Builder()
.withCapabilities({
"phantomjs.binary.path": isWin ? phantomPath + ".cmd" : phantomPath
})
.forBrowser("phantomjs")
.build();
var windowHandle = new webdriver.WebDriver.Window(driver);
windowHandle.setSize(1100, 1000);
var getAllPagesContent = function (driver) {
var pagesContent = [],
pageNo = 1;
var getNextPage = function () {
var nextPageLink;
return driver.findElements(By.css(".pagination li")).then(function (elements) {
return elements[elements.length - 1];
}).then(function (element) {
nextPageLink = element;
return element.getAttribute("class");
}).then(function (className) {
return _.includes(className, "active");
}).then(function (isLastPage) {
return (!isLastPage) ? driver.getPageSource() : false;
}).then(function (content) {
if (content)
pagesContent.push(content);
content && console.log("Got page %d", pageNo++);
return nextPageLink.findElement(By.css("a")).then(function (element) {
return element.click();
}).then(function () {
return driver.wait(until.stalenessOf(nextPageLink), 10 * 1000);
}).then(function () {
return content ? getNextPage() : pagesContent;
});
});
};
return getNextPage();
};
var processTimeout = setTimeout(function () {
console.log("PhantomJS for page %s took too long to execute", page.url);
driver.quit().then(done);
}, 60 * 1000);
driver.get(page.url).then(function () {
var pageOverlay = driver.findElement(By.css("#overlay-the-new"));
return pageOverlay.isDisplayed().then(function (visible) {
if (visible) {
pageOverlay.click();
return driver.wait(until.elementIsNotVisible(pageOverlay), 10000);
}
}).then(function () {
return getAllPagesContent(driver);
});
}).then(function (contents) {
clearTimeout(processTimeout);
console.log("Got %d pages for %s", contents.length, page.url);
_.forEach(contents, function (pageContent) {
parsePage(page.url, pageContent);
});
return driver.quit();
}).then(function () {
done();
});
}

Although PhantomJS is now deprecated you can still run it in parallel isolated Docker containers by using Selenoid. There is a ready to use image with latest release here: https://hub.docker.com/r/selenoid/phantomjs/tags/

Parallel execution with Selenium tends to be done using Remote WebDrivers and the Selenium Grid2 Framework.
This tutorial at WeDoQA seems to be the sort of thing you want. At a brief glance it has each test in a separate class, while a central test base class points towards Grid2's hub, which then (in the tutorial) executes the tests in parallel using a Firefox driver. You could easily retool this to use phantomjs, but you might have to rework your test structure.

It seems you're only using one driver. I'd initialize a second driver, then use threading to run in parallel. I think this could get the job done.

Use Thread for running in parallel or you can use any test framework which can take care of running the tests in parallel.

Related

Nightwatch.js function not 'closing'

I'm trying to perform a function at the beginning of my test, then the rest of the test should be executed.
This is my custom-command (named internalAdviceLinksHtml):
var solr = require('solr-client')
exports.command = function() {
this
var client = solr.createClient('solr.dev.bauerhosting.com', 8080, 'cms', '/www.parkers.co.uk');
var globalSettingsQuery = client.createQuery()
.q({TypeName:'Bauer.Parkers.GlobalSettings'})
.start(0)
.rows(10);
client.search(globalSettingsQuery,function(err,obj) {
if (err) {
console.log(err);
} else {
var myresult = (obj.response.docs[0].s_InternalAdviceLinksHtml);
console.log(myresult.length);
if (myresult.length === 0) {
console.log('content block not configured');
} else {
console.log('content block configured');
}
}
});
return this;
};
Test-file (script):
module.exports = {
'set up the solr query': function (browser) {
browser
.solr_query.global_settings.internalAdviceLinksHtml();
},
'links above footer on advice landing page displayed': function (browser) {
browser
.url(browser.launch_url + browser.globals.carAdvice)
.assert.elementPresent('section.seo-internal-links')
},
'closing the browser': function (browser) {
browser
.browserEnd();
},
};
The function works correctly (i.e. if myresult length is 0 then "content block is not configured" is displayed, etc), but the following test ("links above footer on advice landing page displayed") is never invoked.
It seems like the execution stops after the custom-command. I'm sure this will be something quite obvious to someone, but I just can't seem to see what it is.
Any help would be greatly appreciated.
Regarding your internalAdviceLinksHtml custom-command, everything looks good from my point of view (I presume that lonely this was a typo).
Your hunch is correct, it seems that the Nightwatch test-runner fails to go to the next test, which is probably due to some promise not being resolved upstream (client.search function from internalAdviceLinksHtml).
I would recommend doing a return this immediately after outputting to console (content block not configured, or content block configured) and see if that fixes the problem:
client.search(globalSettingsQuery,function(err,obj) {
if (err) {
console.log(err);
} else {
var myresult = (obj.response.docs[0].s_InternalAdviceLinksHtml);
console.log(myresult.length);
if (myresult.length === 0) {
console.log('content block not configured');
} else {
console.log('content block configured');
}
}
return this
});
Also, a few extra pointers:
make use of the Nightwatch test-hooks to make your tests easier to read/maintain & create a separation of concern (setup => before/beforeEach hooks | teardown (e.g: browser.end()) => after/afterEach hooks);
you need not do an explicit browser.end() at the end of your test case. Check this answer for more information on the matter.
Your test-file would become:
module.exports = {
// > do your setup here <
before(browser) {
browser
.solr_query.global_settings.internalAdviceLinksHtml();
},
'links above footer on advice landing page displayed': function (browser) {
browser
.url(browser.launch_url + browser.globals.carAdvice)
.assert.elementPresent('section.seo-internal-links');
},
// > do your cleanup here <
after(browser) {
browser
.browserEnd();
},
};

Electron app stops rendering while running Javascript code

I'm currently working on an electron app that is essentially a DDNS launcher for a media server I control. Basically, it checks for an internet connection, gets the current IP for the server, then opens it in the system's default browser. However, the splash screen that I wrote is totally broken.
Whenever I launch the app on my system (using npm from the terminal), it loads the frame, but the image freezes loading at about the 1/3 point. It won't load the rest of the image until the script that it at the bottom of the main HTML page is finished executing.
Is there something I'm missing about this? I can provide excerpts of the code if needed.
EDIT:
Source code excerpt:
<script>
function wait(ms) {
var start = new Date().getTime();
var end = start;
while (end < start + ms) {
end = new Date().getTime();
}
}
const isOnline = require('is-online');
const isReachable = require('is-reachable');
const {
shell
} = require('electron');
window.onload = function() {
// Main Script
console.log('before');
wait(3000);
document.getElementById('progresstext').innerHTML = "Testing connection...";
bar.animate(0.15); // Number from 0.0 to 1.0
wait(250);
var amIOnline = false;
if (isOnline()) {
amIOnline = true;
}
console.log("Internet Test Ran");
if (!amIOnline) {
document.getElementById('errortext').innerHTML = "ERROR: No internet connection. Check the internet connection.";
document.getElementById('progresstext').innerHTML = "ERROR";
}
var isEmbyReachable = false;
if (isReachable('******')) {
isEmbyReachable = true;
document.getElementById('progresstext').innerHTML = "Connection Test: Passed";
//=> true
}
//Open Emby in the default browser
if (amIOnline && isEmbyReachable) {
shell.openExternal("*****");
}
};
</script>
Pastebin link to the full source: https://pastebin.com/u1iZeSSK
Thanks
Development System Specs: macOS Mojave 10.14, Latest stable build of electron
The problem is in your wait function, since node js is sigle threaded your wait function is blocking your process. You may try following code. But I really recommend you to take a look at how to write async functions in JavaScript and setInterval and setTimeout as a start.
But for the time you may try this code.
window.onload = function () {
// Main Script
console.log('before');
// wait 3 seconds
setTimeout(function () {
document.getElementById('progresstext').innerHTML = "Testing connection...";
bar.animate(0.15); // Number from 0.0 to 1.0
// wait 250 mills
setTimeout(function () {
var amIOnline = false;
if (isOnline()) {
amIOnline = true;
}
console.log("Internet Test Ran");
if (!amIOnline) {
document.getElementById('errortext').innerHTML = "ERROR: No internet connection. Check the internet connection.";
document.getElementById('progresstext').innerHTML = "ERROR";
}
var isEmbyReachable = false;
if (isReachable('******')) {
isEmbyReachable = true;
document.getElementById('progresstext').innerHTML = "Connection Test: Passed";
//=> true
}
//Open Emby in the default browser
if (amIOnline && isEmbyReachable) {
shell.openExternal("*****");
}
}, 250)
}, 3000)
};
You may not while or any other blocking loops to wait in JavaScript since it will block all other executions including page rendering.

Update HTML object with node.js and javascript

I'm new to nodejs and jquery, and I'm trying to update one single html object using a script.
I am using a Raspberry pi 2 and a ultrasonic sensor, to measure distance. I want to measure continuous, and update the html document at the same time with the real time values.
When I try to run my code it behaves like a server and not a client. Everything that i console.log() prints in the cmd and not in the browesers' console. When I run my code now i do it with "sudo node surveyor.js", but nothing happens in the html-document. I have linked it properly to the script. I have also tried document.getElementsByTagName("h6").innerHTML = distance.toFixed(2), but the error is "document is not defiend".
Is there any easy way to fix this?
My code this far is:
var statistics = require('math-statistics');
var usonic = require('r-pi-usonic');
var fs = require("fs");
var path = require("path");
var jsdom = require("jsdom");
var htmlSource = fs.readFileSync("../index.html", "utf8");
var init = function(config) {
usonic.init(function (error) {
if (error) {
console.log('error');
} else {
var sensor = usonic.createSensor(config.echoPin, config.triggerPin, config.timeout);
//console.log(config);
var distances;
(function measure() {
if (!distances || distances.length === config.rate) {
if (distances) {
print(distances);
}
distances = [];
}
setTimeout(function() {
distances.push(sensor());
measure();
}, config.delay);
}());
}
});
};
var print = function(distances) {
var distance = statistics.median(distances);
process.stdout.clearLine();
process.stdout.cursorTo(0);
if (distance < 0) {
process.stdout.write('Error: Measurement timeout.\n');
} else {
process.stdout.write('Distance: ' + distance.toFixed(2) + ' cm');
call_jsdom(htmlSource, function (window) {
var $ = window.$;
$("h6").replaceWith(distance.toFixed(2));
console.log(documentToSource(window.document));
});
}
};
function documentToSource(doc) {
// The non-standard window.document.outerHTML also exists,
// but currently does not preserve source code structure as well
// The following two operations are non-standard
return doc.doctype.toString()+doc.innerHTML;
}
function call_jsdom(source, callback) {
jsdom.env(
source,
[ 'jquery-1.7.1.min.js' ],
function(errors, window) {
process.nextTick(
function () {
if (errors) {
throw new Error("There were errors: "+errors);
}
callback(window);
}
);
}
);
}
init({
echoPin: 15, //Echo pin
triggerPin: 14, //Trigger pin
timeout: 1000, //Measurement timeout in µs
delay: 60, //Measurement delay in ms
rate: 5 //Measurements per sample
});
Node.js is a server-side implementation of JavaScript. It's ok to do all the sensors operations and calculations on server-side, but you need some mechanism to provide the results to your clients. If they are going to use your application by using a web browser, you must run a HTTP server, like Express.js, and create a route (something like http://localhost/surveyor or just http://localhost/) that calls a method you have implemented on server-side and do something with the result. One possible way to return this resulting data to the clients is by rendering an HTML page that shows them. For that you should use a Template Engine.
Any DOM manipulation should be done on client-side (you could, for example, include a <script> tag inside your template HTML just to try and understand how it works, but it is not recommended to do this in production environments).
Try searching google for Node.js examples and tutorials and you will get it :)

BreezeJs with dedicated web worker

I am trying to initialize a Breeze manager inside a 'Web Worker'.
RequireJs, knockout, q, breeze are being imported inside the worker.
After a call to:EntityQuery.from('name').using(manager).execute(),
the following error appears:
Uncaught Error: Q is undefined. Are you missing Q.js? See https://github.com/kriskowal/q.
A live preview is uploaded here http://plnkr.co/edit/meXjKa?p=preview
(plunk supports downloading for easier debug).
EDIT -- relevant code
Worker.js
importScripts('knockout.js', 'q.js', 'breeze.js', 'require.js');
define('jquery', function () { return jQuery; });
define('knockout', ko);
define('q', Q); //Just trying to assign q since breeze requests Q as q
require(function () {
var self = this;
this.q = this.Q; //Just trying to assign q since breeze requests Q as q
breeze.NamingConvention.camelCase.setAsDefault();
var manager = new breeze.EntityManager("breeze/Breeze");
var EntityQuery = breeze.EntityQuery;
// Q or q here is defined (TESTED)
var test = function (name) {
return EntityQuery.from(name)
.using(manager).execute() // <-- Here q/Q breaks (I think on execute)
};
var primeData = function () {
return test('Languages')
.then(test('Lala'))
.then(test('Lala2'))
};
primeData();
setTimeout(function () { postMessage("TestMan"); }, 500);
});
Worker will be initialized on main page as:
var myWorker = new Worker("worker.js");
Ok here it goes:
Create a new requireJs and edit the
isBrowser = !!(typeof window !== 'undefined' && typeof navigator !== 'undefined' && window.document)
to
isBrowser = false
Create a new Jquery so it uses nothing related to window and generally anything that a WebWorker cannot access. Unfortunatelly i can't remember where i got this Custom JQueryJs but i have uploaded it here "https://dl.dropboxusercontent.com/u/48132252/jqueydemo.js".
Please if you find the author or the original change link and give credit.
My workerJs file looks like:
importScripts('Scripts/test.js', 'Scripts/jqueydemo.js', 'Scripts/q.js', 'Scripts/breeze.debug.js', 'Scripts/require2.js');
define('jquery', function () { return jQuery; });
require(
{
baseUrl: "..",
},
function () {
var manager = new breeze.EntityManager("breeze/Breeze");
var EntityQuery = breeze.EntityQuery;
var primeData = function () {
return EntityQuery.from(name)
.using(manager).execute() // Get my Data
.then(function (data) {
console.log("fetced!\n" + ((new Date()).getTime()));
var exportData = manager.exportEntities(); // Export my constructed entities
console.log("created!\n" + ((new Date()).getTime()));
var lala = JSON.stringify(exportData)
postMessage(lala); // Send them as a string to the main thread
})
};
primeData();
});
Finally on my mainJs i have something like:
this.testWorker = function () {
var myWorker = new Worker("worker.js"); // Init Worker
myWorker.onmessage = function (oEvent) { // On worker job finished
toastr.success('Worker finished and returned');
var lala = JSON.parse(oEvent.data); // Reverse string to JSON
manager.importEntities(lala); // Import the pre-Constructed Entities to breezeManager
toastr.success('Import done');
myWorker.terminate();
};
};
So we have managed to use breeze on a WebWorker enviroment to fetch and create all of our entities, pass our exported entities to our main breeze manager on the main thread(import).
I have tested this with 9 tables fully related to each other and about 4MB of raw data.
PROFIT: UI stays fully responsive all the time.
No more long execution script, application not responding or out of memory errors) at least for chrome
*As it makes sense breeze import entities is way more faster than the creation a full 4MB raw data plus the association process following for these entities.
By having all the heavy work done on the back, and only use import entities on the front, breeze allows you to handle large datasets 'like a breeze'.

Using Multiple page.open in Single Script

My goal is to execute PhantomJS by using:
// adding $op and $er for debugging purposes
exec('phantomjs script.js', $op, $er);
print_r($op);
echo $er;
And then inside script.js, I plan to use multiple page.open() to capture screenshots of different pages such as:
var url = 'some dynamic url goes here';
page = require('webpage').create();
page.open(url, function (status) {
console.log('opening page 1');
page.render('./slide1.png');
});
page = require('webpage').create();
page.open(url, function (status) {
console.log('opening page 2');
page.render('./slide2.png');
});
page = require('webpage').create();
page.open(url, function (status) {
console.log('opening page 3');
page.render('./slide3.png');
phantom.exit(); //<-- Exiting phantomJS only after opening all 3 pages
});
On running exec, I get the following output on page:
Array ( [0] => opening page 3 ) 0
As a result I only get the screenshot of the 3rd page. I'm not sure why PhantomJS is skipping the first and second blocks of code (evident from the missing console.log() messages that were supposed to be output from 1st and 2nd block) and only executing the third block of code.
The problem is that the second page.open is being invoked before the first one finishes, which can cause multiple problems. You want logic roughly like the following (assuming the filenames are given as command line arguments):
function handle_page(file){
page.open(file,function(){
...
page.evaluate(function(){
...do stuff...
});
page.render(...);
setTimeout(next_page,100);
});
}
function next_page(){
var file=args.shift();
if(!file){phantom.exit(0);}
handle_page(file);
}
next_page();
Right, it's recursive. This ensures that the processing of the function passed to page.open finishes, with a little 100ms grace period, before you go to the next file.
By the way, you don't need to keep repeating
page = require('webpage').create();
I've tried the accepted answer suggestions, but it doesn't work (at least not for v2.1.1).
To be accurate the accepted answer worked some of the time, but I still experienced sporadic failed page.open() calls, about 90% of the time on specific data sets.
The simplest answer I found is to instantiate a new page module for each url.
// first page
var urlA = "http://first/url"
var pageA = require('webpage').create()
pageA.open(urlA, function(status){
if (status){
setTimeout(openPageB, 100) // open second page call
} else{
phantom.exit(1)
}
})
// second page
var urlB = "http://second/url"
var pageB = require('webpage').create()
function openPageB(){
pageB.open(urlB, function(){
// ...
// ...
})
}
The following from the page module api documentation on the close method says:
close() {void}
Close the page and releases the memory heap associated with it. Do not use the page instance after calling this.
Due to some technical limitations, the web page object might not be completely garbage collected. This is often encountered when the same object is used over and over again. Calling this function may stop the increasing heap allocation.
Basically after I tested the close() method I decided using the same web page instance for different open() calls is too unreliable and it needed to be said.
You can use recursion:
var page = require('webpage').create();
// the urls to navigate to
var urls = [
'http://phantomjs.org/',
'https://twitter.com/sidanmor',
'https://github.com/sidanmor'
];
var i = 0;
// the recursion function
var genericCallback = function () {
return function (status) {
console.log("URL: " + urls[i]);
console.log("Status: " + status);
// exit if there was a problem with the navigation
if (!status || status === 'fail') phantom.exit();
i++;
if (status === "success") {
//-- YOUR STUFF HERE ----------------------
// do your stuff here... I'm taking a picture of the page
page.render('example' + i + '.png');
//-----------------------------------------
if (i < urls.length) {
// navigate to the next url and the callback is this function (recursion)
page.open(urls[i], genericCallback());
} else {
// try navigate to the next url (it is undefined because it is the last element) so the callback is exit
page.open(urls[i], function () {
phantom.exit();
});
}
}
};
};
// start from the first url
page.open(urls[i], genericCallback());
Using Queued Processes, sample:
var page = require('webpage').create();
// Queue Class Helper
var Queue = function() {
this._tasks = [];
};
Queue.prototype.add = function(fn, scope) {
this._tasks.push({fn: fn,scope: scope});
return this;
};
Queue.prototype.process = function() {
var proxy, self = this;
task = this._tasks.shift();
if(!task) {return;}
proxy = {end: function() {self.process();}};
task.fn.call(task.scope, proxy);
return this;
};
Queue.prototype.clear = function() {
this._tasks = []; return this;
};
// Init pages .....
var q = new Queue();
q.add(function(proxy) {
page.open(url1, function() {
// page.evaluate
proxy.end();
});
});
q.add(function(proxy) {
page.open(url2, function() {
// page.evaluate
proxy.end();
});
});
q.add(function(proxy) {
page.open(urln, function() {
// page.evaluate
proxy.end();
});
});
// .....
q.add(function(proxy) {
phantom.exit()
proxy.end();
});
q.process();
I hope this is useful, regards.

Categories

Resources