I am using CasperJS testing framework to make some test suite since almost a month now, but I am facing a problem in one of them.
Here is what I want to do: I am browsing a url (page1) and I have to make another action from an another url (simulate a new tab like we have on our graphics browser) without quitting the first one (page1). The action from the second url is going to change my first one. Hope it's clear enough :)
So for now when I reach the step to observe that on my first url I open the second one by doing a thenOpen(), so it's making a new navigation step and I am losing the current session and I cannot come back on it. I try many way such as using the history, reopen the page, using the event from CasperJS and also I try with PhantomJS but without success.
Here is some pseudo code to make it clearer:
casper.test.begin("A random test suite", 0, function testSuite(test) {
casper.start(url1, function () {
casper.then(function() {
// do some action on the first url
});
casper.then(function () {
// open url2 and do some action in a new tab to not lose the session of url1
});
casper.then(function () {
// check url1 (who should be still open)
});
});
casper.run(function () {
test.done();
});
});
I really would like to use CasperJS to do that but I start to think it is not possible, and I am starting to look in different solution such as this post:
CasperJS, parallel browsing WITH the testing framework. But I have never use node.js before so if it's the only way please show me some example.
Generally, it's not possible because a casper script runs inside only one phantomjs runtime. In your case it seems possible.
Note: Because this relies on a second casper instance, this cannot be used in a casper test environment.
You can create a new casper instance (casper2) inside one step of the outer casper instance (casper1). You then have to instruct casper1 to wait for completion of the casper2 instance, since casper is asynchronous in nature. Keep in mind that this is exactly like a new tab, so the instances will share the cache, cookies and storage.
Here is an sample script:
var casper1 = require('casper').create();
var casper2done = false;
casper1.start("http://www.example.com").then(function(){
casper1.capture("casper1_1.png");
var casper2 = require('casper').create();
casper2.start("http://stackoverflow.com/contact").then(function(){
casper1.echo(casper2.getCurrentUrl(), casper2.getTitle());
casper2.capture("casper2.png");
}).run(function(){
this.echo("DONE 2");
casper2done = true;
});
}).waitFor(function check(){
return casper2done;
}).then(function(){
casper1.echo(casper1.getCurrentUrl(), casper1.getTitle()); // Comment to fix answer (min 6 chars)
casper1.capture("casper1_2.png");
}).run(function(){
this.echo("DONE");
this.exit();
});
Here I use the promise chaining/builder pattern. You can even make your own function to hide the complexity and make it repeatedly usable:
var casper = require('casper').create();
// IIFE to hide casper2done variable
(function(casper){
var casper2done = false;
casper.newTab = function(url, then, timeout){
if (typeof url !== "string" || typeof then !== "function") {
throw "URL or then callback are missing";
}
this.then(function(){
var casper2 = require('casper').create();
casper2.start(url).then(then).run(function(){
casper2done = true;
});
}).waitFor(function check(){
return casper2done;
}, null, null, timeout).then(function(){
casper2done = false;
});
return this;
};
})(casper);
casper.start("http://www.example.com").newTab("http://stackoverflow.com/contact", function(){
// this is casper2
this.echo(this.getCurrentUrl(), this.getTitle());
this.capture("casper2_1.png");
this.thenClick("a#nav-askquestion");
this.then(function(){
this.echo(this.getCurrentUrl(), this.getTitle());
this.capture("casper2_2.png");
});
}, 15000).then(function(){
// this is casper
this.echo(casper.getCurrentUrl(), casper.getTitle());
this.capture("casper1.png");
}).run(function(){
this.echo("DONE");
this.exit();
});
You can use multiple steps in your child casper instance, but don't forget to specify a good timeout.
Related
I'm using a page loader on the front page of my website, I'd like it to run only the first time someone visits my website, so later on since the page will be cached it'll execture faster and thus I won't need the loader the next times he visits.
I thought using storing a signal to caches/cookies to do so, but I have no idea how ?
here is the loader javascript :
function myFunction() {
var myVar = setTimeout(showPage, 1000);
}
function showPage() {
$("#loader_sec").css("display","none");
$("#bodyloader").css("display","block");
}
myFunction();
<div id="loader_sec">
...
</div>
How should I configure caches/cookies to launch this code only the first time someone visits ? If there are better ways to do so please suggest.
Try this:
function myFunction() {
var myVar = setTimeout(showPage, 1000);
}
function showPage() {
$("#loader_sec").css("display","none");
$("#bodyloader").css("display","block");
}
if(!localStorage.getItem("visited")){
myFunction();
localStorage.setItem("visited",true);
}
<div id="loader_sec">
...
</div>
Try with Session/local storage. Like this -
$(window).load(function () {
$(function () {
if (!sessionStorage.getItem("runOnce")) {
// Your code goes here....
sessionStorage.setItem("runOnce", true);
}
});
});
You'll need to persist some data on the user's device in order to know that they have visited your site before. You would do this via local storage or cookies.
Here is a simple library you can use to read/write cookies: https://developer.mozilla.org/en-US/docs/Web/API/Document/cookie/Simple_document.cookie_framework
You could write a value to a cookie when the user has been shown the page loader. A check for this value when the page loads will let you know if the loader has been displayed previously or not and inform your decision about whether it should be displayed this time.
I would do the logic in a caching/data access layer. You can use https://github.com/kriskowal/q for executing functions as a callback when another function completes.
Then you can do
getData(key)
.then(function () {
hideLoader(); // Function for your show/hide logic
});
or something of the sort.
getData would look something like:
var getData = function (key) {
var deferred = Q.defer();
if (cache[key]) {
deferred.resolve(cache[key]);
} else {
//Data request logic that returns 'data'
deferred.resolve(data);
}
return deferred.promise;
};
This way you're not guessing how long your requests are going to take and you're not forcing a second long load time on your user because the page will load as soon as your data has been retrieved.
By the way, the cache here is just a key/value store. Aka var cache = {};
I´ve recently started using horseman.js to scrap a page with node. I can´t figure out how exactly it works and I can´t find good examples on the internet.
My main goal is to log on a platform and extract some data. I´ve managed to do this with PhantomJS, but know I want to learn how to do it with horseman.JS.
My code should open the login page, fill the login and password inputs and click on the "login" button. Pretty easy so far. However, after clicking on the "login" button the site makes 2 redirects before loading the actual page where I want to work.
My problem is that I don´t know how to make my code wait for that page.
With phantomJS I had a workaround with the page URL. The following code shows how I´ve managed to do it with phantomJS and it works just fine:
var page = require('webpage').create();
var urlHome = 'http://akna.com.br/site/montatela.php?t=acesse&header=n&footer=n';
var fillLoginInfo = function(){
$('#cmpLogin').val('mylogin');
$('#cmpSenha').val('mypassword');
$('.btn.btn-default').click();
};
page.onLoadFinished = function(){
var url = page.url;
console.log("Page Loaded: " + url);
if(url == urlHome){
page.evaluate(fillLoginInfo);
return;
}
// After the redirects the url has a "sid" parameter, I wait for that to apear when the page loads.
else if(url.indexOf("sid=") >0){
//Keep struggling with more codes!
return;
}
}
page.open(urlHome);
However, I can´t find a way to handle the redirects with horseman.JS.
Here is what I´ve been trying with horseman.JS without any success:
var Horseman = require("node-horseman");
var horseman = new Horseman();
var urlHome = 'http://akna.com.br/site/montatela.php?t=acesse&header=n&footer=n';
var fillLoginInfo = function(){
$('#cmpLogin').val('myemail');
$('#cmpSenha').val('mypassword');
$('.btn.btn-default').click();
}
var okStatus = function(){
return horseman.status();
}
horseman
.open(urlHome)
.type('input[name="cmpLogin"]','myemail')
.type('input[name="cmpSenha"]','mypassword')
.click('.btn-success')
.waitFor(okStatus, 200)
.screenshot('image.png')
.close();
How do I handle the redirects?
I'm currently solving the same problem, and my best solution so far is to use the waitForSelector method to target something on the final page.
E.g.
horseman
.open(urlHome)
.type('input[name="cmpLogin"]','myemail')
.type('input[name="cmpSenha"]','mypassword')
.click('.btn-success')
.waitForSelector("#loginComplete")
.screenshot('image.png')
.close();
Of course you have to know the page you're waiting for to do this.
If you know there are two redirects, you can use the approach of .waitForNextPage() twice. A naive approach if you didn't know how many redirects to expect would be to chain these until a timeout is reached (I don't recommend this as it will be slow!),
Perhaps a cleverer way, you can also use on events to capture redirects, like .on('navigationRequested') or .on('urlChanged').
Although it doesn't answer your question directly, this link may help: https://github.com/ariya/phantomjs/issues/11507
I'm trying to parse the status page of my router to get the number of wlan devices. The page uses some JavaScript to get the status, so I tried to use PhantomJS, but had no luck.
This is the html source of the status page (status.html and status.js): http://pastebin.com/dmvptBqv
The developer tools of my browser show me this output on the console (anonymized):
([
{"vartype":"value","varid":"device_name","varvalue":"Speedport W 921V"},
{"vartype":"value","varid":"factorydefault","varvalue":"1"},
{"vartype":"value","varid":"rebooting","varvalue":"0"},
{"vartype":"value","varid":"router_state","varvalue":"OK"},
{"vartype":"value","varid":"bngscrat","varvalue":"0"},
{"vartype":"value","varid":"acsreach","varvalue":"0"},
Full reference
How can I get this evaluated output out of PhantomJS? Maybe it is very simple and I just missed the part in the documentation.
I think that i have to use the evluate function, but have no idea what is the correct function for the document object to return the complete evaluation.
var webPage = require('webpage');
var page = webPage.create();
page.open('blubb', function (status) {
var js= page.evaluate(function() {
return document.???;
});
console.log(js);
phantom.exit();
});
The main problem that you have is to get the console messages from the page into a single structure that you can do further processing on. This is easily done with the following code which waits indefinitely until the first console message appears and stops waiting as soon as no further messages appeared during 1 second.
var logs = []
timeoutID;
page.onConsoleMessage = function(msg){
if (timeoutID) clearTimeout(timeoutID);
logs.push(msg); // possibly also further processing
timeoutID = setTimeout(function(){
page.onConsoleMessage = function(msg){
console.log("CONSOLE: " + msg);
};
// TODO: further processing
console.log(JSON.stringify(logs, undefined, 4));
phantom.exit();
}, 1000);
};
page.open(url); // wait indefinitely
If each msg is valid JSON, then you can parse it immediately to get JavaScript objects. Change
logs.push(msg);
to
logs.push(JSON.parse(msg));
I have 2 code samples as following:
var page = require('webpage').create();
page.open('https://www.youtube.com', function(s) {
console.log(s);
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var t = page.evaluate(function() {
console.log('here');
phantom.exit(); // case 1: inside page.evaluate()
});
});
});
-> In this case, phantom.exit() is put inside page.evaluate(), and error appears: "ReferenceError: Can't find variable: phantom"
var page = require('webpage').create();
page.open('https://www.youtube.com', function(s) {
console.log(s);
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var t = page.evaluate(function() {
console.log('here');
});
phantom.exit(); // case 2: outside page.evaluate()
});
});
-> In case 2: phantom.exit() is put outside page.evaluate(). There is no error, but 'here' is not printed out.
=> Neither of 2 above ways can print out 'here'. Is there any effective way to print 'here' without error?
You can't use any of the phantom method inside of the page, in the same way the page itself can't use them.
The phantom object is a special host object provided by the runtime to the script it initially launches. Once a page has been loaded, the page runs in its own context with the usual browser variables, which do not include phantom.
You need to wait for the page.evaluate to call back to the phantom script, then call exit from there. The Phantom docs show this exact case, with exit being called after evaluate has returned. They don't seem to cover whether evaluate is blocking or not, but the example suggests that it may be.
To capture the log messages from the page, you need to subscribe to its onConsoleMessage event. The Phantom console only picks up messages from the script it is running, not any pages that it may load. All you need to do is route page messages to the phantom console:
page.onConsoleMessage = function(msg) {
console.log('PAGE: ' + msg);
};
Try returning required data back and do console.log() after evaluation
Example
var page = require('webpage').create();
page.open('https://www.youtube.com', function(s) {
console.log(s);
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var t = page.evaluate(function() {
return "here";
});
console.log(t);
phantom.exit(); // case 2: outside page.evaluate()
});
});
I'm currently trying to write the page source code into a text file by a URL. Everything works well, but I want to additionally inject a JavaScript file. The problem is that the file does not include properly. Only the last pages that are loaded, but others are incomplete.
//phantomjs C:\PhantomJS\Script\test1.js
var fs = require('fs');
var numeroEpisode = 0;
var maxEpisode = 10;
var fichierLien = fs.read('C:\\PhantomJS\\Fichier\\lien.txt');
var ListeLien = fichierLien.split(/[\n]/);
var page = require('webpage').create();
function GetPage()
{
if (numeroEpisode > maxEpisode)
{
phantom.exit();
}
page.open(ListeLien[numeroEpisode], function(status)
{
if(status !== 'success')
{
console.log('Impossible de charger la page.');
}
else
{
console.log('URL: '+ListeLien[numeroEpisode]+'');
page.injectJs('http://mylink.com', function() { });
var path = 'C:\\PhantomJS\\Fichier\\episode_'+numeroEpisode+'.html';
fs.write(path, page.content, 'w');
setTimeout(GetPage, 15000); // run again in 15 seconds
numeroEpisode++;
}
});
}
GetPage();
Don't mix up page.injectJs() and page.includeJs().
injectJs(filename): Loads a local JavaScript file into the page and evaluates it synchronously.
includeJs(url, callback): Loads a remote JavaScript file from the specified URL and evaluates it. Since it has to request a remote resource, this is done asynchronously. The passed callback is called as soon as the operation finished. If you don't use the callback, your code will most likely run before the remote JavaScript was included. Use that callback:
page.includeJs('http://mylink.com', function() {
var path = 'C:\\PhantomJS\\Fichier\\episode_'+numeroEpisode+'.html';
fs.write(path, page.content, 'w');
numeroEpisode++;
setTimeout(GetPage, 15000); // run again in 15 seconds
});
Since the JavaScript that you load changes something on the page, you probably need to load it after all the pages script have run. If this is a JavaScript heavy page, then you need to wait a little. You can wait a static amount of time:
setTimeout(function(){
page.includeJs('http://mylink.com', function() {
//...
});
}, 5000); // 5 seconds
or utilize waitFor to wait until an element appears that denotes that the page is completely loaded. This can be very tricky sometimes.
If you still want to use injectJs() instead of includeJs() (for example because of its synchronous nature), then you need to download the external JavaScript file to your machine and then you can use injectJs().