Cannot put phantom.exit() inside page.evaluate() in phantomjs - javascript

I have 2 code samples as following:
var page = require('webpage').create();
page.open('https://www.youtube.com', function(s) {
console.log(s);
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var t = page.evaluate(function() {
console.log('here');
phantom.exit(); // case 1: inside page.evaluate()
});
});
});
-> In this case, phantom.exit() is put inside page.evaluate(), and error appears: "ReferenceError: Can't find variable: phantom"
var page = require('webpage').create();
page.open('https://www.youtube.com', function(s) {
console.log(s);
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var t = page.evaluate(function() {
console.log('here');
});
phantom.exit(); // case 2: outside page.evaluate()
});
});
-> In case 2: phantom.exit() is put outside page.evaluate(). There is no error, but 'here' is not printed out.
=> Neither of 2 above ways can print out 'here'. Is there any effective way to print 'here' without error?

You can't use any of the phantom method inside of the page, in the same way the page itself can't use them.
The phantom object is a special host object provided by the runtime to the script it initially launches. Once a page has been loaded, the page runs in its own context with the usual browser variables, which do not include phantom.
You need to wait for the page.evaluate to call back to the phantom script, then call exit from there. The Phantom docs show this exact case, with exit being called after evaluate has returned. They don't seem to cover whether evaluate is blocking or not, but the example suggests that it may be.
To capture the log messages from the page, you need to subscribe to its onConsoleMessage event. The Phantom console only picks up messages from the script it is running, not any pages that it may load. All you need to do is route page messages to the phantom console:
page.onConsoleMessage = function(msg) {
console.log('PAGE: ' + msg);
};

Try returning required data back and do console.log() after evaluation
Example
var page = require('webpage').create();
page.open('https://www.youtube.com', function(s) {
console.log(s);
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var t = page.evaluate(function() {
return "here";
});
console.log(t);
phantom.exit(); // case 2: outside page.evaluate()
});
});

Related

PhantomJS onPrompt callback

I am trying to get the onPrompt callback working with PhantomJS.
Just for testing I have a basic angular application that prompts the user on initialization and displays that data on the page.
It works fine when I enter the information into the prompt manually, but it will not work when using the PhantomJS onPrompt callback.
Here's the angular app:
angular
.module('app')
.controller('test', TestController)
function TestController() {
var vm = this;
vm.$onInit = onInit;
vm.testData = '';
function onInit() {
vm.testData = prompt('Name?');
}
}
This is the code I running with PhantomJS
var page = require('webpage').create();
page.open('http://localhost:3000', function() {
console.log('test')
page.onPrompt = function(msg, defaultVal) {
console.log("MESSAGE", msg)
return "Dog";
};
page.render('test.pdf');
phantom.exit();
});
I would expect to get a console.log that says "MESSAGE" and the text from the prompt and also a screenshot of my page with "Dog" displayed.
I get a screenshot of a blank page and no console log.
I would ideally like to use this callback with node webshot as an option. webshot phantom callbacks
Thanks for the help.
There is a context in which interaction with the actual interaction with the page takes place - usually within the page.evaluate(..) function. The onPrompt callback will execute within that context and will not print to the console as you expect - I think you need to marshal the output back to the console by setting the onConsoleMessage callback first:
page.onConsoleMessage = function (msg) {
console.log(msg);
};
And then it should print to the console as you expect :)

Grab JavaScript console output with PhantomJS and evaluate it

I'm trying to parse the status page of my router to get the number of wlan devices. The page uses some JavaScript to get the status, so I tried to use PhantomJS, but had no luck.
This is the html source of the status page (status.html and status.js): http://pastebin.com/dmvptBqv
The developer tools of my browser show me this output on the console (anonymized):
([
{"vartype":"value","varid":"device_name","varvalue":"Speedport W 921V"},
{"vartype":"value","varid":"factorydefault","varvalue":"1"},
{"vartype":"value","varid":"rebooting","varvalue":"0"},
{"vartype":"value","varid":"router_state","varvalue":"OK"},
{"vartype":"value","varid":"bngscrat","varvalue":"0"},
{"vartype":"value","varid":"acsreach","varvalue":"0"},
Full reference
How can I get this evaluated output out of PhantomJS? Maybe it is very simple and I just missed the part in the documentation.
I think that i have to use the evluate function, but have no idea what is the correct function for the document object to return the complete evaluation.
var webPage = require('webpage');
var page = webPage.create();
page.open('blubb', function (status) {
var js= page.evaluate(function() {
return document.???;
});
console.log(js);
phantom.exit();
});
The main problem that you have is to get the console messages from the page into a single structure that you can do further processing on. This is easily done with the following code which waits indefinitely until the first console message appears and stops waiting as soon as no further messages appeared during 1 second.
var logs = []
timeoutID;
page.onConsoleMessage = function(msg){
if (timeoutID) clearTimeout(timeoutID);
logs.push(msg); // possibly also further processing
timeoutID = setTimeout(function(){
page.onConsoleMessage = function(msg){
console.log("CONSOLE: " + msg);
};
// TODO: further processing
console.log(JSON.stringify(logs, undefined, 4));
phantom.exit();
}, 1000);
};
page.open(url); // wait indefinitely
If each msg is valid JSON, then you can parse it immediately to get JavaScript objects. Change
logs.push(msg);
to
logs.push(JSON.parse(msg));

jQuery Ajax doesn't work in PhantomJS

Whats wrong with this code?
I'm trying to send a post request using jQuery ajax from PhantomJS, but it returns nothing besides "post:"
var webPage = require('webpage');
var page = webPage.create();
page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js', function() {
console.log('post:');
$.post("http://httpbin.org/post", function(data) {
console.log(data);
});
});
PhantomJS has two contexts. page.includeJs() instructs the DOM context (page context) to load the given JavaScript file. The callback is called when it is done. It means jQuery will only be available in the page context and never outside of it. You get access to the page context through page.evaluate().
Example:
page.onConsoleMessage = function(msg){
console.log("remote> " + msg);
};
page.includeJs('http://ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js', function() {
page.evaluate(function(){
console.log('post:');
$.post("http://httbpin.org/post", function(data) {
console.log(data);
});
});
setTimeout(function(){
// don't forget to exit
phantom.exit();
}, 2000);
});
You will have to run PhantomJS with the --web-security=false commandline option, otherwise it won't be able to send the request because of cross-domain restrictions:
phantomjs --web-security=false script.js
Please note that page.evaluate() is sandboxed. Please read the documentation fully.
The problem is related to security, you're trying to access a different domain.
In chrome it is possible to disable cross domain restrictions executing the following command in console:
chromium-browser --disable-web-security
Also you can add these flags to your direct access.

PhantomJS page.injectJs doesn't work

I'm currently trying to write the page source code into a text file by a URL. Everything works well, but I want to additionally inject a JavaScript file. The problem is that the file does not include properly. Only the last pages that are loaded, but others are incomplete.
//phantomjs C:\PhantomJS\Script\test1.js
var fs = require('fs');
var numeroEpisode = 0;
var maxEpisode = 10;
var fichierLien = fs.read('C:\\PhantomJS\\Fichier\\lien.txt');
var ListeLien = fichierLien.split(/[\n]/);
var page = require('webpage').create();
function GetPage()
{
if (numeroEpisode > maxEpisode)
{
phantom.exit();
}
page.open(ListeLien[numeroEpisode], function(status)
{
if(status !== 'success')
{
console.log('Impossible de charger la page.');
}
else
{
console.log('URL: '+ListeLien[numeroEpisode]+'');
page.injectJs('http://mylink.com', function() { });
var path = 'C:\\PhantomJS\\Fichier\\episode_'+numeroEpisode+'.html';
fs.write(path, page.content, 'w');
setTimeout(GetPage, 15000); // run again in 15 seconds
numeroEpisode++;
}
});
}
GetPage();
Don't mix up page.injectJs() and page.includeJs().
injectJs(filename): Loads a local JavaScript file into the page and evaluates it synchronously.
includeJs(url, callback): Loads a remote JavaScript file from the specified URL and evaluates it. Since it has to request a remote resource, this is done asynchronously. The passed callback is called as soon as the operation finished. If you don't use the callback, your code will most likely run before the remote JavaScript was included. Use that callback:
page.includeJs('http://mylink.com', function() {
var path = 'C:\\PhantomJS\\Fichier\\episode_'+numeroEpisode+'.html';
fs.write(path, page.content, 'w');
numeroEpisode++;
setTimeout(GetPage, 15000); // run again in 15 seconds
});
Since the JavaScript that you load changes something on the page, you probably need to load it after all the pages script have run. If this is a JavaScript heavy page, then you need to wait a little. You can wait a static amount of time:
setTimeout(function(){
page.includeJs('http://mylink.com', function() {
//...
});
}, 5000); // 5 seconds
or utilize waitFor to wait until an element appears that denotes that the page is completely loaded. This can be very tricky sometimes.
If you still want to use injectJs() instead of includeJs() (for example because of its synchronous nature), then you need to download the external JavaScript file to your machine and then you can use injectJs().

How to open a new tab in CasperJS

I am using CasperJS testing framework to make some test suite since almost a month now, but I am facing a problem in one of them.
Here is what I want to do: I am browsing a url (page1) and I have to make another action from an another url (simulate a new tab like we have on our graphics browser) without quitting the first one (page1). The action from the second url is going to change my first one. Hope it's clear enough :)
So for now when I reach the step to observe that on my first url I open the second one by doing a thenOpen(), so it's making a new navigation step and I am losing the current session and I cannot come back on it. I try many way such as using the history, reopen the page, using the event from CasperJS and also I try with PhantomJS but without success.
Here is some pseudo code to make it clearer:
casper.test.begin("A random test suite", 0, function testSuite(test) {
casper.start(url1, function () {
casper.then(function() {
// do some action on the first url
});
casper.then(function () {
// open url2 and do some action in a new tab to not lose the session of url1
});
casper.then(function () {
// check url1 (who should be still open)
});
});
casper.run(function () {
test.done();
});
});
I really would like to use CasperJS to do that but I start to think it is not possible, and I am starting to look in different solution such as this post:
CasperJS, parallel browsing WITH the testing framework. But I have never use node.js before so if it's the only way please show me some example.
Generally, it's not possible because a casper script runs inside only one phantomjs runtime. In your case it seems possible.
Note: Because this relies on a second casper instance, this cannot be used in a casper test environment.
You can create a new casper instance (casper2) inside one step of the outer casper instance (casper1). You then have to instruct casper1 to wait for completion of the casper2 instance, since casper is asynchronous in nature. Keep in mind that this is exactly like a new tab, so the instances will share the cache, cookies and storage.
Here is an sample script:
var casper1 = require('casper').create();
var casper2done = false;
casper1.start("http://www.example.com").then(function(){
casper1.capture("casper1_1.png");
var casper2 = require('casper').create();
casper2.start("http://stackoverflow.com/contact").then(function(){
casper1.echo(casper2.getCurrentUrl(), casper2.getTitle());
casper2.capture("casper2.png");
}).run(function(){
this.echo("DONE 2");
casper2done = true;
});
}).waitFor(function check(){
return casper2done;
}).then(function(){
casper1.echo(casper1.getCurrentUrl(), casper1.getTitle()); // Comment to fix answer (min 6 chars)
casper1.capture("casper1_2.png");
}).run(function(){
this.echo("DONE");
this.exit();
});
Here I use the promise chaining/builder pattern. You can even make your own function to hide the complexity and make it repeatedly usable:
var casper = require('casper').create();
// IIFE to hide casper2done variable
(function(casper){
var casper2done = false;
casper.newTab = function(url, then, timeout){
if (typeof url !== "string" || typeof then !== "function") {
throw "URL or then callback are missing";
}
this.then(function(){
var casper2 = require('casper').create();
casper2.start(url).then(then).run(function(){
casper2done = true;
});
}).waitFor(function check(){
return casper2done;
}, null, null, timeout).then(function(){
casper2done = false;
});
return this;
};
})(casper);
casper.start("http://www.example.com").newTab("http://stackoverflow.com/contact", function(){
// this is casper2
this.echo(this.getCurrentUrl(), this.getTitle());
this.capture("casper2_1.png");
this.thenClick("a#nav-askquestion");
this.then(function(){
this.echo(this.getCurrentUrl(), this.getTitle());
this.capture("casper2_2.png");
});
}, 15000).then(function(){
// this is casper
this.echo(casper.getCurrentUrl(), casper.getTitle());
this.capture("casper1.png");
}).run(function(){
this.echo("DONE");
this.exit();
});
You can use multiple steps in your child casper instance, but don't forget to specify a good timeout.

Categories

Resources