I used this example to write a PhantomJS script that logs in to a website.
// Logs in to Facebook with PhantomJS and saves a screenshot of whatever the
// page shows five seconds after the login form has been submitted.
var page = require('webpage').create();

// Relays console output produced inside the page to the PhantomJS console.
function relayConsoleMessage(msg, lineNum, sourceId) {
    console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
}

// Executed inside the page: fills in the credentials and clicks the login button.
function submitLoginForm() {
    console.log('hello');
    document.getElementById("email").value = "email";
    document.getElementById("pass").value = "password";
    document.getElementById("u_0_1").click();
    // page is redirecting.
}

// Executed inside the page once the wait is over.
function logAfterWait() {
    console.log('haha');
}

// Runs on the PhantomJS side after the fixed wait: log, screenshot, quit.
function captureAndExit() {
    page.evaluate(logAfterWait);
    page.render("page.png");
    phantom.exit();
}

page.open("http://www.facebook.com/login.php", function(status) {
    // Nothing happens unless the login page loaded successfully.
    if (status !== "success") {
        return;
    }
    page.onConsoleMessage = relayConsoleMessage;
    page.evaluate(submitLoginForm);
    // Fixed five-second wait for the post-login redirect before capturing.
    setTimeout(captureAndExit, 5000);
});
From this link.
https://gist.github.com/ecin/2473860
But I want to open another link from a button or go directly on it. How can I do it?
Here is a simpler example. Doesn't work...
// Tries to screenshot two sites with one PhantomJS page object.
// NOTE(review): both page.open() calls are issued back to back at the top
// level; the second one does not wait for the first callback to run.
var page = require('webpage').create();
// NOTE(review): the URL carries no protocol prefix (e.g. http://).
var url = "www.example.com";
page.open(url, function (status) {
// `status` is not inspected; the screenshot is attempted after a fixed 5 s wait.
setTimeout(function () {
page.evaluate(function () {
console.log('haha');
});
page.render("example.png");
// Ends the whole PhantomJS process, not just this page load.
phantom.exit();
}, 5000);
});
// Re-declares and overwrites the `url` variable used above.
var url = "www.google.com";
page.open(url, function (status) {
setTimeout(function () {
page.evaluate(function () {
console.log('haha');
});
page.render("google.png");
phantom.exit();
}, 5000);
});
Very close, now combine your two snippets into one. page.open() is asynchronous which is why you need to open the next page only after the first one has finished:
// Opens two pages one after the other with a single PhantomJS page object.
// page.open() is asynchronous, so the second page is opened only from inside
// the first page's callback, after the first screenshot has been taken.
var page = require('webpage').create();
var url = "http://www.example.com";

// Forwards console.log() calls made inside page.evaluate() to the PhantomJS
// console. Registered once, before the first page.open(), so it is in place
// for both page loads.
page.onConsoleMessage = function(msg, lineNum, sourceId) {
    console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
};

page.open(url, function (status) {
    // Stop early instead of scripting and rendering a page that never loaded.
    if (status !== "success") {
        console.log('Unable to load ' + url);
        phantom.exit(1);
        return;
    }

    // NOTE(review): assumes the opened page contains elements with the ids
    // "email", "pass" and "u_0_1" - confirm against the real target page.
    page.evaluate(function() {
        document.getElementById("email").value = "email";
        document.getElementById("pass").value = "password";
        document.getElementById("u_0_1").click();
        // page is redirecting.
    });

    // Fixed wait for the navigation triggered by the click above.
    setTimeout(function () {
        page.evaluate(function () {
            console.log('haha');
        });
        page.render("example.png");

        // Distinct name so the outer `url` is not shadowed.
        var nextUrl = "http://www.google.com";
        page.open(nextUrl, function (status) {
            if (status !== "success") {
                console.log('Unable to load ' + nextUrl);
                phantom.exit(1);
                return;
            }
            setTimeout(function () {
                page.evaluate(function () {
                    console.log('haha');
                });
                page.render("google.png");
                phantom.exit();
            }, 5000);
        });
    }, 5000);
});
To actually see the console.log() output from inside page.evaluate(), you need to register a handler for the page.onConsoleMessage event. There are other events that are helpful when debugging.
Don't forget to add the protocol (http:// or file:///) to the URLs that you're opening. PhantomJS is a bit picky in that regard.
Instead of waiting a static amount of time (setTimeout()) for the next page to load after you perform some action, you should make use of the page.onLoadFinished event. This is rather cumbersome to get right for navigation-intensive scripts. Use CasperJS for longer scripts.
Oftentimes Element.click() doesn't work. This question has many solutions for those cases.
Related
I'm pretty new to PhantomJS, so I'm just trying to stumble through some practical examples (the documentation feels a little light on those).
One thing I was attempting was to submit votes to a phony PollDaddy poll, but I'm struggling. Using the script below, I can tell from the first screenshot that my option button is being clicked/selected. But why does my answer not get submitted? The second screenshot looks identical to the first, despite my code executing a click event on the VOTE button in the second evaluate. Anyone know why? Know how to make it work?
// Opens a PollDaddy poll, dispatches a synthetic click on one answer's radio
// button, screenshots the page, dispatches a synthetic click on the vote
// button and screenshots again two seconds later.
// NOTE(review): no phantom.exit() call is visible anywhere in this script.
var webPage = require('webpage');
var page = webPage.create();
//******* BEGIN LOGGING METHODS *******
// http://phantomjs.org/api/webpage/handler/on-url-changed.html
page.onUrlChanged = function(targetUrl) {
console.log('New URL: ' + targetUrl);
};
// http://phantomjs.org/api/webpage/handler/on-console-message.html
page.onConsoleMessage = function(msg, lineNum, sourceId) {
console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
};
// http://phantomjs.org/api/webpage/handler/on-error.html
page.onError = function(msg, trace) {
// Collects the message plus one line per stack frame, then prints them together.
var msgStack = ['ERROR: ' + msg];
if (trace && trace.length) {
msgStack.push('TRACE:');
trace.forEach(function(t) {
msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function +'")' : ''));
});
}
console.error(msgStack.join('\n'));
};
// http://phantomjs.org/api/webpage/handler/on-resource-error.html
page.onResourceError = function(resourceError) {
console.log('Unable to load resource (#' + resourceError.id + ' URL:' + resourceError.url + ')');
console.log('Error code: ' + resourceError.errorCode + '. Description: ' + resourceError.errorString);
};
// http://phantomjs.org/api/webpage/handler/on-resource-timeout.html
page.onResourceTimeout = function(request) {
console.log('Response Timeout (#' + request.id + '): ' + JSON.stringify(request));
};
//******* END LOGGING METHODS *******
page.open('https://polldaddy.com/poll/9424638/', function(status) {
// `status` is only logged; the steps below run whatever its value.
console.log('Status: ' + status);
//make selection
var myselection = page.evaluate(function() {
var ev = document.createEvent("MouseEvent");
// The four numeric arguments after `null` (screenX, screenY, clientX, clientY
// in the DOM initMouseEvent signature) are all 0 for this synthetic click.
ev.initMouseEvent("click", true, true, window, null,0, 0, 0, 0, false, false, false, false, 0, null);
//radio button for Curly has id=PDI_answer42988707
var myselection = document.querySelector("#PDI_answer42988707");
(myselection).dispatchEvent(ev);
// The page title is the only value returned to the PhantomJS side.
return document.title;
});
//screen capture the selection
page.render('selection.png');
console.log(myselection);
//click the Vote button
var dovote = page.evaluate(function() {
var ev = document.createEvent("MouseEvent");
// Same all-zero coordinates as the click dispatched on the radio button.
ev.initMouseEvent("click", true, true, window, null,0, 0, 0, 0, false, false, false, false, 0, null);
//get a handle to the vote button
var votebutton = document.querySelector("a.vote-button");
//click the vote button
(votebutton).dispatchEvent(ev);
return document.title;
});
//delay, then take screenshot...
setTimeout(
function(){
page.render('voted.png');
// `dovote` holds the title captured right after the click was dispatched,
// not a value read after the two-second delay.
console.log(dovote);
}
,2000
);
});
I'm executing the script using PhantomJS 1.9.0 on Linux Mint. The execution parameters are like this to overcome any SSL Handshake failures:
phantomjs --ignore-ssl-errors=true --ssl-protocol=any myscript.js
I've also tried setting --cookies-file and --local-storage-path on the command line, but that didn't help either.
The output I get from all of the console logging above is this:
New URL: https://polldaddy.com/poll/9424638/ <-- from urlChange
CONSOLE: JQMIGRATE: Logging is active (from line # in "") <-- from onConsoleMessage
Status: success
Who is your favorite Stooge (poll 9424638) | Polldaddy.com
Who is your favorite Stooge (poll 9424638) | Polldaddy.com
My useless poll is here: https://polldaddy.com/poll/9424638/, and you'll see from my script that I'm trying to stack the results for Curly.
If you look at the javascript that is being included with the polldaddy page, you'll find a vote function with the following check:
if (event.pageX) {
eventX = event.pageX;
eventY = event.pageY;
} else {
eventX = event.clientX;
eventY = event.clientY;
}
...
if (eventX == 0 && eventY == 0) {
return false
}
Basically they are looking at the event location and short-circuiting if the x and y location are both equal to 0.
If you change the mouse event that you create so that the clientX or clientY value is not 0, then your script should work.
The javascript is located on the same website that I'm opening.
Example: http://test.site.com
js => http://test.site.com/cache/6343019445fb7d95bd2bd09c5bbfb002.js
I'm also including jQuery from googleapis and that appears to load! I tried getting some data from events like onResourceError but nothing shows up.
This is my test code:
// Loads a single page with diagnostic handlers attached, dumps its HTML,
// saves a screenshot four seconds after the load finishes, then exits.
var page = require('webpage').create();
var uri = 'http://test.site.com';

page.viewportSize = { width: 1366, height: 720 };

// Page settings, all applied before the page is opened.
page.settings.loadImages = true;
page.settings.javascriptEnabled = true;
page.settings.localToRemoteUrlAccessEnabled = true;
page.settings.webSecurityEnabled = false;
page.settings.XSSAuditingEnabled = false;

// Relays console output from the page to the PhantomJS console.
function reportConsoleMessage(msg, lineNum, sourceId) {
    console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
}

// Reports a resource that could not be loaded.
function reportResourceError(resourceError) {
    console.log('Unable to load resource (#' + resourceError.id + 'URL:' + resourceError.url + ')');
    console.log('Error code: ' + resourceError.errorCode + '. Description: ' + resourceError.errorString);
}

// Reports a resource request that timed out.
function reportResourceTimeout(request) {
    console.log('Response (#' + request.id + '): ' + JSON.stringify(request));
}

// Reports a JavaScript error raised in the page, with one line per stack frame.
function reportPageError(msg, trace) {
    var lines = ['ERROR: ' + msg];
    if (trace && trace.length) {
        lines.push('TRACE:');
        for (var i = 0; i < trace.length; i++) {
            var frame = trace[i];
            var inFunction = frame.function ? ' (in function "' + frame.function +'")' : '';
            lines.push(' -> ' + frame.file + ': ' + frame.line + inFunction);
        }
    }
    console.error(lines.join('\n'));
}

// Dumps the page source, renders the screenshot and ends the process.
function dumpRenderAndExit() {
    console.log(page.content); // <script> is there
    console.log('render');
    page.render('test.png');
    phantom.exit();
}

page.onConsoleMessage = reportConsoleMessage;
page.onResourceError = reportResourceError;
page.onResourceTimeout = reportResourceTimeout;
page.onError = reportPageError;

// Gives the page four seconds after the load event before dumping it.
page.onLoadFinished = function(){
    setTimeout(dumpRenderAndExit, 4000);
};

page.open(uri, function() {
    console.log('loaded');
});
I see no other console messages besides render, loaded and the page content.
What am I doing wrong?
You'll have to evaluate the page. I believe it's something like this.
// Skeleton call: the function handed to page.evaluate() is where code that
// should run against the loaded page goes. `page` is assumed to be an
// already-opened webpage object created elsewhere.
page.evaluate(function () {
// This is after page is evaluated and javascript is run
});
Note that evaluate doesn't give you access to the JS functions or variables; it only runs the code for you. If you need to access the JS code from Phantom, you'll want to put the JavaScript code inside the page.evaluate yourself so that you can console.log or manipulate it as you need to.
I'm having a hard time converting this code so that it is usable in a node server. The code is written to run in a PhantomJS process (i.e. $: phantomjs index.js), but I want to run it in a node server using the package require("phantom"). I'm having trouble getting these two callbacks to work, however.
// Logs a fixed message when a page load finishes; `status` is received but not used.
page.onLoadFinished = function(status){
console.log("Load Finished");
};
// Logs a fixed message when the page URL changes.
page.onUrlChanged = function(){
console.log("URL Changed");
};
Here is my pathetic attempt at trying to nodefy the whole situation.
// Attempt to drive a login form through the promise-based `phantom` package:
// create a PhantomJS instance, create a page, open the login URL, fill in
// the form and click the first button.
phantom.create(['--ignore-ssl-errors=yes','--load-images=no']).then(function(ph) {
console.log("here");
ph.createPage().then(function(page) {
// Logs the URL of every resource the page requests.
page.property('onResourceRequested', function(requestData, networkRequest) {
console.log(requestData.url);
});
page.open('https://example.com/login').then(function(status) {
console.log(status);
if (status !== 'success') { console.log("failed connection")} else {
page.evaluate(function() {
document.getElementById('email').value = "stuff";
document.getElementById('password').value = "things";
// NOTE(review): click() is invoked right here and its return value is
// what gets passed to setTimeout, so the click is not delayed by 5000 ms.
setTimeout(document.getElementsByTagName('button')[0].click(),5000);
console.log("login attempt");
// NOTE(review): the first argument is the URL string, not a function.
setTimeout(document.URL, 2000);
});
// NOTE(review): these two handlers are assigned only after page.open() has
// resolved and the form has been submitted, and as plain properties rather
// than through page.property() as used above - confirm they are picked up.
page.onLoadFinished = function(status){
console.log("Load Finished");
};
page.onUrlChanged = function(){
console.log("url changed");
};
}
});
});
});
Also the code works and gets the page and clicks the button, however the problem is after the phantom logs in, I need data from the next page which I was going to use the onUrlChanged and onLoadFinished to do.
page.onLoadFinished and page.onUrlChanged are callback functions that are executed after page has been opened, so it makes sense to assign them before opening an url.
It is also a useful habit to subscribe to console.log and error messages from a webpage.
// Drives a login form through the promise-based `phantom` package.
// Every page handler is registered before page.open() is called, so the
// handlers are already in place for the initial load and for the navigation
// that follows the login click.
var phantom = require('phantom');

phantom.create(['--ignore-ssl-errors=yes','--load-images=no']).then(function(ph) {
    console.log("here");
    ph.createPage().then(function(page) {
        // Reports JavaScript errors raised inside the page, one line per stack frame.
        page.property('onError', function(msg, trace) {
            var msgStack = ['ERROR: ' + msg];
            if (trace && trace.length) {
                msgStack.push('TRACE:');
                trace.forEach(function(t) {
                    msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function +'")' : ''));
                });
            }
            console.error(msgStack.join('\n'));
        });

        // Relays console.log() calls made inside page.evaluate().
        page.property('onConsoleMessage', function(msg, lineNum, sourceId) {
            console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
        });

        // Logs the URL of every resource the page requests.
        page.property('onResourceRequested', function(requestData, networkRequest) {
            console.log(requestData.url);
        });

        page.property('onLoadFinished', function(status) {
            console.log("Load Finished with status " + status);
        });

        page.property('onUrlChanged', function(targetUrl) {
            console.log("URL changed to: " + targetUrl);
        });

        page.open('https://example.com/login').then(function(status) {
            if (status !== 'success') {
                console.log("failed connection");
            } else {
                page.evaluate(function() {
                    document.getElementById('email').value = "email";
                    document.getElementById('password').value = "password";
                    // setTimeout gets a function here, so the click really
                    // happens after the five-second delay.
                    setTimeout(function(){
                        console.log("login attempt");
                        document.getElementsByTagName('button')[0].click();
                    }, 5000);
                });
            }
        });
    });
});
I happened to write a program to use the google translate (http://www.translate.google.com)
using PhantomJS.
But I am unable to insert text into the textarea. I've searched a lot, but nothing proved useful. However, I am able to print the result content.
Here's my code:
// Opens Google Translate, injects jQuery into the page, writes text into the
// source box and prints what is read back from the source and result elements.
var page = require('webpage').create();
page.open("http://translate.google.com", function(status) {
if ( status === "success" ) {
console.log(status);
// The callback runs once the injected jQuery script has been loaded.
page.includeJs("//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min.js", function() {
// NOTE(review): this selector is 'source' with no leading '#', unlike the
// '#source' selector used on the following line; `c` is never read.
var c=page.evaluate(function(){$('source').val("sample text");
return $('#source').text();
});
// NOTE(review): the value is written with .val() but read back with .text();
// these may not refer to the same data for this element - confirm.
var f= page.evaluate(function() {
$('#source').val("sbjbsdfsdfbbs");
return $('#source').text();
});
console.log(f);//should print input text
// Reads the result box in the same evaluate() call that sets the input, with no wait in between.
var result= page.evaluate(function() {
$('#source').val("sbjbsdfsdfbbs");
return $('#result_box').text();
});
console.log(result);
phantom.exit()
});
}
});
Try to attach your text and languages to the URL when opening the page:
'http://translate.google.com/#en/es/translate this'
// Opens Google Translate with the languages and the text to translate
// carried in the URL fragment: #<from>/<to>/<text>.
var page = require('webpage').create();
var inputText = 'translate this';
var langFrom = 'en';
var langTo = 'es';
// The text is percent-encoded so that spaces, '/', '#', '%' and non-ASCII
// characters in it are not read as part of the URL structure.
var pageURL = 'http://translate.google.com/#' + langFrom + '/' + langTo + '/' + encodeURIComponent(inputText);
page.open(pageURL, function(status) {
    // your code to get results here
});
Here you go:
translate.js:
#!/usr/bin/env phantomjs
// translate.js setup: reads the text to translate from stdin, builds the
// Google Translate URL and creates the page with its handlers.
var system = require('system');
var text = system.stdin.read();

var sourceLang = "en";
var targetLang = "pt_BR";
var url = ["https://translate.google.com/#" + sourceLang, targetLang].join("/");

var page = require('webpage').create();
page.settings.userAgent = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:13.0) Gecko/20100101 Firefox/13.0';

// Assembles a report for errors raised inside the page. The report is built
// but deliberately not printed unless the line at the end is enabled.
page.onError = function(msg, trace) {
    var msgStack = ['ERROR: ' + msg];
    if (trace && trace.length) {
        msgStack.push('TRACE:');
        for (var i = 0; i < trace.length; i++) {
            var frame = trace[i];
            var inFunction = frame.function ? ' (in function "' + frame.function + '")' : '';
            msgStack.push(' -> ' + frame.file + ': ' + frame.line + inFunction);
        }
    }
    // uncomment to log into the console
    // console.error(msgStack.join('\n'));
};

// Console messages from the page are the script's output channel: the
// sentinel message ends the process, anything else is written to stdout.
// A screenshot is rendered after every message, whichever branch ran.
page.onConsoleMessage = function (msg) {
    if ( msg != "phanthom.exit()" ) {
        system.stdout.write(msg);
        system.stdout.flush();
    } else {
        phantom.exit();
    }
    page.render("test.png");
};
/*
 * Wraps WebPage.evaluate and makes it possible to pass parameters into the
 * function that runs inside the web page. The PhantomJS issue is here:
 *
 * http://code.google.com/p/phantomjs/issues/detail?id=132
 *
 * This is from comment #43.
 *
 * The function and its arguments are turned into the source text of a
 * zero-argument wrapper function, which is what gets handed to page.evaluate.
 *
 * @param page  Object exposing an evaluate() method.
 * @param func  Function to run in the page. It is serialised with toString(),
 *              so it cannot rely on variables from the enclosing scope.
 * @param ...   Any further arguments are serialised with JSON.stringify and
 *              passed to func, so they must be JSON-serialisable.
 * @returns     Whatever page.evaluate() returns.
 */
function evaluate(page, func) {
    var args = [].slice.call(arguments, 2);
    // JSON permits raw U+2028 / U+2029 inside strings, but ES5 source code
    // treats both as line terminators, which would make the generated
    // function text unparsable. Emit them as escape sequences instead.
    var argsLiteral = JSON.stringify(args)
        .replace(/\u2028/g, '\\u2028')
        .replace(/\u2029/g, '\\u2029');
    var fn = "function() { return (" + func.toString() + ").apply(this, " + argsLiteral + ");}";
    return page.evaluate(fn);
}
// Opens the translation page and, inside it, polls once a second until the
// source box exists and the result box holds text. The translation is then
// emitted through console.log, followed by the sentinel message that the
// console-message handler uses to end the process.
page.open(url, function (status) {
    if (status !== 'success') {
        console.log('Unable to access network');
        // Without an explicit exit the PhantomJS process would keep running.
        phantom.exit(1);
        return;
    }

    // The function below is serialised and executed inside the page; `text`
    // reaches it as a parameter because page-side code cannot see
    // PhantomJS-side variables.
    evaluate(page, function(text){
        var getResult=function(){
            var result_box=document.querySelector("#result_box");
            var input_box=document.querySelector("#source");
            if ( input_box == null )
                setTimeout( getResult, 1000 );
            else {
                input_box.value=text;
                if ( result_box == null || result_box.innerText == "" ) {
                    setTimeout( getResult, 1000 );
                } else {
                    console.log(result_box.innerText);
                    console.log("phanthom.exit()")
                }
            }
        }
        getResult();
    }, text );
});
...
$ echo "phantomjs, is a fantastic tool" | phantomjs translate.js | iconv --from cp1252
PhantomJS, é uma ferramenta fantástica
screenshot.js
// screenshot.js: loads one page, reports the load status and final URL,
// saves a screenshot and exits.
var page = require("webpage").create();
var homePage = "http://www.google.com/";

// Invoked by PhantomJS when the page load has finished.
function onPageLoaded(status) {
    console.log("Status: " + status);
    console.log("Loaded: " + page.url);
    page.render("google.png");
    phantom.exit();
}

page.open(homePage);
page.onLoadFinished = onPageLoaded;
Terminal:
bin/phantomjs screenshot.js
Question:
Is there any way that I can pass the URL (the value of var homePage above) to PhantomJS from outside of screenshot.js, so that it's not hard-coded inside the script?
Add the url to the command line
bin/phantomjs screenshot.js http://www.google.com/
Here you have an example from the docs :https://github.com/ariya/phantomjs/blob/master/examples/arguments.js
// Prints every command-line argument with its index, or a hint when only
// a single entry is present in system.args.
var system = require('system');
var args = system.args;

if (args.length !== 1) {
    for (var i = 0; i < args.length; i++) {
        console.log(i + ': ' + args[i]);
    }
} else {
    console.log('Try to pass some args when invoking this script!');
}

phantom.exit();