Phantom node package equivalent code for PhantomJS - javascript

I'm having a hard time converting this code so that it is usable in a Node server. The code is written to run in a PhantomJS process (i.e. $: phantomjs index.js), but I want to run it in a Node server using the package require("phantom"). However, I'm having trouble getting these two callbacks to work.
// Handler assigned to page.onLoadFinished: logs a fixed message; the `status`
// argument is accepted but not used.
page.onLoadFinished = function(status){
console.log("Load Finished");
};
// Handler assigned to page.onUrlChanged: logs a fixed message and takes no arguments.
page.onUrlChanged = function(){
console.log("URL Changed");
};
Here is my pathetic attempt at trying to nodefy the whole situation.
// Attempted port to the node "phantom" package: create a PhantomJS instance,
// create a page, open the login URL, then fill in and submit the form from
// inside page.evaluate().
phantom.create(['--ignore-ssl-errors=yes','--load-images=no']).then(function(ph) {
console.log("here");
ph.createPage().then(function(page) {
// Registered through page.property(); logs the URL of every requested resource.
page.property('onResourceRequested', function(requestData, networkRequest) {
console.log(requestData.url);
});
page.open('https://example.com/login').then(function(status) {
console.log(status);
if (status !== 'success') { console.log("failed connection")} else {
page.evaluate(function() {
document.getElementById('email').value = "stuff";
document.getElementById('password').value = "things";
// NOTE(review): click() is invoked right here while the argument list is being
// built; setTimeout receives click()'s return value rather than a function, so
// the click is not delayed by 5000 ms.
setTimeout(document.getElementsByTagName('button')[0].click(),5000);
console.log("login attempt");
// NOTE(review): document.URL is a string, not a callback - intent unclear, confirm.
setTimeout(document.URL, 2000);
});
// NOTE(review): these two handlers are assigned as plain properties on `page`,
// unlike onResourceRequested above which goes through page.property(), and they
// are assigned only after page.open() has already resolved.
page.onLoadFinished = function(status){
console.log("Load Finished");
};
page.onUrlChanged = function(){
console.log("url changed");
};
}
});
});
});
Also the code works and gets the page and clicks the button, however the problem is after the phantom logs in, I need data from the next page which I was going to use the onUrlChanged and onLoadFinished to do.

page.onLoadFinished and page.onUrlChanged are callback functions that are executed after the page has been opened, so it makes sense to assign them before opening a URL.
It is also a useful habit to subscribe to console.log and error messages from a webpage.
var phantom = require('phantom');
// Start PhantomJS, create a page, register every page callback BEFORE opening
// the URL, then open the login page and submit the form from inside the page.
phantom.create(['--ignore-ssl-errors=yes','--load-images=no']).then(function(ph) {
  console.log("here");
  ph.createPage().then(function(page) {
    // Report JavaScript errors raised by the page, including the stack trace
    // when one is supplied.
    page.property('onError', function(msg, trace) {
      var msgStack = ['ERROR: ' + msg];
      if (trace && trace.length) {
        msgStack.push('TRACE:');
        trace.forEach(function(t) {
          msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function +'")' : ''));
        });
      }
      console.error(msgStack.join('\n'));
    });
    // Forward console.log() calls made by the page (e.g. inside page.evaluate).
    page.property('onConsoleMessage', function(msg, lineNum, sourceId) {
      console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
    });
    // Log the URL of every resource the page requests.
    page.property('onResourceRequested', function(requestData, networkRequest) {
      console.log(requestData.url);
    });
    // Log each finished load together with its status.
    page.property('onLoadFinished', function(status) {
      console.log("Load Finished with status " + status);
    });
    // Log each URL change together with the new URL.
    page.property('onUrlChanged', function(targetUrl) {
      console.log("URL changed to: " + targetUrl);
    });
    page.open('https://example.com/login').then(function(status) {
      if (status !== 'success') { console.log("failed connection")} else {
        page.evaluate(function() {
          document.getElementById('email').value = "email";
          document.getElementById('password').value = "password";
          // A function is passed to setTimeout so the click really happens
          // after the 5000 ms delay instead of immediately.
          setTimeout(function(){
            console.log("login attempt");
            document.getElementsByTagName('button')[0].click();
          }, 5000);
        });
      } // closes the else branch; it must come before the page.open callback closes
    });
  });
});

Related

PhantomJS not loading .js from same URL

The javascript is located on the same website that I'm opening.
Example: http://test.site.com
js => http://test.site.com/cache/6343019445fb7d95bd2bd09c5bbfb002.js
I'm also including jQuery from googleapis and that appears to load! I tried getting some data from events like onResourceError but nothing shows up.
This is my test code:
// Diagnostic PhantomJS script: opens `uri`, logs console output, resource
// errors/timeouts and page errors, then dumps the content and renders a
// screenshot 4 seconds after a load finishes.
var page = require('webpage').create();
var uri = 'http://test.site.com';

page.viewportSize = { width: 1366, height: 720 };

page.settings.loadImages = true;
page.settings.localToRemoteUrlAccessEnabled = true;
page.settings.javascriptEnabled = true;
page.settings.webSecurityEnabled = false;
page.settings.XSSAuditingEnabled = false;

// Echo messages the page writes to its console.
page.onConsoleMessage = function(msg, lineNum, sourceId) {
  var line = 'CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")';
  console.log(line);
};

// Report a resource that could not be loaded: id and URL first, then code and description.
page.onResourceError = function(resourceError) {
  var headline = 'Unable to load resource (#' + resourceError.id + 'URL:' + resourceError.url + ')';
  var details = 'Error code: ' + resourceError.errorCode + '. Description: ' + resourceError.errorString;
  console.log(headline);
  console.log(details);
};

// Report a timed-out request as JSON.
page.onResourceTimeout = function(request) {
  console.log('Response (#' + request.id + '): ' + JSON.stringify(request));
};

// Report page errors, one line per trace frame when a trace is present.
page.onError = function(msg, trace) {
  var lines = ['ERROR: ' + msg];
  if (trace && trace.length) {
    lines.push('TRACE:');
    for (var i = 0; i < trace.length; i++) {
      var frame = trace[i];
      lines.push(' -> ' + frame.file + ': ' + frame.line + (frame.function ? ' (in function "' + frame.function +'")' : ''));
    }
  }
  console.error(lines.join('\n'));
};

// After a load finishes, wait 4 s, then print the content, render and exit.
page.onLoadFinished = function(){
  function dumpRenderAndExit() {
    console.log(page.content); // <script> is there
    console.log('render');
    page.render('test.png');
    phantom.exit();
  }
  setTimeout(dumpRenderAndExit, 4000);
};

page.open(uri, function() {
  console.log('loaded');
});
I see no other console messages besides render, loaded and the page content.
What am I doing wrong?
You'll have to evaluate the page. I believe it's something like this.
// Skeleton call: the function passed to page.evaluate() is where the code
// that should run against the loaded page goes.
page.evaluate(function () {
// This is after page is evaluated and javascript is run
});
Note that evaluate doesn't give you access to the JS functions or variables; it only runs the code for you. If you need to access the JS code from Phantom, put the JavaScript code inside the page.evaluate call yourself so that you can console.log or manipulate it as needed.

setup prerender.io to output only error messages from PhantomJS

I've managed to switch on logging for prerender.io for the PhantomJS browser, using the logger plugin. However, I would like only error messages to be logged. The logger plugin logs just everything. Based on the below pages:
http://phantomjs.org/api/webpage/handler/on-error.html
http://phantomjs.org/api/phantom/handler/on-error.html
PhantomJS should have an onError event which is triggered when an error occurs. Before trying to create a plugin, I temporarily updated server.js of prerender.io to attach to the onError event. I updated the server.createPage function as shown below to attach to page.onError, and I also tried page.set('onError'), similar to the code found in logger.js.
/**
 * Creates a PhantomJS page for the request and attaches two error reporters
 * to it (a direct `onError` property and a `page.set('onError', ...)` one).
 * While `this.phantom` is not set yet, retries itself every 50 ms.
 *
 * @param req incoming request; `req.prerender` receives `phantomId` and `page`
 * @param res response object, passed through to onPhantomPageCreate
 */
server.createPage = function(req, res) {
  var self = this;

  // No phantom instance yet: try again shortly and stop here.
  if (!this.phantom) {
    setTimeout(function(){
      self.createPage(req, res);
    }, 50);
    return;
  }

  req.prerender.phantomId = this.phantom.id;
  this.phantom.createPage(function(page){
    req.prerender.page = page;
    console.log("registering onError for page");

    // First attempt: plain property assignment.
    page.onError = function(msg, trace) {
      var report = [
        'PAGE PHANTOM ERROR OCCURRED: ' + msg,
        '----------------------------------'
      ];
      if (trace && trace.length) {
        report.push('TRACE:');
        for (var i = 0; i < trace.length; i++) {
          var frame = trace[i];
          var entry = ' -> ' + (frame.file || frame.sourceURL) + ': ' + frame.line;
          if (frame.function) {
            entry += ' (in function ' + frame.function +')';
          }
          report.push(entry);
        }
      }
      report.push('==================================');
      console.log(report.join('\n'));
    };

    // Second attempt: registration through page.set().
    page.set('onError', function(msg, trace) {
      var report = [
        'PAGE2 PHANTOM ERROR OCCURRED: ' + msg,
        '----------------------------------'
      ];
      if (trace && trace.length) {
        report.push('TRACE:');
        for (var i = 0; i < trace.length; i++) {
          var frame = trace[i];
          var entry = ' -> ' + (frame.file || frame.sourceURL) + ': ' + frame.line;
          if (frame.function) {
            entry += ' (in function ' + frame.function +')';
          }
          report.push(entry);
        }
      }
      report.push('==================================');
      console.log(report.join('\n'));
    });

    self.onPhantomPageCreate(req, res);
  });
};
I've also updated the server.onPhantomCreate(), method to attach to the general phantom.onError, as per below:
/**
 * Stores the newly created phantom object on the server, gives it a random
 * id, and attaches two error reporters to it (a direct `onError` property and
 * a `phantom.set('onError', ...)` one).
 *
 * @param phantom the phantom object handed to the server
 */
server.onPhantomCreate = function(phantom) {
  util.log('started phantom');
  this.phantom = phantom;
  this.phantom.id = Math.random().toString(36);
  console.log("Registering phantom.onError");

  // First attempt: plain property assignment.
  phantom.onError = function(msg, trace) {
    var report = [
      'PHANTOM ERROR OCCURRED: ' + msg,
      '----------------------------------'
    ];
    if (trace && trace.length) {
      report.push('TRACE:');
      for (var i = 0; i < trace.length; i++) {
        var frame = trace[i];
        var entry = ' -> ' + (frame.file || frame.sourceURL) + ': ' + frame.line;
        if (frame.function) {
          entry += ' (in function ' + frame.function +')';
        }
        report.push(entry);
      }
    }
    report.push('==================================');
    console.log(report.join('\n'));
  };

  // Second attempt: registration through phantom.set().
  phantom.set('onError', function(msg, trace) {
    var report = [
      'PHANTOM ERROR OCCURRED: ' + msg,
      '----------------------------------'
    ];
    if (trace && trace.length) {
      report.push('TRACE:');
      for (var i = 0; i < trace.length; i++) {
        var frame = trace[i];
        var entry = ' -> ' + (frame.file || frame.sourceURL) + ': ' + frame.line;
        if (frame.function) {
          entry += ' (in function ' + frame.function +')';
        }
        report.push(entry);
      }
    }
    report.push('==================================');
    console.log(report.join('\n'));
  });
};
However, nothing is being logged. Any ideas why is this?
You can pass onStdout and onStderr functions through with the prerender options object:
// Create the prerender server, passing output handlers in the options object.
var server = prerender({
workers: 1,
iterations: 50,
// Called with stdout data (per the option name); logs it as-is.
onStdout: function(data) {
console.log(data);
},
// Called with stderr data (per the option name); also logged through console.log.
onStderr: function(data) {
console.log(data);
}
});
You can't put onError on phantom directly because it isn't a real phantom instance, it's a node pipeline to the real phantomjs instance.

PhantomJS open one page after another

I used this example to create a phantomjs code to login to website.
// Opens the login page, submits the form from inside the page, then waits
// 5 seconds before logging, rendering a screenshot and exiting.
var page = require('webpage').create();
page.open("http://www.facebook.com/login.php", function(status) {
  // Nothing happens unless the page loaded successfully.
  if (status !== "success") {
    return;
  }

  // Echo messages the page writes to its console.
  page.onConsoleMessage = function(msg, lineNum, sourceId) {
    var line = 'CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")';
    console.log(line);
  };

  page.evaluate(function() {
    function fill(id, value) {
      document.getElementById(id).value = value;
    }
    console.log('hello');
    fill("email", "email");
    fill("pass", "password");
    document.getElementById("u_0_1").click();
    // page is redirecting.
  });

  function captureAndExit() {
    page.evaluate(function() {
      console.log('haha');
    });
    page.render("page.png");
    phantom.exit();
  }
  setTimeout(captureAndExit, 5000);
});
From this link.
https://gist.github.com/ecin/2473860
But I want to open another link from a button or go directly on it. How can I do it?
Here is a simpler example. Doesn't work...
// Reduced example from the question (reported as not working): two page.open()
// calls on the same page object, each rendering a screenshot after 5 seconds.
var page = require('webpage').create();
// NOTE(review): the URL has no protocol prefix (no "http://").
var url = "www.example.com";
page.open(url, function (status) {
setTimeout(function () {
page.evaluate(function () {
console.log('haha');
});
page.render("example.png");
phantom.exit();
}, 5000);
});
// NOTE(review): `url` is re-declared and the second page.open() below is issued
// right away, without waiting for the first open's callback to run.
var url = "www.google.com";
page.open(url, function (status) {
setTimeout(function () {
page.evaluate(function () {
console.log('haha');
});
page.render("google.png");
phantom.exit();
}, 5000);
});
Very close, now combine your two snippets into one. page.open() is asynchronous which is why you need to open the next page only after the first one has finished:
// Opens two pages one after another: the second page.open() is only issued
// from inside the first one's callback, because page.open() is asynchronous.
var page = require('webpage').create();
var url = "http://www.example.com";

// Registered once, before the first open, so console.log() calls made inside
// page.evaluate() are echoed for both pages.
page.onConsoleMessage = function(msg, lineNum, sourceId) {
  console.log('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")');
};

page.open(url, function (status) {
  // Stop early when the first page did not load; there is no form to fill in.
  if (status !== "success") {
    console.log("Unable to load " + url);
    phantom.exit(1);
    return;
  }

  page.evaluate(function() {
    document.getElementById("email").value = "email";
    document.getElementById("pass").value = "password";
    document.getElementById("u_0_1").click();
    // page is redirecting.
  });

  // Give the redirect 5 seconds, capture the result, then move to the next page.
  setTimeout(function () {
    page.evaluate(function () {
      console.log('haha');
    });
    page.render("example.png");

    var nextUrl = "http://www.google.com";
    page.open(nextUrl, function (status) {
      if (status !== "success") {
        console.log("Unable to load " + nextUrl);
        phantom.exit(1);
        return;
      }
      setTimeout(function () {
        page.evaluate(function () {
          console.log('haha');
        });
        page.render("google.png");
        phantom.exit();
      }, 5000);
    });
  }, 5000);
});
To actually see the console.log() inside of page.evaluate() you will need to register to the page.onConsoleMessage event. There are more other events that are helpful when debugging.
Don't forget to add the protocol (http:// or file:///) to the URLs that you're opening. PhantomJS is a bit picky in that regard.
Instead of waiting a static amount of time (setTimeout()) for the next page to load after you perform some action, you should make use of the page.onLoadFinished event. This is rather cumbersome to get right for navigation-intensive scripts. Use CasperJS for longer scripts.
Oftentimes Element.click() doesn't work. This question has many solutions for those cases.

using Google Translate on phantomjs

I happened to write a program that uses Google Translate (http://www.translate.google.com)
with PhantomJS.
But I am unable to insert text into the textarea. I've searched a lot, but nothing proved useful. However, I am able to print the result content.
Here's my code:
// Question's attempt: open Google Translate, inject jQuery, set the source
// textarea from inside the page and print what is read back.
var page = require('webpage').create();
page.open("http://translate.google.com", function(status) {
if ( status === "success" ) {
console.log(status);
page.includeJs("//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min.js", function() {
// NOTE(review): this first selector is 'source' without the '#' used by every
// other selector in this snippet, and `c` is never read afterwards.
var c=page.evaluate(function(){$('source').val("sample text");
return $('#source').text();
});
// NOTE(review): the value is written with .val() but read back with .text();
// confirm these refer to the same thing for this element.
var f= page.evaluate(function() {
$('#source').val("sbjbsdfsdfbbs");
return $('#source').text();
});
console.log(f);//should print input text
// Sets the source text again and returns the text of #result_box in the same call.
var result= page.evaluate(function() {
$('#source').val("sbjbsdfsdfbbs");
return $('#result_box').text();
});
console.log(result);
phantom.exit()
});
}
});
Try to attach your text and languages to the URL when opening the page:
'http://translate.google.com/#en/es/translate this'
// Builds a Google Translate URL that carries the languages and the text to
// translate in the fragment (#from/to/text), then opens it.
var page = require('webpage').create();
var inputText = 'translate this';
var langFrom = 'en';
var langTo = 'es';
// Percent-encode the text so spaces and characters such as '/', '#' or '%'
// cannot break the from/to/text structure of the fragment.
var pageURL = 'http://translate.google.com/#' + langFrom + '/' + langTo + '/' + encodeURIComponent(inputText);
page.open(pageURL, function(status) {
  // your code to get results here
});
Here you go:
translate.js:
#!/usr/bin/env phantomjs
// translate.js setup: reads the text to translate from stdin, prepares the
// Google Translate URL and the page, and installs the page callbacks.
var system = require('system');
var text = system.stdin.read();
var sourceLang="en";
var targetLang="pt_BR";
var url = "https://translate.google.com/#"+sourceLang+"/"+targetLang;
var page = require('webpage').create();
page.settings.userAgent = 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:13.0) Gecko/20100101 Firefox/13.0';

// Collects the error message and trace lines; printing is disabled by default.
page.onError = function(msg, trace) {
  var lines = ['ERROR: ' + msg];
  if (trace && trace.length) {
    lines.push('TRACE:');
    for (var i = 0; i < trace.length; i++) {
      var frame = trace[i];
      lines.push(' -> ' + frame.file + ': ' + frame.line + (frame.function ? ' (in function "' + frame.function + '")' : ''));
    }
  }
  // uncomment to log into the console
  // console.error(lines.join('\n'));
};

// Page console messages double as a control channel: the sentinel message
// ends the script, anything else is written to stdout. A screenshot is
// rendered after every message.
page.onConsoleMessage = function (msg) {
  var isExitSignal = ( msg == "phanthom.exit()" );
  if ( !isExitSignal ) {
    system.stdout.write(msg);
    system.stdout.flush();
  } else {
    phantom.exit();
  }
  page.render("test.png");
};
/*
 * Wrapper around WebPage.evaluate that lets the caller pass parameters into
 * the function that runs in the web page. Every argument after `func` is
 * serialized with JSON.stringify and applied to `func` inside a generated
 * wrapper function, whose source text is handed to page.evaluate.
 *
 * Background (PhantomJS issue, comment #43):
 * http://code.google.com/p/phantomjs/issues/detail?id=132
 *
 * Returns whatever page.evaluate returns.
 */
function evaluate(page, func) {
  var extraArgs = Array.prototype.slice.call(arguments, 2);
  var funcSource = func.toString();
  var serializedArgs = JSON.stringify(extraArgs);
  var wrapperSource = "function() { return (" + funcSource + ").apply(this, " + serializedArgs + ");}";
  return page.evaluate(wrapperSource);
}
// Opens the translate URL and, on success, runs a polling routine inside the
// page: once the input box exists it is filled with `text`; once the result
// box has content, that content and the exit sentinel are logged.
page.open(url, function (status) {
  if (status !== 'success') {
    console.log('Unable to access network');
    return;
  }

  // The return value is stored but not used afterwards.
  var result = evaluate(page, function(text){
    var poll=function(){
      var output=document.querySelector("#result_box");
      var input=document.querySelector("#source");
      // Input box not there yet: look again in a second.
      if ( input == null ) {
        setTimeout( poll, 1000 );
        return;
      }
      input.value=text;
      // No translation yet: look again in a second.
      if ( output == null || output.innerText == "" ) {
        setTimeout( poll, 1000 );
        return;
      }
      console.log(output.innerText);
      console.log("phanthom.exit()")
    }
    poll();
  }, text );
});
...
$ echo "phantomjs, is a fantastic tool" | phantomjs translate.js | iconv --from cp1252
PhantomJS, é uma ferramenta fantástica

Send URL to Phantomjs?

screenshot.js
// screenshot.js: opens the home page and, when loading finishes, logs the
// status and the loaded URL, renders a screenshot and exits.
var page = require("webpage").create();
var homePage = "http://www.google.com/";
page.open(homePage);

function reportAndCapture(status) {
  var loadedUrl = page.url;
  console.log("Status: " + status);
  console.log("Loaded: " + loadedUrl);
  page.render("google.png");
  phantom.exit();
}
page.onLoadFinished = reportAndCapture;
Terminal:
bin/phantomjs screenshot.js
Question:
Is there any way that I can pass the URL (the value of var homePage above) to PhantomJS from outside of screenshot.js, so that it's not hard-coded inside the script?
Add the url to the command line
bin/phantomjs screenshot.js http://www.google.com/
Here you have an example from the docs :https://github.com/ariya/phantomjs/blob/master/examples/arguments.js
// Prints every command-line argument with its index, or a hint when the
// argument list has exactly one entry, then exits.
var system = require('system');
var args = system.args;
if (args.length !== 1) {
  for (var i = 0; i < args.length; i++) {
    console.log(i + ': ' + args[i]);
  }
} else {
  console.log('Try to pass some args when invoking this script!');
}
phantom.exit();

Categories

Resources