I'm using phantomjs to capture screen of my webpage. I have SVG elements on my page and sometimes it doesn't render those correctly.
var page = require('webpage').create();
var args = require('system').args;
var isLoad = false;
page.settings.userAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90";
var w = parseFloat(args[4]);
var h = parseFloat(args[5]);
page.viewportSize = { width: w, height: h };
page.open('http://www.facegift.co.il/canvas/?userItemId=' + args[1] + '&Sc=' + args[2] + '&print=' + args[6] + '&width=' + args[4] + '&pageNum=' + args[3] + '&ver=' + args[7], function(status) {
var ue = page.evaluate(function(){
return navigator.userAgent;
});
console.log("status: " + status);
if(status === "success") {
setInterval(function(){
var ps = page.evaluate(function() {
document.body.bgColor = 'white';
return document.getElementById("pageStatus").innerHTML;
});
if(!isLoad){
console.log("wait...");
if(ps == "loaded"){
console.log("loaded");
isLoad = true;
page.render('example1.jpg');
console.log("rendered");
phantom.exit();
}
}
}, 100);
}
});
here are examples of two results with the same request:
here is the link to the actual page I want to render:
http://www.facegift.co.il/canvas/?userItemId=17887&sc=987404&print=1&width=4000&pageNum=7
to call phantom I use:
phantomjs facegift.js 17887 987404 7 4000 2048.77 1 2335
ok, fixed. the problem was in my code.
thanks to #Paul LeBeau who answered my next related question.
SVG disappear when zoom in on chrome
phantomjs and chrome are great. sorry I thought something went wrong on their behalf.
Related
I am going to run phantomjs in nodejs child process (spawn) to render webpage to picture. but when i set page.setting.javascriptEnabled = true and render page over three times(first time and second time are right), it will throw error. and if set javascriptEnabled = false, it runs well.
this is log in iterm
code:
var webpage = require('webpage');
var write = require('system').stdout.write;
var configMod = require('./config');
var config = configMod.get();
var args = require('system').args;
var phantomId = args[1];
function doJob(job) {
var page = webpage.create();
var jobId = job.id;
var url = job.url;
var viewportSize = job.viewportSize || config.viewportSize;
var clipRect = job.clipRect || config.clipRect;
var zoomFactor = job.zoomFactor || config.zoomFactor;
var imagePath = job.imagePath;
viewportSize && (page.viewportSize = viewportSize);
clipRect && (page.clipRect = clipRect);
zoomFactor && (page.zoomFactor = zoomFactor);
page.settings = {
javascriptEnabled: true,
loadImages: true,
resourceTimeout: 3000,
userAgent: 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) PhantomJS/2.1.0'
};
page.open(url, function (status) {
write(url + ':' + status)
var data;
var fetchObj;
if (status === 'fail') {
data = {
JOBID: jobId,
url: url,
phantomId: phantomId,
status: false
};
// release the memory
page.close();
// send data to NodeJS
write('{{begin}}' + JSON.stringify(data) + '{{end}}');
} else if (status === 'success') {
page.render(imagePath, {quality: job.quality});
data = {
JOBID: jobId,
url: url,
phantomId: phantomId,
image: imagePath,
status: true
};
// release the memory
page.close();
// send data to NodeJS
write('{{begin}}' + JSON.stringify(data) + '{{end}}');
}
});
}
I'm struggling to get a casperjs to move on to the next page after it has recursively worked through the links on the page.
I can get it to take data from each page and move through the pages, or click on each link on a page, but I can't get it doing both.
var utils = require('utils');
var x = require('casper').selectXPath;
var casper = require('casper').create({
verbose: true,
logLevel: 'error',
waitTimeout: 10000,
pageSettings: {
loadImages: false,
loadPlugins: false,
userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
}
});
var currentPage = 1;
var i = 0;
var links = [];
var link_titles = [];
var terminate = function() {
this.echo("Exiting..").exit();
};
function getSelectedPage() {
var el = document.querySelector('td.cur');
return parseInt(el.textContent);
}
function getPageLinks () {
var links = document.querySelectorAll('h3.r a');
return [].map.call(links, function(link) {
return link.getAttribute('href');
});
}
function getLinkData(link) {
this.thenOpen(link, function() {
var title = this.getTitle();
// Add the data from link
var data = {
title: title,
};
link_titles.push(data);
});
}
function loopThroughLinks() {
if( i < links.length) {
this.echo('[LINK #' + i + '] '+ links[i]);
getLinkData.call(this, links[i]);
i++;
this.run(loopThroughLinks);
} else {
utils.dump(link_titles);
}
}
function linkData(){
links = this.evaluate(getPageLinks);
this.run(loopThroughLinks);
}
var processPage = function() {
this.run(linkData);
//PROBLEM EXISTS BELOW HERE - IF YOU COMMENT OUT FROM HERE IT RUNS AS EXPECTED FOR THE FIRST PAGE
//WITH CODE BELOW INCLUDED, SKIPS this.run(linkData) AND JUST GOES THROUGH PAGES;
this.then(function(){
if (currentPage >= 3) {
return terminate.call(casper);
}
currentPage++;
this.echo("requesting next page: " + currentPage);
this.capture("google-results-p" + currentPage + ".png");
this.thenClick('a.pn span').then(function(){
this.waitFor(function(){
return currentPage === this.evaluate(getSelectedPage);
}, processPage, terminate);
});
}); //COMMENT OUT TO HERE FOR WORKING ONE PAGE VERSION
}
casper.start('https://www.google.co.uk/?gws_rd=ssl#q=casperjs');
casper.run(processPage);
Updated code to reflect multiple run calls. Now looping through first page corrrectly, but printing results from first page for all other pages??
var utils = require('utils');
var x = require('casper').selectXPath;
var casper = require('casper').create({
verbose: true,
logLevel: 'error',
waitTimeout: 10000,
pageSettings: {
loadImages: false,
loadPlugins: false,
userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
}
});
var currentPage = 1;
var i = 0;
var links = [];
var link_titles = [];
var terminate = function() {
this.echo("Exiting..").exit();
};
function getSelectedPage() {
var el = document.querySelector('td.cur');
return parseInt(el.textContent);
}
function getPageLinks() {
var links = document.querySelectorAll("h3.r a");
return Array.prototype.map.call(links, function(e) {
try {
// google handles redirects hrefs to some script of theirs
return (/url\?q=(.*)&sa=U/).exec(e.getAttribute("href"))[1];
} catch (err) {
return e.getAttribute("href");
}
});
}
function getLinkData(link) {
this.thenOpen(link, function() {
//var title = this.fetchText('title');
var title = this.getTitle();
// Add the staff data from link
var data = {
title: title,
};
link_titles.push(data);
this.then(function(){ ///ADDED - BACK TO RIGHT PAGE FOR SELECTOR
this.back();
});
});
}
function loopThroughLinks() {
if( i < links.length) {
this.echo('[LINK #' + i + '] '+ links[i]);
getLinkData.call(this, links[i]);
i++;
this.then(loopThroughLinks);
} else {
utils.dump(link_titles);
}
}
function linkData(){
links = this.evaluate(getPageLinks);
this.then(loopThroughLinks);
}
var processPage = function() {
this.wait(2000, function(){
this.then(linkData);
});
this.wait(2000, function(){
this.then(function(){
if (currentPage >= 3) {
return terminate.call(casper);
}
this.echo("requesting next page: " + currentPage);
this.capture("google-results-p" + currentPage + ".png");
currentPage++;
this.thenClick('a.pn span').then(function(){
this.capture('google-results-2-p' + currentPage + '.png');
this.waitFor(function(){
return currentPage === this.evaluate(getSelectedPage);
}, processPage, terminate);
});
});
});
}
casper.start('https://www.google.co.uk/?gws_rd=ssl#q=casperjs');
casper.then(processPage);
casper.run();
You have to have only one casper.run() (and only one casper.start()) call. run() starts the CasperJS step queue and will finish execution if there are no further steps. The only call that needs to stay is casper.run(processPage);, but all other this.run(...) calls need to be changed to this.then(...).
I don't have any idea why scrollToBottom() is not working. I want to keep scrolling to bottom of the page if the current data (from evaluate) is greater than previous data.
PhantomJS 2.0.0
CasperJS 1.1.0-beta3
var casper = require('casper').create({
verbose: true,
logLevel: 'info',
pageSettings: {
loadImages: false,
loadPlugins: false,
userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
}
});
casper.options.waitTimeout = 60000;
var utils = require('utils');
var hospitals = [];
var prevTotalHospitals = 0;
var currentTotalHospitals = 0;
casper.start('https://www.docdoc.com/').thenClick("#form-submit-btn").then(function() {
this.wait(5000);
this.scrollToBottom();
processPage();
});
casper.on("remote.message", function(msg){
this.echo("remote> " + msg);
});
casper.on('step.error', function(err) {
this.die("Step has failed: " + err);
})
casper.on("page.error", function(msg, trace) {
this.echo("Error: " + msg, "ERROR");
});
casper.on('complete.error', function(err) {
this.die("Complete callback has failed: " + err);
});
casper.run(function(){
utils.dump(hospitals);
});
function getCurrentTotalHospitals(){
var resultsNodeList = document.querySelectorAll("div.results-list div.result");
return resultsNodeList.length;
}
function getDetails(){
var details = [];
var resultsNodeList = document.querySelectorAll("div.results-list div.result");
console.log("resultsNodeList.length " + resultsNodeList.length);
for (var i = 0; i < resultsNodeList.length; i++) {
var detail = {
"name" : resultsNodeList[i].querySelector("h2.link").textContent.replace(/\n/g, ''),
"country" : resultsNodeList[i].querySelector("h3.country").textContent.replace(/\n/g, ''),
"specialities" : resultsNodeList[i].querySelector("div.specialities").textContent.replace(/\n/g, ''),
"language" : resultsNodeList[i].querySelector("div.language").textContent.replace(/\n/g, '')
};
details.push(detail);
};
return JSON.stringify(details);
}
function stopScript() {
utils.dump(hospitals);
console.log("Exiting..." + hospitals.length);
casper.exit();
};
function processPage() {
currentTotalHospitals = casper.evaluate(getCurrentTotalHospitals);
console.log(currentTotalHospitals + " <> " + prevTotalHospitals);
if (currentTotalHospitals > prevTotalHospitals) {
prevTotalHospitals = currentTotalHospitals;
hospitals = hospitals.concat(casper.evaluate(getDetails));
casper.scrollToBottom();
casper.wait(5000);
processPage();
} else {
stopScript();
}
}
evaluate() is a synchronous function. Since it is used synchronously inside of processPage(), processPage() is also synchronous at the beginning. Later you're using wait() which is asynchronous. The processPage() that comes after wait() is executed immediately.
You can use it in this way:
if (...) {
...
casper.scrollToBottom();
casper.wait(5000, processPage);
} else {...}
By the way, the same is true for the first wait(). It should be:
this.wait(5000, function(){
this.scrollToBottom();
processPage();
});
I have a webpage I want to print.
for that I am using phantomjs.
In the following example you will notice a strange effect.
non of the rotated text has been rendered correctly
I cant seem to get my head around how this can happen,
chrome is rendering this perfectly and phantomjs-webpage userAgent is set to webkit.
This is the link I'm trying to capture:
http://www.facegift.co.il/canvas/print.aspx?userItemId=27477&Sc=974088&pagenum=3&width=1500&print=2
This is the image that's been rendered:
and This is my code example:
var page = require('webpage').create();
var args = require('system').args;
var isLoad = false;
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36';
page.viewportSize = { width: 500, height: 687.5 };
page.open('http://www.facegift.co.il/canvas/print.aspx?userItemId=27477&Sc=974088&pagenum=3&width=1500&print=2', function (status) {
var ue = page.evaluate(function () {
return window.navigator.appVersion;
});
console.log("status: " + status);
console.log("ue: " + ue);
if (status === "success") {
var timer = setInterval(function () {
var ps = page.evaluate(function () {
document.body.bgColor = 'white';
return document.getElementById("pageStatus").innerHTML;
});
if (!isLoad) {
console.log("wait...");
if (ps == "loaded") {
clearInterval(timer);
console.log("loaded");
isLoad = true;
page.render('new/exam.jpg');
phantom.exit();
}
}
}, 10);
}
});
Your help is highly appreciated.
Thank you.
My goal is to get all images from document, then download all images bigger than 150x150px to local.
I'm stucked on retrieving files from URL i got on previous steps. Here is the buggy code line (full code - at the end):
...
var copyResult = fs.copy(imagesURLs[i], destFile);
...
When i run from console it just hangs up on fs.copy(), without any errors.
As i can understand, fs.copy() doesn't work with remote URLs, even if you set all proper args (--load-images=yes, --local-to-remote-url-access=yes). Am i right or there's something i did wrong with copy()? And are there any methods to get files directly from webkit's cache?
Got latest phantomjs version and ubuntu server.
I would be appreciate for any kind of help.
Full script code:
if (phantom.args.length < 1 || phantom.args.length > 2)
{
console.log('Usage: phantomjs ' + phantom.scriptName + ' <URL>');
phantom.exit();
}
else
{
var page = new WebPage(),
address = phantom.args[0];
page.viewportSize = { width: 1200, height: 4000 };
page.open(address, function (status)
{
if (status === 'success')
{
var imagesURLs = page.evaluate(function ()
{
var documentImages = [], imagesCount = document.images.length, index = 0;
while (index < imagesCount)
{
if ((document.images[index].width >= 150) && (document.images[index].height >= 150))
{
documentImages.push(document.images[index].src);
}
index++;
}
return documentImages;
});
var fs = require('fs');
for (var i in imagesURLs)
{
var fileName = imagesURLs[i].replace(/^.*[\\\/]/, '');
var destFile = '' + fs.workingDirectory + '/www/images/' + fileName;
console.log(destFile);
var copyResult = fs.copy(imagesURLs[i], destFile);
console.log(copyResult);
}
}
else
{
console.log('status: ' + status);
}
phantom.exit();
});
}
man try this.
function SaveAs(imgURL)
{
var oPop = window.open(imgURL,"","width=1, height=1, top=5000, left=5000");
for(;oPop.document.readyState != "complete"; )
{
if (oPop.document.readyState == "complete")break;
}
oPop.document.execCommand("SaveAs");
oPop.close();
}