phantomjs not rendering webpage to png [duplicate] - javascript

This question already has an answer here:
PNG is not being rendered using PhantomJS with multiple attempts in a loop
(1 answer)
Closed 6 years ago.
My code is only a few lines long and looks something like this:
// Base URL; the chapter number is appended to it for each page.
var a = "http://lnmtl.com/chapter/renegade-immortal-chapter-";
// First chapter number to fetch.
var b = 558;
// Output file name prefix and extension.
var d = "rennegrade_ch";
var f = ".png";
// A single page object is shared by every iteration of the loop.
var page = require('webpage').create();
var i = 0;
// Intended to render chapters b..559, one PNG per chapter.
for (i = b; i < 560; i++) {
var c = a + i;
console.log(c);
// page.open() is asynchronous: the callback runs later, after the loop has moved on.
page.open(c, function () {
// NOTE(review): `i` is read when the callback fires, not when open() was called.
var e = d + i + f;
console.log(e);
page.render(e);
});
}
// Runs right after the loop, i.e. before any open() callback has had a chance to render.
phantom.exit();
Each page renders fine when opened on its own, but once I put the code inside a for loop only the first console output is printed properly and the second one is skipped. I guess execution never enters the page.open callback: the loop variable increases and the same thing happens again. I have no idea why it is not reaching the render call. I also tried putting var page = require('webpage').create(); inside the for loop, but nothing changed.
UPDATE: On another question stackoverflow.com/questions/31621577/png-is-not-being-rendered-using-phantomjs-with-multiple-attempts-in-a-loop?rq=1
it was pointed out that this approach won't work because page.open is asynchronous, but the example code provided there isn't helpful enough. Can anyone explain it? I also tried setTimeout as suggested there and the same thing still happens, so are there any other ideas?

Your phantom.exit() call kills the PhantomJS browser before you do any rendering. You have to wait for the rendering to end before you can exit(). You need to have some mechanism to say when the rendering is done. I'd suggest wrapping each of your renders in a Promise. Then using a Promise.all() to wait for all the render promises to resolve. After they resolve, exit PhantomJS.
Right now, you have what is below, which does not respect the asynchronous nature of page.open():
for (...) {
// I probably won't finish because phantom dies almost immediately
page.open(c, function () {
// I won't finish running since phantom dies
page.render(e);
});
}
// I'm going to kill phantom almost immediately
phantom.exit();
You want something like the code below, which will wait for all the renders to finish. This will put renders of each of the sites we provide in a subdirectory "renders".
Note: You will need to install the es6-promise shim for this to work since PhantomJS does not yet support Promises. Thanks for the comment about that Artjon B
/*jslint node:true*/
/*globals phantom*/
"use strict";
// PhantomJS has no native Promise, so load the es6-promise shim.
// The npm package name is all lower-case; "es6-Promise" fails to resolve on
// case-sensitive file systems.
var Promise = require("es6-promise").Promise;
var webpage = require("webpage");
// Array of URLs that we want to render
var urlArr = [
    {
        name: "google",
        url: "http://www.google.com"
    },
    {
        name: "yahoo",
        url: "http://www.yahoo.com"
    },
    {
        name: "bing",
        url: "http://www.bing.com"
    }
];
// Map each site to a promise that settles once its screenshot attempt is over
var proms = urlArr.map(function (site) {
    return new Promise(function (resolve, reject) {
        // Every site gets its own page so concurrent loads cannot overwrite each other
        var page = webpage.create();
        console.log("opening: " + site.name);
        page.open(site.url, function (status) {
            // PhantomJS reports "success" or "fail"; do not render a page that never loaded
            if (status !== "success") {
                page.close();
                reject(new Error("failed to open: " + site.name));
                return;
            }
            // Screenshots go into the "renders" subdirectory
            page.render("renders/" + site.name + ".png");
            console.log("done rendering: " + site.name);
            // Release the page's resources before signalling completion
            page.close();
            resolve();
        });
    });
});
// Exit only after every render has finished; exit with an error code on failure
// so that the process can never hang waiting for a promise that will not settle
Promise.all(proms).then(function () {
    console.log("closing phantom");
    phantom.exit();
}, function (err) {
    console.log(err.message);
    phantom.exit(1);
});

For asynchronous requests inside a loop you should use an asynchronous control-flow library, so that the code is easier to debug and you avoid memory-leak issues.
async.js is a good fit in your case:
npm install async
var async = require('async');
var a = "http://lnmtl.com/chapter/renegade-immortal-chapter-";
var b = 558;
var d = "rennegrade_ch";
var f = ".png";
var page = require('webpage').create();
// Start at the first wanted chapter (b), not at 0; otherwise chapters 1..560 are fetched.
var i = b;
// Render chapters b..559 strictly one after another.
// NOTE(review): the synchronous test function is the async v1/v2 signature;
// async v3 expects the test to take a callback - confirm the installed version.
async.whilst(
    function () {
        return i < 560;
    },
    function (callback) {
        var c = a + i;
        console.log(c);
        page.open(c, function () {
            var e = d + i + f;
            console.log(e);
            page.render(e);
            // Advance only after this chapter is rendered, so the URL and the
            // file name use the same chapter number.
            i++;
            callback(null, i);
        });
    },
    function (err, n) {
        if (err) console.log(err);
        // All chapters are done (or an error stopped the loop): now it is safe to exit.
        phantom.exit();
    }
);

Related

Nested AJAX calls to be called in a particular order

I've tried going through this question and also this, but I can't seem to figure out how to make my requirements work.
I want to call https://reqres.in/api/users a number of times over a loop. This AJAX call returns only the first page of dummy users. After I get the first page, I want to call the next pages.
Here's my code :
$(document).ready(function() {
// Requests pages 2..4 and returns a promise for a Deferred created here.
function getMoreUsers() {
var def = $.Deferred();
// NOTE(review): requests are collected here but the array is never read afterwards.
var requests = [];
// All three requests are started back to back; nothing waits for one to finish before the next starts.
for (var j=2; j<=4; j++) {
console.log("getting info for page # " + j);
requests.push(
$.ajax("https://reqres.in/api/users?page=" + j).done(function() {
// `j` is function-scoped (var), so this logs whatever value j holds when the response arrives.
console.log("got info for page # " + j);
// Called from every done handler of this loop.
def.resolve();
})
);
}
return def.promise();
}
// Requests the first page twice and returns a promise for a Deferred created here.
function getAllUsers() {
var def = $.Deferred();
// NOTE(review): as above, this array is filled but never read.
var requests = [];
for (var i=0; i< 2; i++) {
console.log("iteration # " + i);
requests.push(
$.ajax("https://reqres.in/api/users").done(function(data) {
console.log("got first page info");
// The promise returned by getMoreUsers() is ignored, so def.resolve() below does not wait for it.
getMoreUsers();
def.resolve();
})
);
}
return def.promise();
}
getAllUsers().done(function() {
console.log("all completed");
});
});
The output that I get is this :
iteration # 0
iteration # 1
got first page info
getting info for page # 2
getting info for page # 3
getting info for page # 4
all completed
got first page info
getting info for page # 2
getting info for page # 3
getting info for page # 4
got info for page # 5
However, I want this :
iteration # 0
got first page info
getting info for page # 2
got info for page # 2
getting info for page # 3
got info for page # 3
getting info for page # 4
got info for page # 4
iteration # 1
got first page info
getting info for page # 2
got info for page # 2
getting info for page # 3
got info for page # 3
getting info for page # 4
got info for page # 4
all completed
I don't even understand how page # 5 came in the output when I'm looping till 4, and it came 6 times, like below :
Why not keep it simple?
// Fetch page `i`; when it arrives, recurse for the next page until page 5 has been fetched.
var getUsers = function (i) {
    var request = $.ajax("https://reqres.in/api/users?page=" + i);
    request.done(function () {
        if (i >= 5) {
            //done!
            return;
        }
        getUsers(i + 1);
    });
};
getUsers(1);
Update:
Thanks, recursion does seem to work, but if I attach a done() to getUsers() like so - getUsers(1).done(function() { console.log("all done");}); it doesn't fire. I don't understand. I thought $.ajax() returned a deferred object on its own.
My code was just a hint at how you can resolve your issue. Anyway, let me help you further.
There is a simple way:
// Fragment: relies on `i` and `getUsers` from the enclosing recursive function.
$.ajax("https://reqres.in/api/users?page=" + i).done(function() {
// use data, store it in array outside or draw HTML
if (i < 5) {
getUsers(i + 1);
}else{
//done! do something when finished
// iAmDoneHere()
}
});
But if you want to use a Deferred: $.ajax does return a Deferred-like object. Recursion works well, but I guess you want to execute a final "downloaded all!" function. In that case you need to improve the code a bit.
// Collected page payloads, in the order the pages were fetched.
var pages = [];
/**
 * Fetch pages 1..maxPage one after another and signal when all are done.
 *
 * Call it as getUsers(maxPage); the other two parameters are only used by
 * the function's own recursive calls.
 *
 * @param {number} maxPage - number of the last page to fetch.
 * @param {number} [currentPage] - page to fetch in this call (defaults to 1).
 * @param {object} [deferred] - the Deferred created by the top-level call.
 * @returns {object} the top-level Deferred; it is resolved with `pages`
 *     after the last page arrived and rejected if any request fails.
 */
var getUsers = function (maxPage, currentPage, deferred) {
    if (!currentPage) {
        // this is the top function call (no recursion yet):
        // start at page 1 and create the Deferred shared by every recursion
        currentPage = 1;
        deferred = $.Deferred();
    }
    $.ajax("https://reqres.in/api/users?page=" + currentPage)
        .done(function (data) {
            // we got the page, keep its data
            pages.push(data);
            if (currentPage < maxPage) {
                // more to fetch: pass maxPage, the next page number and the top deferred
                getUsers(maxPage, currentPage + 1, deferred);
            } else {
                // we downloaded the final page, so now we can finally
                // resolve the top deferred which was passed in every recursion
                deferred.resolve(pages);
            }
        })
        .fail(function () {
            // without this the caller would wait forever after a failed request
            deferred.reject.apply(deferred, arguments);
        });
    return deferred;
};
// Fetch up to page 10 and react once the returned Deferred is resolved.
getUsers(10).done(function(){
// executed when all pages are done
// results are stored in pages[]
});
The worst part is that I have already written a lot and this could still be improved (I should keep the pages[] variable in a parent scope rather than as a global).
I want to say that managing asynchronous callbacks is really easy, but it is more advanced than making a simple callback.
If you work on a bigger project you'll probably write or use some class that does all of this for you, so you don't have to worry about anything. For example:
// Sketch only: `Queue` is not defined in this snippet (it stands for a helper class you
// would write or install), and the `..` inside $.ajax(...) is a placeholder for the base URL.
var getUsers = function(maxPages) {
var d = $.Deferred();
var pages = [];
var queue = new Queue();
// Queue one job per page index 0..maxPages-1; the index is passed along as the job's argument.
for (var i=0;i<maxPages;i++) {
queue.push(function(page){
return $.ajax(.."?page="+page).done(function(){pages.push(page);});
}, i);
}
// presumably run() invokes its callback after every queued job finished - depends on the Queue implementation
queue.run(function(){
d.resolve(pages);
});
return d;
}
getUsers(10).done(function(pages){/* all pages in "pages" */});
This is done the right way, and you won't have to repeat your code if you want to use the queue elsewhere. There are also plenty of ready-made npm packages out there.
I also need to mention that I can see you really want to stick to Deferred, while a lot of people just use callbacks instead of Deferreds or promises for simple tasks.
// Deferred way
// Returns a Deferred that gets resolved (with no value) after a 100 ms timer fires.
function doJob(arg1, arg2) {
    var pending = $.Deferred();
    var finish = function () {
        pending.resolve();
    };
    window.setTimeout(finish, 100);
    return pending;
}
// Callback way
// Invokes `callback` (with no arguments) after a 100 ms timer fires; returns nothing.
function doJob(arg1, arg2, callback) {
    var fire = function () {
        callback();
    };
    window.setTimeout(fire, 100);
}
The callback version saves a bit of code and complexity, but offers the developer fewer layers and options. Both methods are fine. I am saying all of this to let you know that there are many approaches and no definitive answer to your question.
I would go with some kind of Queue; the callback solution is the simplest, and the Deferred/Promise + recursion solution is OK.
On second thought, you don't need the Deferred. Recursion is better.
$(document).ready(function () {
    var apiUrl = "https://reqres.in/api/users";
    // Settings shared by every request; the page number is merged in per call.
    var baseAjaxConfig = {
        method: "GET",
        url: apiUrl
    };
    var page = 1;
    var maxUsers = 5; //CHANGE THIS ACCORDING TO WHAT YOU WANT
    // Request one page, then move on to the next one whether the request
    // succeeded or failed, until maxUsers is reached.
    function getUser(page) {
        var ajaxConfig = $.extend({}, baseAjaxConfig, {data: {page: page}});
        var continueWithNext = function () {
            if (page < maxUsers) {
                getUser(page + 1);
            }
        };
        var request = $.ajax(ajaxConfig);
        request.done(continueWithNext).fail(continueWithNext);
    }
    getUser(page);
});
here's a fiddle ==> https://jsfiddle.net/tonysamperi/5j8166be/

JavaScript result happening before callbacks complete

I'm totally new to JS having jumped in a few days ago to try make a chrome extension, so sorry if this is a simple problem, but I can't seem to figure it out.
My original function was to simply download an image and increment the stored count by 1 and add on the file size. However on a page of images it hit the write limits of chrome so I decided to count the values and write them at the end.
Initially the return value happened much later than when the function was executed (so it returned nothing), so I looked up how to fix it and got it working with a callback. However, although it waits for the callbacks, the code just continues past the callbacks and the part afterwards is executed before anything else, meaning the final count will always be 0.
// Loop through all urls
// Running total of the sizes reported by the download callbacks.
var approx_filesize = 0;
// NOTE(review): iteration starts at index 1, so all_urls[0] is never downloaded - confirm this is intended.
for(var i = 1; i < all_urls.length; i++){
var image_url = all_urls[i];
// The callback runs later, once _download_image reports a size for this image.
_download_image(image_url, folder_name, function(item){
approx_filesize += parseInt(item);
});
}
// This happens before any _download_image functions complete
alert('end' + approx_filesize);
// Write to storage
// Read the stored totals (defaulting to 0) and write them back with this batch added.
chrome.storage.sync.get({
download_count: 0,
download_size: 0
}, function(items) {
chrome.storage.sync.set({
download_count: parseInt(items.download_count) + all_images_data.length - 1,
// approx_filesize is read here, at the time the storage callback runs.
download_size: parseInt(items.download_size) + approx_filesize
}, function() {
});
});
I just tried moving the loop into its own callback function and still had no luck, the alert runs before the first function completes.
// Starts a download for every URL from index 1 on, then reports the accumulated size through `callback`.
function image_url_loop_callback(callback, folder_name, all_urls){
var approx_filesize = 0;
for(var i = 1; i < all_urls.length; i++){
var image_url = all_urls[i];
_download_image(image_url, folder_name, function(filesize){
approx_filesize += parseInt(filesize);
});
}
// Called right after the loop has started the downloads; it does not wait for their callbacks.
callback(approx_filesize);
}
image_url_loop_callback(function(approx_filesize){
alert(approx_filesize);
}, folder_name, all_urls);
How do I make it so that the loop completes before anything else is done?
Edit: Got it working with promise, here's the adjusted code:
// Download every image (from index 1 on), then add the batch totals to chrome.storage.sync.
new Promise( function(resolve, reject) {
    var num_items = all_urls.length;
    var approx_filesize = 0;
    // Index 0 is skipped by the loop below, so num_items - 1 downloads are started.
    var pending = num_items - 1;
    // Nothing to download: resolve right away, otherwise the promise would never settle.
    if (pending <= 0) {
        resolve([num_items, approx_filesize]);
        return;
    }
    for (var i = 1; i < num_items; i++) {
        var image_url = all_urls[i];
        _download_image(image_url, folder_name, function(item){
            // Explicit radix; ignore sizes that are not numbers so one bad value
            // cannot turn the whole total into NaN.
            var size = parseInt(item, 10);
            if (!isNaN(size)) {
                approx_filesize += size;
            }
            pending--;
            // The last callback to arrive hands the totals to .then()
            if (pending === 0) {
                resolve([num_items, approx_filesize]);
            }
        });
    }
}).then( function(stuff) {
    var num_items = stuff[0];
    var approx_filesize = stuff[1];
    chrome.storage.sync.get({
        download_count: 0,
        download_size: 0
    }, function(items) {
        chrome.storage.sync.set({
            // NOTE(review): num_items counts all_urls[0] as well, although it is never downloaded - confirm.
            download_count: parseInt(items.download_count, 10) + num_items,
            download_size: parseInt(items.download_size, 10) + approx_filesize
        }, function() {
        });
    });
});
Basically, you have to handle the asynchronous aspect of JavaScript.
To do so, you have to use a Promise.
This works this way:
new Promise( (resolve) => {
// My asynchronous code
// Call resolve() once the asynchronous work has finished;
// if it is never called, the .then() callback below never runs.
resolve();
}).then( () => {
// My code which need to wait for the promise resolution.
});
If you are working with only the latest versions of browsers, you can also have a look at async/await keywords which make asynchronous handling much easier than regular promises (but still are promises).
EDIT: As this answer required further explanation and proper code snippets, I edited it to answer a comment.
This example maybe easier to understand:
// Shared variable: changed inside the promise executor, read again once the promise resolves.
let myFoo = "Hello";
// Declared with const: assigning to an undeclared `test` creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
const test = new Promise( (resolve) => {
    // The executor runs synchronously, so this prints "Hello" immediately.
    console.log(myFoo);
    myFoo = "World!";
    // Resolve 4 seconds later to simulate asynchronous work.
    setTimeout(() => {
        resolve();
    }, 4000);
}).then( () => {
    // Runs after resolve(): prints "World!".
    console.log(myFoo);
});
This will print "Hello" immediately, and "World!" 4 seconds after.
This is how you work with promises. You can perfectly edit variables which are defined in a scope outside of the promise. Please don't use var, just stick to let and define a decent scope.
Due to javascript's async nature you have to use promises:
https://developers.google.com/web/fundamentals/getting-started/primers/promises

This code doesn't seem to fire in order?

My problem is that the code does not seem to be running in order, as seen below.
This code is for my discord.js bot that I am creating.
// Main bot file: wires discord.js events to a small table of "!" commands.
var Discord = require("discord.js");
var bot = new Discord.Client();
var yt = require("C:/Users/username/Documents/Coding/Discord/youtubetest.js");
var youtubetest = new yt();
var fs = require('fs');
var youtubedl = require('youtube-dl');
// Messages must start with this prefix to be treated as commands.
var prefix = "!";
var vidid;
// Command table, keyed by the command word that follows the prefix.
var commands = {
play: {
name: "!play ",
fnc: "Gets a Youtube video matching given tags.",
process: function(msg, query) {
// respond() only starts the search; its result arrives later in a callback.
youtubetest.respond(query, msg);
// Read straight after the call above, so the search callback has not run yet.
var vidid = youtubetest.vidid;
console.log(typeof(vidid) + " + " + vidid);
console.log("3");
}
}
};
bot.on('ready', () => {
console.log('I am ready!');
});
bot.on("message", msg => {
// Ignore messages without the prefix, messages from bots, and the bot's own messages.
if(!msg.content.startsWith(prefix) || msg.author.bot || (msg.author.id === bot.user.id)) return;
// First word without its leading character, lower-cased: the command name.
var cmdraw = msg.content.split(" ")[0].substring(1).toLowerCase();
// Everything between the first and the second "!" of the message.
var query = msg.content.split("!")[1];
var cmd = commands[cmdraw];
if (cmd) {
// A truthy return value of process() is sent back to the channel.
var res = cmd.process(msg, query, bot);
if (res) {
msg.channel.sendMessage(res);
}
} else {
let msgs = [];
msgs.push(msg.content + " is not a valid command.");
msgs.push(" ");
msgs.push("Available commands:");
msgs.push(" ");
msg.channel.sendMessage(msgs);
// NOTE(review): `commands` above defines only `play`; no `help` entry is visible in this snippet.
msg.channel.sendMessage(commands.help.process(msg));
}
});
bot.on('error', e => { console.error(e); });
bot.login("mytoken");
The youtubetest.js file:
// Thin wrapper around youtube-node used by the bot to look up a video for a query.
var youtube_node = require('youtube-node');
var ConfigFile = require("C:/Users/username/Documents/Coding/Discord/json_config.json");
var mybot = require("C:/Users/username/Documents/Coding/Discord/mybot.js");
// Constructor: creates the API client, sets its key from the config file, and starts with an empty vidid.
function myyt () {
this.youtube = new youtube_node();
this.youtube.setKey(ConfigFile.youtube_api_key);
this.vidid = "";
}
// Searches for `query` (1 result); the result is handled in the callback, not returned.
myyt.prototype.respond = function(query, msg) {
this.youtube.search(query, 1, function(error, result) {
if (error) {
msg.channel.sendMessage("There was an error finding requested video.");
} else {
// NOTE(review): `vidid` is assigned without a declaration in this file.
vidid = 'http://www.youtube.com/watch?v=' + result.items[0].id.videoId;
// NOTE(review): this sets a property on the constructor function `myyt`, not on the instance's `this.vidid`.
myyt.vidid = vidid;
console.log("1");
}
});
// Logged before "1": the search callback above has not run yet when respond() returns.
console.log("2");
};
module.exports = myyt;
As the code shows, i have an object for the commands that the bot will be able to process, and I have a function to run said commands when a message is received.
Throughout the code you can see that I have put three console.logs with 1, 2 and 3 showing in which order I expect the parts of the code to run. When the code is run and a query is found the output is this:
I am ready!
string +
2
3
1
This shows that the code is running in the wrong order that I expect it to.
All help is very highly appreciated :)
*Update! Thank you all very much to understand why it isn't working. I found a solution where in the main file at vidid = youtubetest.respond(query, msg) when it does that the variable is not assigned until the function is done so it goes onto the rest of my code without the variable. To fix I simply put an if statement checking if the variable if undefined and waiting until it is defined.*
Like is mentioned before, a lot of stuff in javascript runs in async, hence the callback handlers. The reason it runs in async, is to avoid the rest of your code being "blocked" by remote calls. To avoid ending up in callback hell, most of us Javascript developers are moving more and more over to Promises. So your code could then look more like this:
/**
 * Search YouTube for `query` and report the first hit.
 *
 * @param {string} query - search terms.
 * @param {object} msg - message whose channel receives the error text.
 * @returns {Promise} resolves with the video URL; if the search fails, the
 *     error text is sent to msg.channel and the promise resolves with undefined.
 */
myyt.prototype.respond = function(query, msg) {
    // `this` is not the instance inside the plain function callbacks below, so keep a reference.
    var self = this;
    return new Promise(function(resolve, reject) {
        self.youtube.search(query, 1, function(error, result) {
            // Treat an empty result like an error instead of throwing on items[0].
            if (error || !result || !result.items || !result.items.length) {
                reject("There was an error finding requested video."); // passed down to the ".catch" statement below
                return;
            }
            var vidid = 'http://www.youtube.com/watch?v=' + result.items[0].id.videoId;
            // Store on the instance, where callers read youtubetest.vidid.
            self.vidid = vidid;
            console.log("1");
            resolve(vidid); // Resolve marks the promise as successfully completed, and passes vidid along to ".then"
        });
    }).catch(function(err) {
        // err contains the rejection reason from above
        msg.channel.sendMessage(err);
    });
};
This would naturally require a change in anything that uses this process, but creating your own prototypes seems.. odd.
This promise returns the vidid, so you'd then set vidid = youtubetest.respond(query, msg); and, wherever you need the value, you do:
// `vidid` holds the promise here; the callback receives the value it was resolved with.
vidid.then(function(id) {
// id is now the vidid.
});
Javascript runs async by design, and trying to hack your way around that leads you to dark places fast. As far as I can tell, you're also targetting nodeJS, which means that once you start running something synchronously, you'll kill off performance for other users, as everyone has to wait for that sync call to finish.
Some suggested reading:
http://callbackhell.com/
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
https://stackoverflow.com/a/11233849/3646975
I'd also suggest looking up ES6 syntax, as it shortens your code and makes life a hellofalot easier (native promises were only introduced in ES6, which NodeJS 4 and above supports (more or less))
In javascript, please remember that any callback function you pass to some other function is called asynchronously. I.e. the calls to callback function may not happen "in order". "In order" in this case means the order they appear on the source file.
The callback function is simply called on certain event:
When there is data to be processed
on error
in your case for example when the youtube search results are ready,
'ready' event is received or 'message' is received.
etc.

Are Angular's promises asynchronous?

I may have miss something about Angular's promises but I was wondering something : are promises asynchronous ? I'm not sure if 'asynchronous' is the right word but let me explain myself.
In my code I use promises to do a really big process (read and write hundreds of big files) while I display a loading bar to watch the progress of the process. I've noticed that even if my code is in a promise, it seems to not really be asynchronous and freeze the display (that I assume is manage by the main thread).
For example in the code bellow that you can find in this Plnkr, I'm wondering how to let the progress bar move while the big process is done. I understand why it's freezing when I call it in the main thread but not when I'm using Angular's promises.
var app = angular.module('plunker', []);
app.controller('MainCtrl', function($scope, $q) {
// Synchronous busy loop: logs 100000 numbers, then stores the elapsed time on the scope.
function hugeProcess () {
var i = 0;
var start = new Date().getTime();
while (i++ < 100000) {
console.log(i);
}
var end = new Date().getTime();
var time = end - start;
$scope.processTime = 'Done in ' + time + 'ms';
}
// Runs the loop directly in the click handler.
$scope.onClickStartHugeProcess = function () {
console.log('onClickStartHugeProcess');
hugeProcess();
};
// Runs the same loop from a then-callback of an already resolved promise.
$scope.onClickStartHugeProcessWithPromise = function () {
console.log('onClickStartHugeProcessWithPromise');
$q.when()
.then(function () {
return hugeProcess();
});
};
});
The issue in your code is that your hugeProcess function never yields. So yes, it's called asynchronously (then callbacks are always called asynchronously in a Promises/A+ promise implementation), but that doesn't change what hugeProcess is doing when it gets called, which is hogging the main UI thread such that nothing else can happen while it's running. There's only one main UI thread, and all of your JavaScript runs on that one main UI thread except web workers.
To make hugeProcess not do that, you have to break it up and have it call itself after a brief delay, via setTimeout (or perhaps something built into Angular).
As Joe Clay points out, this code doesn't make a lot of sense:
// $q.when() without arguments yields an already resolved promise; the callback still runs asynchronously.
$q.when()
.then(function () {
return hugeProcess();
});
That's effectively:
setTimeout(hugeProcess, 0);
...since $q.when() with no arguments returns a resolved promise, and adding a then callback to a resolved promise just results in your callback being called as soon as possible (but asynchronously; e.g., then returns before the callback is called).
So, I've discover Web Workers and here is a first version of my code using them.
app.controller('MainCtrl', function($scope, $q) {
    // Busy loop that runs INSIDE the worker: its source text is installed as the
    // worker's onmessage handler below, so it must not touch $scope or the DOM.
    // It reports the elapsed time back with postMessage.
    function hugeProcess () {
        var i = 0;
        var start = new Date().getTime();
        while (i++ < 100000) {
            console.log(i);
        }
        var end = new Date().getTime();
        var time = end - start;
        postMessage(time);
    }
    // Build the worker script from the function's source.
    var blob = new Blob(["onmessage = " + hugeProcess.toString()]);
    // Obtain a blob URL reference to our worker 'file'.
    var blobURL = window.URL.createObjectURL(blob);
    var worker = new Worker(blobURL);
    // The worker answers with the elapsed time; $apply makes Angular pick up the
    // scope change, because this handler runs outside of a digest cycle.
    worker.onmessage = function (message) {
        $scope.processTime = 'Done in ' + message.data + 'ms';
        $scope.$apply();
    };
    $scope.onClickStartHugeProcessWithPromise = function () {
        console.debug('onClickStartHugeProcessWithPromise');
        // Post directly: the former $q(...) wrapper created a promise that was
        // never resolved and never used, so it added nothing.
        worker.postMessage(''); // Start the worker.
    };
});
I don't think I'm using them correctly, but it does what I want... I've found the ng-webworker package for Angular, which seems to combine promises and web workers, so that's exactly what I'm looking for.
Thank you all for your help.
Web Worker is right solution. I had similar problem and developed angular plugin ng-vkThread to simplify such kind of tasks.
Basic usage is:
/* function to execute in a thread: returns the sum of its two arguments */
function foo(n, m){
    return n + m;
}
/* create an object, which you pass to vkThread as an argument */
var param = {
    fn: foo,      // <-- function to execute (the comma after it is required)
    args: [1, 2]  // <-- arguments for this function
};
/* run thread */
// Success handler: receives the value computed in the thread.
var onThreadResult = function (data) {
    console.log(data); // <-- thread returns 3
};
// Failure handler: receives the error message.
var onThreadError = function (err) {
    alert(err); // <-- thread returns error message
};
vkThread.exec(param).then(onThreadResult, onThreadError);
Live demo
--Vadim

Looping over urls to do the same thing

I am trying to scrape a few sites. Here is my code:
// Starts one page.open() per URL on the same `page` object without waiting for the previous one.
for (var i = 0; i < urls.length; i++) {
url = urls[i];
console.log("Start scraping: " + url);
page.open(url, function () {
// First function is the condition to wait for, second runs once it holds.
waitFor(function() {
return page.evaluate(function() {
return document.getElementById("progressWrapper").childNodes.length == 1;
});
}, function() {
var price = page.evaluate(function() {
// do something
return price;
});
console.log(price);
// `url` is read when this callback runs, not when open() was called.
result = url + " ; " + price;
output = output + "\r\n" + result;
});
});
}
// Both lines run right after the loop, before the asynchronous callbacks above have added anything to `output`.
fs.write('test.txt', output);
phantom.exit();
I want to scrape all sites in the array urls, extract some information and then write this information to a text file.
But there seems to be a problem with the for loop. When scraping only one site without using a loop, all works as I want. But with the loop, first nothing happens, then the line
console.log("Start scraping: " + url);
is shown, but one time too much.
If url = {a,b,c}, then phantomjs does:
Start scraping: a
Start scraping: b
Start scraping: c
Start scraping:
It seems that page.open isn't called at all.
I am newbie to JS so I am sorry for this stupid question.
PhantomJS is asynchronous. By calling page.open() multiple times using a loop, you essentially rush the execution of the callback. You're overwriting the current request before it is finished with a new request which is then again overwritten. You need to execute them one after the other, for example like this:
// Sequential version written by hand: the next page.open() starts only inside the previous page's "ready" callback.
page.open(url, function () {
waitFor(function() {
// something
}, function() {
// first page is done; only now open the next one
page.open(url, function () {
waitFor(function() {
// something
}, function() {
// and so on
});
});
});
});
But this is tedious. There are utilities that can help you with writing nicer code like async.js. You can install it in the directory of the phantomjs script through npm.
var async = require("async"); // install async through npm
// Build the task for one URL: open the page, wait until it is ready, then
// tell async that this task is finished.
function makeTask(url) {
    return function (callback) {
        page.open(url, function () {
            var isReady = function () {
                // something
            };
            var onReady = function () {
                callback();
            };
            waitFor(isReady, onReady);
        });
    };
}
var tests = urls.map(makeTask);
// Run the tasks one after another; write the output and exit when the last one is done.
async.series(tests, function finish() {
    fs.write('test.txt', output);
    phantom.exit();
});
If you don't want any dependencies, then it is also easy to define your own recursive function (from here):
// Queue of URLs still to be processed; next_page() consumes it from the front.
var urls = [/*....*/];
// Open one URL, wait until the page is ready, then continue with the next URL.
function handle_page(url){
    page.open(url, function(){
        waitFor(function() {
            // something
        }, function() {
            next_page();
        });
    });
}
// Take the next URL off the queue, or exit PhantomJS when the queue is empty.
function next_page(){
    // An array is always truthy, so test its length rather than `!urls`;
    // otherwise the script never exits and ends up opening `undefined`.
    if (urls.length === 0) {
        phantom.exit(0);
        // Return so that nothing else is started after asking PhantomJS to exit.
        return;
    }
    var url = urls.shift();
    handle_page(url);
}
// Kick off processing with the first URL in the queue.
next_page();

Categories

Resources