Execute javascript (PhantomJS) after load full webpage [duplicate] - javascript

I'm using PhantomJS v1.4.1 to load some web pages. I don't have access to their server side; I am only given links pointing to them. I'm using an obsolete version of Phantom because I need to support Adobe Flash on those web pages.
The problem is that many websites load their minor content asynchronously, which is why Phantom's onLoadFinished callback (the analogue of onLoad in HTML) fires too early, before everything has loaded. Can anyone suggest how I can wait for a web page to load fully so that I can, for example, take a screenshot with all dynamic content such as ads?

Another approach is to just ask PhantomJS to wait for a bit after the page has loaded before doing the render, as per the regular rasterize.js example, but with a longer timeout to allow the JavaScript to finish loading additional resources:
// Open the page and, on success, pause before rasterizing so that scripts
// which run after the load event get a chance to pull in more resources.
page.open(address, function (loadStatus) {
    var settleDelayMs = 1000; // Change timeout as required to allow sufficient time

    if (loadStatus !== 'success') {
        console.log('Unable to load the address!');
        phantom.exit();
        return;
    }

    // Render to the requested output file, then quit PhantomJS.
    function captureAndQuit() {
        page.render(output);
        phantom.exit();
    }

    window.setTimeout(captureAndQuit, settleDelayMs);
});

I would rather periodically check for document.readyState status (https://developer.mozilla.org/en-US/docs/Web/API/document.readyState). Although this approach is a bit clunky, you can be sure that inside onPageReady function you are using fully loaded document.
var page = require("webpage").create();
var url = "http://example.com/index.html";

/**
 * Runs once the document reports readyState "complete": prints the full
 * document markup and terminates PhantomJS.
 */
function onPageReady() {
    console.log(page.evaluate(function () {
        return document.documentElement.outerHTML;
    }));
    phantom.exit();
}

page.open(url, function (status) {
    // Read document.readyState from inside the page context.
    function readDocumentState() {
        return page.evaluate(function () {
            return document.readyState;
        });
    }

    // One polling step: either finish, or queue the next step.
    function pollOnce() {
        if (readDocumentState() === "complete") {
            onPageReady();
            return;
        }
        scheduleNextPoll();
    }

    // Chained setTimeout (no explicit delay) instead of setInterval, so two
    // checks can never overlap.
    function scheduleNextPoll() {
        setTimeout(pollOnce);
    }

    scheduleNextPoll();
});
Additional explanation:
Using nested setTimeout instead of setInterval prevents checkReadyState from "overlapping" and race conditions when its execution is prolonged for some random reasons. setTimeout has a default delay of 4ms (https://stackoverflow.com/a/3580085/1011156) so active polling will not drastically affect program performance.
document.readyState === "complete" means that document is completely loaded with all resources (https://html.spec.whatwg.org/multipage/dom.html#current-document-readiness).
EDIT 2022:
I created this response 8 years ago and I have not used PhantomJS since then. It is very probable that it won't work now in some cases. I also now think it is not possible to create a one-size-fits-all solution that is absolutely sure the page is loaded, because some pages may load additional resources after the document is ready. For example, there might be some JS code on the website that waits for the document to be ready and then loads some additional assets (after the document state changes to ready) - in this case onPageReady will trigger, and after that the page will start loading more resources again.
I still think the above snippet is a good starting point and may work in most cases, but it may also be necessary to create specific solutions to handle specific websites.

You could try a combination of the waitfor and rasterize examples:
/**
 * See https://github.com/ariya/phantomjs/blob/master/examples/waitfor.js
 *
 * Wait until the test condition is true or a timeout occurs. Useful for waiting
 * on a server response or for a ui change (fadeIn, etc.) to occur.
 *
 * The condition is evaluated once immediately and then re-checked every 250ms.
 * Polling is stopped BEFORE `onReady` runs (and before exiting on timeout), so
 * `onReady` is invoked at most once even if it throws.
 *
 * @param testFx javascript condition that evaluates to a boolean,
 * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')") or
 * as a callback function.
 * @param onReady what to do when testFx condition is fulfilled,
 * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')") or
 * as a callback function.
 * @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used.
 */
function waitFor(testFx, onReady, timeOutMillis) {
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default max timeout is 3s
        start = new Date().getTime(),
        // Evaluated up front so an already-true condition is acted on at the first tick.
        condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()),
        interval = setInterval(function() {
            if ((new Date().getTime() - start < maxtimeOutMillis) && !condition) {
                // Not timed out yet and condition not yet fulfilled: check again.
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx());
                return;
            }

            // We are done either way. Stop polling first: if onReady throws, the
            // interval must not keep firing and re-running it every 250ms.
            clearInterval(interval);

            if (!condition) {
                // Timed out with the condition still false.
                console.log("'waitFor()' timeout");
                phantom.exit(1);
            } else {
                // Condition fulfilled (possibly right at the timeout).
                console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
                if (typeof(onReady) === "string") {
                    eval(onReady);
                } else {
                    onReady();
                }
            }
        }, 250); //< repeat check every 250ms
}
// Rasterize a URL to a file. Rendering happens only after every tracked
// network resource has reported its final ('end') stage, or waitFor() gives up.
var page = require('webpage').create(),
    system = require('system'),
    address, output, size;

var cliArgs = system.args;
var argCount = cliArgs.length;

if (argCount >= 3 && argCount <= 5) {
    address = cliArgs[1];
    output = cliArgs[2];

    // Paper settings are only applied for PDF output with a third argument.
    if (argCount > 3 && output.slice(-4) === ".pdf") {
        size = cliArgs[3].split('*');
        if (size.length === 2) {
            // "width*height" form
            page.paperSize = {
                width : size[0],
                height : size[1],
                margin : '0px'
            };
        } else {
            // Named paper format such as "A4"
            page.paperSize = {
                format : cliArgs[3],
                orientation : 'portrait',
                margin : {
                    left : "5mm",
                    top : "8mm",
                    right : "5mm",
                    bottom : "9mm"
                }
            };
        }
    }

    if (argCount > 4) {
        page.zoomFactor = cliArgs[4];
    }

    // Latest `stage` value seen for each resource, indexed by resource id.
    var resources = [];
    var recordStage = function (entry) {
        resources[entry.id] = entry.stage;
    };
    page.onResourceRequested = recordStage;
    page.onResourceReceived = recordStage;

    page.open(address, function (status) {
        if (status === 'success') {
            waitFor(function allResourcesEnded() {
                // True only when every recorded resource slot (from id 1) reads 'end'.
                var id = 1;
                while (id < resources.length) {
                    if (resources[id] != 'end') {
                        return false;
                    }
                    id += 1;
                }
                return true;
            }, function renderAndExit() {
                page.render(output);
                phantom.exit();
            }, 10000);
        } else {
            console.log('Unable to load the address!');
            phantom.exit();
        }
    });
} else {
    console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
    console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
    phantom.exit(1);
}

Here is a solution that waits for all resource requests to complete. Once complete it will log the page content to the console and generate a screenshot of the rendered page.
Although this solution can serve as a good starting point, I have observed it fail so it's definitely not a complete solution!
I didn't have much luck using document.readyState.
I was influenced by the waitfor.js example found on the phantomjs examples page.
var system = require('system');
var webPage = require('webpage');
var page = webPage.create();
var url = system.args[1];

page.viewportSize = { width: 1280, height: 720 };

// Ids of requests that have been issued but have not yet reached stage 'end'.
var requestsArray = [];

page.onResourceRequested = function (requestData, networkRequest) {
    requestsArray.push(requestData.id);
};

page.onResourceReceived = function (response) {
    // Only the final chunk of a response retires its request.
    if (response.stage !== 'end') {
        return;
    }
    var position = requestsArray.indexOf(response.id);
    if (position > -1) {
        requestsArray.splice(position, 1);
    }
};

page.open(url, function (status) {
    // Poll twice a second until nothing is pending, then dump and render.
    var pollTimer = setInterval(function () {
        if (requestsArray.length !== 0) {
            return;
        }
        clearInterval(pollTimer);
        console.log(page.content);
        page.render('yourLoadedPage.png');
        phantom.exit();
    }, 500);
});

Maybe you can use the onResourceRequested and onResourceReceived callbacks to detect asynchronous loading. Here's an example of using those callbacks from their documentation:
var page = require('webpage').create();

// Build a handler that pretty-prints whatever network event object it receives,
// prefixed with the given label.
function makeNetworkLogger(label) {
    return function (payload) {
        console.log(label + ' ' + JSON.stringify(payload, undefined, 4));
    };
}

page.onResourceRequested = makeNetworkLogger('Request');
page.onResourceReceived = makeNetworkLogger('Receive');
page.open(url);
Also, you can look at examples/netsniff.js for a working example.

In my program, I use some logic to judge whether the page has finished loading: I watch its network requests, and if there has been no new request in the past 200ms, I treat the page as loaded.
Use this after onLoadFinished().
/**
 * Invoke `callback(null, page)` once the page's network traffic has gone quiet.
 *
 * A timer of `interval` ms is (re)started whenever a text/application response
 * finishes. When it fires while requests are still pending, it is re-armed up to
 * `max_retry` times before giving up and reporting completion anyway.
 *
 * The callback is invoked at most once: responses that finish after completion
 * no longer re-arm the timer. An exception thrown by the callback is logged
 * instead of being silently discarded.
 *
 * Note: this overwrites page.onResourceRequested, page.onResourceReceived and
 * page.onResourceError.
 *
 * @param page     PhantomJS webpage object to observe.
 * @param callback function (err, page) called when loading is considered done.
 */
function onLoadComplete(page, callback){
    var waiting = []; // ids of requests that have not finished yet
    var interval = 200; // ms of network silence that counts as "loaded"
    var max_retry = 3; // extra intervals granted while requests are pending
    var counter_retry = 0;
    var finished = false; // set once the callback has been invoked
    // for debug, log time cost: response start time per request id
    var tlogger = {};
    var timer = setTimeout(timeout, interval);

    function timeout(){
        if (finished) {
            return;
        }
        if (waiting.length && counter_retry < max_retry) {
            timer = setTimeout(timeout, interval);
            counter_retry++;
            return;
        }
        finished = true;
        try {
            callback(null, page);
        } catch (e) {
            // Keep the best-effort behaviour (do not rethrow), but make it visible.
            console.log('[onLoadComplete] callback threw: ', e);
        }
    }

    function remove(id){
        var i = waiting.indexOf(id);
        if (i >= 0) {
            waiting.splice(i, 1);
        }
    }

    function bindEvent(page, evt, cb){
        switch (evt) {
            case 'request':
                page.onResourceRequested = cb;
                break;
            case 'receive':
                page.onResourceReceived = cb;
                break;
            case 'error':
                page.onResourceError = cb;
                break;
            case 'timeout':
                page.onResourceTimeout = cb;
                break;
        }
    }

    bindEvent(page, 'request', function(req){
        waiting.push(req.id);
    });

    bindEvent(page, 'receive', function (res) {
        var cT = res.contentType;
        if (!cT) {
            console.log('[contentType] ', cT, ' [url] ', res.url);
            return remove(res.id);
        }
        // Only content types that START with "application" or "text" influence
        // the quiet-period timer; everything else is simply retired.
        if (cT.indexOf('application') !== 0 && cT.indexOf('text') !== 0) {
            return remove(res.id);
        }
        if (res.stage === 'start') {
            console.log('!!received start: ', res.id);
            tlogger[res.id] = new Date();
        } else if (res.stage === 'end') {
            console.log('!!received end: ', res.id, (new Date() - tlogger[res.id]) );
            remove(res.id);
            if (finished) {
                // Completion was already reported; re-arming would call back again.
                return;
            }
            clearTimeout(timer);
            timer = setTimeout(timeout, interval);
        }
    });

    bindEvent(page, 'error', function(err){
        remove(err.id);
        if (waiting.length === 0) {
            counter_retry = 0;
        }
    });
}

I found this approach useful in some cases:
// onConsoleMessage is a callback property of the page object: the handler has
// to be ASSIGNED to it. Calling it as a function registers nothing.
page.onConsoleMessage = function (msg) {
    // do something e.g. page.render
};
Then, if you own the page, put a script like this inside it:
<!-- Page-side hook: writes 'page loaded' to the console from window.onload. -->
<script>
window.onload = function(){
console.log('page loaded');
}
</script>

I found this solution useful in a NodeJS app.
I use it just in desperate cases because it launches a timeout in order to wait for the full page load.
The second argument is the callback function which is going to be called once the response is ready.
// Declared with `var`: the bare assignment created an implicit global and
// throws a ReferenceError in strict mode.
var phantom = require('phantom');

/**
 * Open `anUrl` in a fresh PhantomJS page, wait a fixed 5 seconds after the
 * page reports success, then hand the document's inner HTML to `callbackDone`.
 *
 * On a failed load the error is logged and the PhantomJS process is shut down;
 * `callbackDone` is NOT invoked in that case.
 *
 * @param anUrl        address to open.
 * @param callbackDone function (htmlBody) called with the page's innerHTML.
 */
var fullLoad = function (anUrl, callbackDone) {
    phantom.create(function (ph) {
        ph.createPage(function (page) {
            page.open(anUrl, function (status) {
                if (status !== 'success') {
                    console.error("phantom: error opening " + anUrl, status);
                    ph.exit();
                    return;
                }
                // timeOut: crude fixed wait for late-loading content
                global.setTimeout(function () {
                    page.evaluate(function () {
                        return document.documentElement.innerHTML;
                    }, function (result) {
                        ph.exit(); // EXTREMLY IMPORTANT
                        callbackDone(result); // callback
                    });
                }, 5000);
            });
        });
    });
};

var callback = function (htmlBody) {
    // do smth with the htmlBody
};

fullLoad('your/url/', callback);

This is an implementation of Supr's answer. Also it uses setTimeout instead of setInterval as Mateusz Charytoniuk suggested.
Phantomjs will exit in 1000ms when there isn't any request or response.
// load the module
var webpage = require('webpage');

// Current time in epoch milliseconds (equivalent to Date.now()).
function getTimestamp() {
    return new Date().getTime();
}

// Time of the most recent request or response seen on the page.
var lastTimestamp = getTimestamp();
var page = webpage.create();

// Any network activity, outgoing or incoming, pushes the idle deadline back.
function markActivity() {
    lastTimestamp = getTimestamp();
}
page.onResourceRequested = markActivity;
page.onResourceReceived = markActivity;

page.open(html, function (status) {
    if (status === 'success') {
        // do something here
    } else {
        // exit if it fails to load the page
        phantom.exit(1);
    }
});

// Every 100ms, check how long the network has been silent; quit once the
// silence exceeds 1000ms, otherwise schedule the next check.
function checkReadyState() {
    setTimeout(function () {
        var idleMs = getTimestamp() - lastTimestamp;
        if (idleMs > 1000) {
            phantom.exit();
            return;
        }
        checkReadyState();
    }, 100);
}

checkReadyState();

This the code I use:
var system = require('system');
var page = require('webpage').create();

// Poll the page every 200ms until the '.qrcode img' element exists and has a
// src, then print it and exit. Gives up after 50 polls (10 seconds).
page.open('http://....', function () {
    console.log(page.content);
    var k = 0;
    var loop = setInterval(function () {
        var qrcode = page.evaluate(function (s) {
            // The element is usually missing on the first polls; return null
            // instead of throwing on `.src` of null inside the page.
            var img = document.querySelector(s);
            return img ? img.src : null;
        }, '.qrcode img');
        k++;
        if (qrcode) {
            console.log('dataURI:', qrcode);
            clearInterval(loop);
            phantom.exit();
        } else if (k === 50) {
            // 10 sec timeout: stop polling before exiting.
            clearInterval(loop);
            phantom.exit();
        }
    }, 200);
});
Basically, this relies on the fact that you know the page is fully loaded once a given element appears in the DOM, so the script simply waits until that happens.

I use a personal blend of the phantomjs waitfor.js example.
This is my main.js file:
'use strict';
// Pull waitFor() into this script's scope.
var wasSuccessful = phantom.injectJs('./lib/waitFor.js');
var page = require('webpage').create();

page.open('http://foo.com', function (status) {
    if (status !== 'success') {
        console.log('error');
        phantom.exit(1);
        return;
    }

    // Condition for waitFor: has the document finished loading?
    function documentIsComplete() {
        return page.evaluate(function () {
            return 'complete' === document.readyState ? true : false;
        });
    }

    // Runs once the condition holds: read #foo through the injected jQuery, then quit.
    function readFooAndExit() {
        var fooText = page.evaluate(function () {
            return $('#foo').text();
        });
        phantom.exit();
    }

    page.includeJs('https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js', function () {
        waitFor(documentIsComplete, readFooAndExit);
    });
});
And the lib/waitFor.js file (which is just a copy and paste of the waitFor() function from the phantomjs waitfor.js example):
/**
 * Poll `testFx` every 250ms until it is truthy or `timeOutMillis` elapses.
 *
 * On success `onReady` is run; on timeout a message is logged and PhantomJS
 * exits with code 1. Polling is stopped BEFORE `onReady` runs (and before the
 * timeout exit), so `onReady` is invoked at most once even if it throws.
 *
 * @param testFx condition, either a function returning a boolean or a string to eval.
 * @param onReady action to run once the condition holds (function or string to eval).
 * @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used.
 */
function waitFor(testFx, onReady, timeOutMillis) {
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default max timeout is 3s
        start = new Date().getTime(),
        condition = false,
        interval = setInterval(function() {
            if ((new Date().getTime() - start < maxtimeOutMillis) && !condition) {
                // Not timed out yet and condition not yet fulfilled: check again.
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx());
                return;
            }

            // We are done either way. Stop polling first: if onReady throws, the
            // interval must not keep firing and re-running it every 250ms.
            clearInterval(interval);

            if (!condition) {
                // Timed out with the condition still false.
                console.log("'waitFor()' timeout");
                phantom.exit(1);
            } else {
                // Condition fulfilled: do what it's supposed to do once the condition is fulfilled.
                if (typeof(onReady) === "string") {
                    eval(onReady);
                } else {
                    onReady();
                }
            }
        }, 250); //< repeat check every 250ms
}
This method is not asynchronous, but at least I am assured that all the resources were loaded before I try to use them.

This is an old question, but since I was looking for full page load but for Spookyjs (that uses casperjs and phantomjs) and didn't find my solution, I made my own script for that, with the same approach as the user deemstone .
What this approach does is, for a given quantity of time, if the page did not receive or started any request it will end the execution.
On casper.js file (if you installed it globally, the path would be something like /usr/local/lib/node_modules/casperjs/modules/casper.js) add the following lines:
At the top of the file with all the global vars:
// Delay in ms passed to setTimeout by resetTimeout.
var waitResponseInterval = 500
// Handle of the pending timer created by resetTimeout (null until one is started).
var reqResInterval = null
// Set to true when that timer fires; checked at the top of page.onLoadFinished.
var reqResFinished = false
// Placeholder no-op; replaced with the real implementation inside createPage(casper).
var resetTimeout = function() {}
Then inside function "createPage(casper)" just after "var page = require('webpage').create();" add the following code:
// Restart the countdown: cancel any pending timer and start a new one. When it
// fires without having been restarted, mark the request/response phase as
// finished and report a successful load through page.onLoadFinished.
resetTimeout = function () {
    if (reqResInterval) {
        clearTimeout(reqResInterval);
    }
    reqResInterval = setTimeout(function onQuietPeriodElapsed() {
        reqResFinished = true;
        page.onLoadFinished("success");
    }, waitResponseInterval);
};
resetTimeout();
Then inside "page.onResourceReceived = function onResourceReceived(resource) {" on the first line add:
resetTimeout()
Do the same for "page.onResourceRequested = function onResourceRequested(requestData, request) {"
Finally, on "page.onLoadFinished = function onLoadFinished(status) {" on the first line add:
if(!reqResFinished)
{
return
}
reqResFinished = false
And that's it, hope this one helps someone in trouble like I was. This solution is for casperjs but works directly for Spooky.
Good luck !

this is my solution its worked for me .
// Once the page logs the marker message, poll jQuery.active every second.
// When no jQuery AJAX requests remain, wait one more second, render the
// screenshot and exit.
page.onConsoleMessage = function (msg, lineNum, sourceId) {
    if (msg !== 'hey lets take screenshot') {
        return;
    }

    // Keep the handle: clearInterval() without an argument stops nothing, which
    // left the poll running and scheduled a new render on every tick.
    var pollTimer = window.setInterval(function () {
        try {
            var sta = page.evaluateJavaScript("function(){ return jQuery.active;}");
            if (sta === 0) {
                window.clearInterval(pollTimer);
                window.setTimeout(function () {
                    page.render('test.png');
                    phantom.exit();
                }, 1000);
            }
        } catch (error) {
            window.clearInterval(pollTimer);
            console.log(error);
            phantom.exit(1);
        }
    }, 1000);
};
// After a successful load, re-set the page content with an extra script before
// </body> that logs the marker message from window.onload.
page.open(address, function (status) {
    if (status === "success") {
        var onloadHook = '<script>window.onload = function(){console.log(\'hey lets take screenshot\');}</script></body>';
        var patchedMarkup = page.content.replace('</body>', onloadHook);
        page.setContent(patchedMarkup, address);
        return;
    }
    console.log('Unable to load url');
    phantom.exit();
});

Moving the mouse while the page is loading should work.
// Send a 'click' event with arguments 200, 660 (presumably the x/y position — confirm).
page.sendEvent('click',200, 660);
// Keep sending 'mousemove' events for as long as page.loading is truthy.
// NOTE(review): this is a synchronous busy loop; confirm page.loading is updated while it spins.
do { phantom.page.sendEvent('mousemove'); } while (page.loading);
UPDATE
When submitting the form, nothing was returned, so the program stopped. The program did not wait for the page to load as it took a few seconds for the redirect to begin.
telling it to move the mouse until the URL changes to the home page gave the browser as much time as it needed to change. then telling it to wait for the page to finish loading allowed the page to full load before the content was grabbed.
// Click the first element carrying the classes 'btn btn-primary btn-block' inside the page.
page.evaluate(function () {
document.getElementsByClassName('btn btn-primary btn-block')[0].click();
});
// Phase 1: keep sending 'mousemove' events until the page's location equals the home page URL.
do { phantom.page.sendEvent('mousemove'); } while (page.evaluate(function()
{
return document.location != "https://www.bestwaywholesale.co.uk/";
}));
// Phase 2: keep sending 'mousemove' events until page.loading is falsy.
do { phantom.page.sendEvent('mousemove'); } while (page.loading);

Related

jest + enzyme + react16: <img> src : request not sent

I'm using jest + enzyme to test my react component "AnimateImage" which contains an image element:
import * as React from 'react';
import { PureComponent } from 'react';
interface Props {
src: string;
}
// Renders an <img> for props.src inside a container div and wires load/error
// handlers onto the underlying DOM element through a ref callback.
class AnimateImage extends PureComponent<Props> {
// Load handler; also called manually with a {target} object when the image is
// already complete at the time the ref is attached.
onImgLoad = (e: Event | {target: HTMLImageElement}) => {
console.log("yes!");
};
render() {
return (
<div className="app-image-container">
<img
ref={c => {
// Nothing to wire up when the ref callback is called with a falsy value.
if (!c) {
return;
}
c.onerror = function(e){
console.log("error:" , e);
}
// Install the load handler only if none is set yet.
if(!c.onload){
c.onload = this.onImgLoad;
// Image already finished loading with a non-zero natural width: invoke the
// handler directly.
if (c && c.complete && c.naturalWidth !== 0) {
this.onImgLoad({
target: c
})
}
}
}}
src={this.props.src}
/>
</div>
);
}
}
export default AnimateImage;
test code:
// Mounts AnimateImage with the test URL and compares the result to the stored snapshot.
test("image ", () => {
const component = mount(<AnimateImage src={url_test}/>);
expect(component).toMatchSnapshot();
// Marker printed when the synchronous part of the test has finished.
console.log("end ##################################################################");
})
the expected result:
the image's onload handler is called and I can see the "yes!" printed in the console.
the real result:
the image's onload handler is not called and the image's complete attribute is false.
my jest configuration:
verbose: true,
transform: {
'.(ts|tsx)': 'ts-jest'
},
snapshotSerializers: ['enzyme-to-json/serializer'],
moduleFileExtensions: ['ts', 'tsx', 'js', 'json'],
testEnvironment: "jest-environment-jsdom-fourteen",
testEnvironmentOptions: { "resources": 'usable' },
debug step:
I've confirmed that the Canvas is installed successfully and works well in the jsdom.
the jsdom's resource-loader uses "request-promise-native" package to fetch HTTP resource. The "request-promise-native" package's core is "request" package.
in the "request" package, the request.js file declares a class called Request to handle HTTP request.
But I found that the Request.start() function is never called and the defer function is called with the request's status "abort".
by the way, I've put two "console.log()" in the function where the simulated "window" and "document" call "close" function and "console.log('abort')" in the place where the request is handled.
the result shows that the jsdom "window" is closed before the real HTTP request starts outgoing and then, this request's status is set to be "abort".
bogon: yarn test:dom
yarn run v1.10.1
$ jest --config jest.config.js
PASS animate-image.spec.tsx
✓ image (75ms)
console.log xxxxxxxxx/animate-image.spec.tsx:34
end ##################################################################
window close
document close
http://XXXXX.cdn.com
abort
some piece of code in the request.js, may be helpful to understand the problem:
var defer = typeof setImmediate === 'undefined'
? process.nextTick
: setImmediate
defer(function () {
if (self._aborted) {
return
}
var end = function () {
if (self._form) {
if (!self._auth.hasAuth) {
self._form.pipe(self)
} else if (self._auth.hasAuth && self._auth.sentAuth) {
self._form.pipe(self)
}
}
if (self._multipart && self._multipart.chunked) {
self._multipart.body.pipe(self)
}
if (self.body) {
if (isstream(self.body)) {
self.body.pipe(self)
} else {
setContentLength()
if (Array.isArray(self.body)) {
self.body.forEach(function (part) {
self.write(part)
})
} else {
self.write(self.body)
}
self.end()
}
} else if (self.requestBodyStream) {
console.warn('options.requestBodyStream is deprecated, please pass the request object to stream.pipe.')
self.requestBodyStream.pipe(self)
} else if (!self.src) {
if (self._auth.hasAuth && !self._auth.sentAuth) {
self.end()
return
}
if (self.method !== 'GET' && typeof self.method !== 'undefined') {
self.setHeader('content-length', 0)
}
self.end()
}
}
if (self._form && !self.hasHeader('content-length')) {
// Before ending the request, we had to compute the length of the whole form, asyncly
self.setHeader(self._form.getHeaders(), true)
self._form.getLength(function (err, length) {
if (!err && !isNaN(length)) {
self.setHeader('content-length', length)
}
end()
})
} else {
end()
}
self.ntick = true
})
// Creates the underlying HTTP request (self.req) via self.httpModule.request,
// attaches the response/error/drain/socket listeners, sets up the optional
// connection and socket timeouts, and emits 'request'. Returns early without
// creating anything when the request has already been aborted.
Request.prototype.start = function () {
// start() is called once we are ready to send the outgoing HTTP request.
// this is usually called on the first write(), end() or on nextTick()
var self = this
if (self.timing) {
// All timings will be relative to this request's startTime. In order to do this,
// we need to capture the wall-clock start time (via Date), immediately followed
// by the high-resolution timer (via now()). While these two won't be set
// at the _exact_ same time, they should be close enough to be able to calculate
// high-resolution, monotonically non-decreasing timestamps relative to startTime.
var startTime = new Date().getTime()
var startTimeNow = now()
}
// An aborted request never reaches the HTTP module.
if (self._aborted) {
return
}
self._started = true
self.method = self.method || 'GET'
self.href = self.uri.href
if (self.src && self.src.stat && self.src.stat.size && !self.hasHeader('content-length')) {
self.setHeader('content-length', self.src.stat.size)
}
if (self._aws) {
self.aws(self._aws, true)
}
// We have a method named auth, which is completely different from the http.request
// auth option. If we don't remove it, we're gonna have a bad time.
var reqOptions = copy(self)
delete reqOptions.auth
debug('make request', self.uri.href)
// node v6.8.0 now supports a `timeout` value in `http.request()`, but we
// should delete it for now since we handle timeouts manually for better
// consistency with node versions before v6.8.0
delete reqOptions.timeout
try {
self.req = self.httpModule.request(reqOptions)
} catch (err) {
self.emit('error', err)
return
}
if (self.timing) {
self.startTime = startTime
self.startTimeNow = startTimeNow
// Timing values will all be relative to startTime (by comparing to startTimeNow
// so we have an accurate clock)
self.timings = {}
}
// `timeout` stays undefined unless self.timeout is set and no timeoutTimer
// exists yet; the `timeout !== undefined` check in the socket handler then
// skips all timer setup. Negative values are clamped to 0.
var timeout
if (self.timeout && !self.timeoutTimer) {
if (self.timeout < 0) {
timeout = 0
} else if (typeof self.timeout === 'number' && isFinite(self.timeout)) {
timeout = self.timeout
}
}
self.req.on('response', self.onRequestResponse.bind(self))
self.req.on('error', self.onRequestError.bind(self))
self.req.on('drain', function () {
self.emit('drain')
})
self.req.on('socket', function (socket) {
// `._connecting` was the old property which was made public in node v6.1.0
var isConnecting = socket._connecting || socket.connecting
if (self.timing) {
self.timings.socket = now() - self.startTimeNow
if (isConnecting) {
var onLookupTiming = function () {
self.timings.lookup = now() - self.startTimeNow
}
var onConnectTiming = function () {
self.timings.connect = now() - self.startTimeNow
}
socket.once('lookup', onLookupTiming)
socket.once('connect', onConnectTiming)
// clean up timing event listeners if needed on error
self.req.once('error', function () {
socket.removeListener('lookup', onLookupTiming)
socket.removeListener('connect', onConnectTiming)
})
}
}
var setReqTimeout = function () {
// This timeout sets the amount of time to wait *between* bytes sent
// from the server once connected.
//
// In particular, it's useful for erroring if the server fails to send
// data halfway through streaming a response.
self.req.setTimeout(timeout, function () {
if (self.req) {
self.abort()
var e = new Error('ESOCKETTIMEDOUT')
e.code = 'ESOCKETTIMEDOUT'
e.connect = false
self.emit('error', e)
}
})
}
if (timeout !== undefined) {
// Only start the connection timer if we're actually connecting a new
// socket, otherwise if we're already connected (because this is a
// keep-alive connection) do not bother. This is important since we won't
// get a 'connect' event for an already connected socket.
if (isConnecting) {
// On connect: cancel the connection timer and switch to the socket (idle) timeout.
var onReqSockConnect = function () {
socket.removeListener('connect', onReqSockConnect)
clearTimeout(self.timeoutTimer)
self.timeoutTimer = null
setReqTimeout()
}
socket.on('connect', onReqSockConnect)
self.req.on('error', function (err) { // eslint-disable-line handle-callback-err
socket.removeListener('connect', onReqSockConnect)
})
// Set a timeout in memory - this block will throw if the server takes more
// than `timeout` to write the HTTP status and headers (corresponding to
// the on('response') event on the client). NB: this measures wall-clock
// time, not the time between bytes sent by the server.
self.timeoutTimer = setTimeout(function () {
socket.removeListener('connect', onReqSockConnect)
self.abort()
var e = new Error('ETIMEDOUT')
e.code = 'ETIMEDOUT'
e.connect = true
self.emit('error', e)
}, timeout)
} else {
// We're already connected
setReqTimeout()
}
}
self.emit('socket', socket)
})
self.emit('request', self.req)
}
I can't get the HTTP request sent to fetch the image source. Thus I can't get the img.onload handler to be called.
anyone could help me to explain this problem?
Finally I didn't find a way to send a request successfully for loading image.
My solution is: mock the HTMLImageElement's prototype in my test code:
// Make every image in the test environment look fully loaded with a non-zero
// natural width, so no real image has to be fetched.
Object.defineProperties(HTMLImageElement.prototype, {
    naturalWidth: { get: () => 120 },
    complete: { get: () => true }
});
Thus I don't need to get the real image any more and meanwhile I can finish my test case successfully.

How to trigger Ajax Request for filling a Form out and wait until DOM changes?

I am going to fill out a Form using JQuery through PhantomJS.
I have following script for doing that:
var page = require('webpage').create();

page.open('http://demo.opencart.com/index.php?route=account/register', function () {
    fillTheForm();
    phantom.exit();
});

// Sets both <select> elements back-to-back in a single page.evaluate call
// (triggering 'change' on the first one in between), then renders the page.
function fillTheForm() {
    page.evaluate(function () {
        var dropdowns = document.getElementsByTagName('select');
        var firstSelect = $(dropdowns[0]);
        firstSelect.val("38");
        firstSelect.trigger('change');
        $(dropdowns[1]).val('610');
    });
    page.render('form.png');
}
after running this script, I got following message inside the console!
Alert, JavaScript error
Also, the picture that I have, after trying to fill the Form out, tells me that the existing values for the second Select box have not changed yet and then PhantomJS could not assign the value to the second field.
Can someone please help me to solve this problem? How can I fill out this two fields using JQuery and PhantomJS?
This is the chained selects problem. The second one is dependent on the first one and is populated via AJAX. This means that it is asynchronous. You have to wait before you can set the second value. In PhantomJS this is also problematic, because you have two contexts (page context and phantom context) that have to be "synchronized".
For example you can use
/**
 * Select the first dropdown's value and fire its change event, then, after a
 * fixed 3 second delay, select the second dropdown's value, render the page
 * to form.png and exit PhantomJS.
 */
function fillTheForm() {
    page.evaluate(function () {
        var firstSelect = document.getElementsByTagName('select')[0];
        $(firstSelect).val("38");
        $(firstSelect).trigger('change');
    });

    // Runs after the delay; exiting has to happen here because everything is
    // asynchronous from this point on.
    function finishForm() {
        page.evaluate(function () {
            var secondSelect = document.getElementsByTagName('select')[1];
            $(secondSelect).val('610');
        });
        page.render('form.png');
        phantom.exit();
    }

    setTimeout(finishForm, 3000); // assuming 3 seconds are enough time for the request
}
A better and more robust way would be to use waitFor from the examples, because it is finished as soon as the data is available. Here I have some indicators when the second select is reloaded:
var page = require('webpage').create();

// Render and quit only after fillTheForm reports that it is done.
function renderAndExit() {
    page.render('form.png');
    phantom.exit();
}

page.open('http://demo.opencart.com/index.php?route=account/register', function () {
    fillTheForm(renderAndExit);
});
/**
 * Set the first select and trigger its change event, wait (via waitFor, up to
 * 5 seconds) until the second select's options have visibly changed, then set
 * the second select and call `done`.
 *
 * @param done function called after the second select has been set.
 */
function fillTheForm(done) {
    // Remember what the dependent select looks like before the change, so the
    // reload can be detected afterwards.
    page.evaluate(function () {
        var dropdowns = document.getElementsByTagName('select');
        var dependentOptions = dropdowns[1].children;
        // custom indicators, perhaps something more elaborate is needed for general selects
        window._chainedSelectChildrenLength = dependentOptions.length;
        window._chainedSelectFirstChildText = dependentOptions[0].innerText;
        $(dropdowns[0]).val("38");
        $(dropdowns[0]).trigger('change');
    });

    function dependentSelectReloaded() {
        return page.evaluate(function () {
            var dependentOptions = document.getElementsByTagName('select')[1].children;
            // Either indicator differing means the options were replaced.
            if (window._chainedSelectChildrenLength !== dependentOptions.length) {
                return true;
            }
            return window._chainedSelectFirstChildText !== dependentOptions[0].innerText;
        });
    }

    function chooseDependentValue() {
        page.evaluate(function () {
            var dropdowns = document.getElementsByTagName('select');
            $(dropdowns[1]).val('610');
        });
        done();
    }

    waitFor(dependentSelectReloaded, chooseDependentValue, 5000); // 3 seconds is the default
}
/**
 * Poll `testFx` every 250ms until it is truthy or `timeOutMillis` elapses.
 *
 * On success the elapsed time is logged and `onReady` is run; on timeout a
 * message is logged and PhantomJS exits with code 1. Polling is stopped BEFORE
 * `onReady` runs (and before the timeout exit), so `onReady` is invoked at
 * most once even if it throws.
 *
 * @param testFx condition, either a function returning a boolean or a string to eval.
 * @param onReady action to run once the condition holds (function or string to eval).
 * @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used.
 */
function waitFor(testFx, onReady, timeOutMillis) {
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default max timeout is 3s
        start = new Date().getTime(),
        condition = false,
        interval = setInterval(function() {
            if ((new Date().getTime() - start < maxtimeOutMillis) && !condition) {
                // Not timed out yet and condition not yet fulfilled: check again.
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx());
                return;
            }

            // We are done either way. Stop polling first: if onReady throws, the
            // interval must not keep firing and re-running it every 250ms.
            clearInterval(interval);

            if (!condition) {
                // Timed out with the condition still false.
                console.log("'waitFor()' timeout");
                phantom.exit(1);
            } else {
                // Condition fulfilled (possibly right at the timeout).
                console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
                if (typeof(onReady) === "string") {
                    eval(onReady);
                } else {
                    onReady();
                }
            }
        }, 250); //< repeat check every 250ms
}

Function called once, but firing multiple times

The following is a snippet from a PhantomJS script. It tracks dynamic content on an AJAXd webpage. track() is called once, but for some reason page.open() is called 3 times
function track(url){
console.log('Tracking',url);
var page = require('webpage').create();
console.log('check2')
if(page){
console.log('check4');
page.open(url, function (status) {
console.log('check3');
if (status !== 'success') {
console.log('Unable to load the address!');
setTimeout(function(){start();},1000);
setTimeout(function(){page.release();},5000);
}
else {
console.log('check');
var i = 0;
var last_winner = false;
var logged_once = false;
var interval = false;
if(!interval){
interval = setInterval(function(){
var scraping = scrape(page);
var date = new Date();
var time = date.getTime();
if(scraping){/*Bunch of console logs*/}
else{
console.log('Bidding ended');
clearInterval(interval);
setTimeout(function(){start();},1000);
setTimeout(function(){page.release();},5000);
}
scraping = false;
},1000);
};
};
});
};
};
Logs the following to the console:
Tracking http://www.google.com
check2
check4
check3
check
check3
check
check3
check
For some reason I can't figure out, page.open() is being called 3 times.
Apparently PhantomJS invokes the page.open callback (not page.open itself) multiple times if redirects or iframes are loaded on the page.
There are some suggestions of how to handle that on the PhantomJS bug tracker.
http://code.google.com/p/phantomjs/issues/detail?id=353&q=open%20callback

Getting UnreadmessagesCount

The question I have is this: when the getInboxUnreadMessagesCount JS function runs, it comes up with a different number than the one that was there to begin with, even though no new message has been sent. When I run the PHP dashboard functions, they both return the correct numbers, so I think the issue lies with the last line of code, the one that sets messageTimer.
Does anybody have any thoughts on what it might be? I'm hoping someone can figure it out.
// Most recent response stored by the $.get callbacks below; undefined until one has run.
var $messageCountJSON;
// Receives the id returned by setInterval once the document-ready handler has run.
var messageTimer = '';
// Polling period in seconds (multiplied by 1000 where the timer is created).
var messageInterval = 5;
// Assumed JSON response is {"count":"20"}, for example's sake.
/**
 * Request the unread-message count and refresh the notification badge.
 *
 * The $.get callback only stores the response in the global
 * $messageCountJSON. The badge checks below run right after the request is
 * issued, so they read whatever $messageCountJSON holds at that moment.
 *
 * @param {string} displayElementID - id of the element showing the count;
 *     nothing is updated when it is null, undefined or loosely equal to ''.
 */
function getInboxUnreadMessagesCount(displayElementID) {
    $.get('dashboard/getInboxUnreadMessagesCount', function (response) {
        $messageCountJSON = response;
    }, 'json');

    // Loose comparisons on purpose: they also reject undefined and values == ''.
    if (displayElementID == null || displayElementID == '') {
        return;
    }

    var selector = '#' + displayElementID;
    var hasMoreThanShown = parseInt($(selector).text()) < parseInt($messageCountJSON);
    if (hasMoreThanShown) {
        $.jGrowl("You have received a new private message!", { theme: 'information' });
        $(selector).html($messageCountJSON).css({ "display": "block" });
    }

    // A count of zero hides the badge.
    if (parseInt($messageCountJSON) === 0) {
        $(selector).html($messageCountJSON).css({ "display": "none" });
    }
}
/**
 * Request the total inbox message count and refresh its display element.
 *
 * The $.get callback only stores the response in the global
 * $messageCountJSON. The checks below run right after the request is
 * issued, so they read whatever $messageCountJSON holds at that moment.
 *
 * @param {string} displayElementID - id of the element showing the count;
 *     nothing is updated when it is null, undefined or loosely equal to ''.
 */
function getInboxMessagesCount(displayElementID) {
    $.get('dashboard/getInboxMessagesCount', function (response) {
        $messageCountJSON = response;
    }, 'json');

    // Loose comparisons on purpose: they also reject undefined and values == ''.
    if (displayElementID == null || displayElementID == '') {
        return;
    }

    var selector = '#' + displayElementID;
    var hasMoreThanShown = parseInt($(selector).text()) < parseInt($messageCountJSON);
    if (hasMoreThanShown) {
        $(selector).html($messageCountJSON);
    }

    // A count of zero is written out as well.
    if (parseInt($messageCountJSON) === 0) {
        $(selector).html($messageCountJSON);
    }
}
// Once the document is ready, refresh both counters every messageInterval seconds.
$(document).ready(function () {
    // One tick: update the unread badge first, then the inbox total.
    function refreshCounters() {
        getInboxUnreadMessagesCount('notifications');
        getInboxMessagesCount('inboxCount');
    }

    messageTimer = setInterval(refreshCounters, messageInterval * 1000);
});
//you can optionally kill the timed interval with something like
//$('#pmMessagesIcon').click(function(){clearInterval(messageTimer);})
You are trying to access the message count before it's received:
// $.get() only starts an asynchronous request to the server and returns right away.
$.get('dashboard/getInboxUnreadMessagesCount', function (data) {
// This callback runs later, only once the JSON response has arrived.
$messageCountJSON = data;
}, 'json');
// Code here runs immediately after the request is fired,
// and probably before the JSON response arrives, so $messageCountJSON
// still holds whatever it held before this request.
You have to move your big if statements inside each $.get() callback function, so that they run only after the response has arrived.

This javascript setTimeout interacts with ajax requests in a really weird way

I'm writing this script to display the status of an import script. It's supposed to call a function that runs an HTTP request every X seconds.
/**
 * Test harness for the import progress display: shows an alert, then
 * schedules four calls to update_progres_import(), 2 seconds apart.
 * The commented-out lines sketch the intended self-rescheduling version.
 */
function progres_import() {
    //if(import_status != 'finalizat') {
    alert("progres_import");
    var delays = [2000, 4000, 6000, 8000];
    for (var k = 0; k < delays.length; k += 1) {
        setTimeout(function () { return update_progres_import(); }, delays[k]);
    }
    //setTimeout(function() { progres_import(); }, 400);
    //}
    //else {
    //}
}
This is what I used to test the functionality. I left the commented-out lines in to show what I ultimately intend to do with it. I tried every possible form of the setTimeout call: with quotes, without quotes, with and without the anonymous function.
// The single request object shared by every call to update_progres_import().
var xmlhttp_import_progres;

/**
 * Ask the server for the current import progress and alert the raw response.
 *
 * The response text is split on '_' into import_nrc, import_nrt and
 * import_status, which are written to global variables together with the
 * formatted message `mesaj`.
 */
function update_progres_import() {
    xmlhttp_import_progres = GetXMLHttpObject();
    if (xmlhttp_import_progres == null) {
        alert ("Browser does not support HTTP Request (xmlhttp_import_progres)");
        return;
    }

    // The random sid makes every request URL unique.
    var url = "crm/ferestre/import_progres.php" + "?sid=" + Math.random();

    // Reads the shared global on purpose, exactly like the inline handler did.
    function onStateChange() {
        if (xmlhttp_import_progres.readyState != 4) {
            return;
        }
        progres_resp = xmlhttp_import_progres.responseText;
        progres = progres_resp.split('_');
        import_nrc = progres[0];
        import_nrt = progres[1];
        import_status = progres[2];
        mesaj = 'Progres import: ' + import_nrc + ' / ' + import_nrt;
        //document.getElementById("corp_import_mesaj").innerHTML = mesaj;
        alert(progres_resp);
    }

    xmlhttp_import_progres.onreadystatechange = onStateChange;
    xmlhttp_import_progres.open("POST", url, true);
    xmlhttp_import_progres.send(null);
}
This is the business end of the progres_import function.
What happens is that I get the alert("progres_import") from the first function right as the import process starts, but the alert(progres_resp) in the second one only starts popping up after the import process is over (the alerts still keep the 2-second interval, so in that sense the setTimeouts worked).
The PHP script behind the AJAX request just takes some session variables that the import script sets and prints them for the JavaScript to use (x imports of y total, z failed, and so on).
Any idea why it behaves like this?
The condition xmlhttp_import_progres.readyState == 4 is only true at the end of the request. Hence, your alert dialogs pop up only after the request has finished.
Furthermore, you can't expect your function to show alerts after a 2 second interval, because the server may or may not respond as fast.
A final note: If you want to have a periodical update function, use setInterval(function(){...}, 2000).
EDIT
Also, add var in this way: var xmlhttp_import_progres = GetXMLHttpObject();. Currently, you're globally defining the HTTP object, causing only one instance of the HTTP object to be accessible.
Here, can you try a small edit?
Please consider the above answer as well, but this code should make things clearer for you:
/**
 * Test harness for the import progress display: shows an alert, then
 * schedules update_progres_import(0) through update_progres_import(3),
 * 2 seconds apart, so that each call works with its own slot index.
 * The commented-out lines sketch the intended self-rescheduling version.
 */
function progres_import() {
    //if(import_status != 'finalizat') {
    alert("progres_import");

    // A helper gives every timer its own copy of the slot index.
    function scheduleUpdate(slot) {
        setTimeout(function () { return update_progres_import(slot); }, (slot + 1) * 2000);
    }

    for (var slot = 0; slot < 4; slot += 1) {
        scheduleUpdate(slot);
    }
    //setTimeout(function() { progres_import(); }, 400);
    //}
    //else {
    //}
}
AND
// One request object per slot, so that requests do not share a single object.
var xmlhttp_import_progres = [];

/**
 * Ask the server for the current import progress using request slot `i`,
 * and alert the raw response.
 *
 * The response text is split on '_' into import_nrc, import_nrt and
 * import_status, which are written to global variables together with the
 * formatted message `mesaj`.
 *
 * @param {number} i - Index of the slot in xmlhttp_import_progres to use.
 */
function update_progres_import(i) {
    var request = GetXMLHttpObject();
    xmlhttp_import_progres[i] = request;
    if (request == null) {
        alert ("Browser does not support HTTP Request (xmlhttp_import_progres)");
        return;
    }

    // The random sid makes every request URL unique.
    var url = "crm/ferestre/import_progres.php" + "?sid=" + Math.random();

    // Looks the request up through its slot, exactly like the inline handler did.
    function onStateChange() {
        if (xmlhttp_import_progres[i].readyState != 4) {
            return;
        }
        progres_resp = xmlhttp_import_progres[i].responseText;
        progres = progres_resp.split('_');
        import_nrc = progres[0];
        import_nrt = progres[1];
        import_status = progres[2];
        mesaj = 'Progres import: ' + import_nrc + ' / ' + import_nrt;
        //document.getElementById("corp_import_mesaj").innerHTML = mesaj;
        alert(progres_resp);
    }

    request.onreadystatechange = onStateChange;
    request.open("POST", url, true);
    request.send(null);
}

Categories

Resources