Function called once, but firing multiple times - javascript

The following is a snippet from a PhantomJS script. It tracks dynamic content on an AJAXd webpage. track() is called once, but for some reason page.open() is called 3 times
function track(url){
console.log('Tracking',url);
var page = require('webpage').create();
console.log('check2')
if(page){
console.log('check4');
page.open(url, function (status) {
console.log('check3');
if (status !== 'success') {
console.log('Unable to load the address!');
setTimeout(function(){start();},1000);
setTimeout(function(){page.release();},5000);
}
else {
console.log('check');
var i = 0;
var last_winner = false;
var logged_once = false;
var interval = false;
if(!interval){
interval = setInterval(function(){
var scraping = scrape(page);
var date = new Date();
var time = date.getTime();
if(scraping){/*Bunch of console logs*/}
else{
console.log('Bidding ended');
clearInterval(interval);
setTimeout(function(){start();},1000);
setTimeout(function(){page.release();},5000);
}
scraping = false;
},1000);
};
};
});
};
};
Logs the following to the console:
Tracking http://www.google.com
check2
check4
check3
check
check3
check
check3
check
For some reason I can't figure out, page.open() is being called 3 times.

Apparently PhantomJS invokes the page.open() callback multiple times if there are redirects or iframes being loaded on the page, so page.open() itself is called only once but its callback fires repeatedly.
There are some suggestions of how to handle that on the PhantomJS bug tracker.
http://code.google.com/p/phantomjs/issues/detail?id=353&q=open%20callback

Related

Google Cloud SQL not updating with script

I have a long script which is designed to edit a specific row in the Cloud SQL table. The code is long so i will shorten it.
Client Side:
/**
 * Fetches the current client's record from the server and fills the Profile
 * page with it, showing the Loading popup while the request is in flight.
 *
 * The client id is read from sessionStorage ("client_id"). The server replies
 * with a comma-joined string, or `false` when nothing was found or the lookup
 * failed. The popup is now hidden in both cases; previously a `false` reply
 * left it visible forever.
 */
function build_profile() {
    var cbid = sessionStorage.getItem("client_id");
    var self = this;
    var createSuccess = function (data) {
        var statuse = ["Active", "Wiating", "Discharged"];
        if (data !== false) {
            data = data.split(",");
            var dec = app.pages.Profile.descendants;
            dec.fname.text = data[1];
            dec.sname.text = data[3];
            sessionStorage.setItem("school_id", data[9]);
        }
        // Dismiss the spinner whether or not a record came back.
        app.popups.Loading.visible = false;
    };
    var init = function () {
        google.script.run.withSuccessHandler(createSuccess).get_user_data(cbid);
    };
    app.popups.Loading.visible = true;
    init();
}
/**
 * Asks the server which Clients row belongs to the current client, writes the
 * edited first/last name into that row, then reloads the datasource and
 * rebuilds the profile view.
 *
 * The server replies with a row index, or -1 when the client was not found.
 * The original block had a stray closing brace that ended the function before
 * `init` was defined, which made it a syntax error.
 */
function save_profile() {
    var createSuccess = function (data) {
        var dec = app.pages.Profile.descendants;
        console.log(data);
        if (data !== -1) {
            var ds = app.datasources.Clients;
            ds.load(function () {
                ds.selectIndex(data);
                console.log("editing:" + ds.item.CBID);
                ds.item.fname = dec.fname_edit.value;
                ds.item.sname = dec.sname_edit.value;
                // NOTE(review): this reload is issued from inside the first
                // load's callback; the author reports App Maker rejecting it
                // with "cannot run the query while processing its results".
                // Consider deferring it or switching to manual save mode.
                ds.load(function () { build_profile(); });
            });
        }
    };
    var init = function () {
        google.script.run.withSuccessHandler(createSuccess).update_client(sessionStorage.getItem("client_id"));
    };
    init();
}
Server Side:
/**
 * Looks up the Clients record whose CBID equals `cbid` and returns selected
 * fields as one comma-joined string.
 *
 * @param {string|number} cbid Client id; parsed as a base-10 integer.
 * @returns {string|boolean} "Id,fname,sname" for the first match, or `false`
 *     when there is no match or the query throws.
 */
function get_user_data(cbid) {
    try {
        var query = app.models.Clients.newQuery();
        // Explicit radix: ids are decimal, never hex/octal-looking strings.
        query.filters.CBID._equals = parseInt(cbid, 10);
        var results = query.run();
        if (results.length > 0) {
            var arr = [
                results[0].Id, //0
                results[0].fname, //1
                results[0].sname //3
            ];
            return arr.join(",");
        } else {
            return false;
        }
    } catch (e) {
        console.error(e);
        console.log("function get_user_data");
        return false;
    }
}
/**
 * Finds the Clients record whose CBID equals `cbid` and returns its key plus
 * one, which the client side uses as a datasource index.
 *
 * @param {string|number} cbid Client id; parsed as a base-10 integer.
 * @returns {*} `_key + 1` of the first match, or -1 when nothing matches or
 *     the query throws. NOTE(review): if `_key` is a string this concatenates
 *     instead of adding — confirm the key type.
 */
function update_client(cbid) {
    try {
        var ds = app.models.Clients;
        var query = ds.newQuery();
        // Explicit radix: ids are decimal, never hex/octal-looking strings.
        query.filters.CBID._equals = parseInt(cbid, 10);
        var results = query.run();
        if (results.length > 0) {
            var id = results[0]._key;
            return id + 1;
        } else {
            return -1;
        }
    } catch (e) {
        console.error(e);
        return -1;
    }
}
This gets the Clients table and updates the row for the selected client, then rebuilds the profile with the new information.
EDIT: I have managed to get to a point where its telling me that i cannot run the query (ds.load()) while processing its results. There does not seem to be a manual check to see if it has processed?
Note: datasource.saveChanges() does not work as it saves automatically.
Your error is being produced by the client-side function save_profile(), specifically in this block:
// Excerpt from save_profile(): the callback runs when the datasource load completes.
ds.load(function(){
// Select the row whose index the server returned, then overwrite its name fields.
ds.selectIndex(data);
console.log("editing:"+ds.item.CBID);
ds.item.fname = dec.fname_edit.value;
ds.item.sname = dec.sname_edit.value;
// A second load is started from inside the first load's callback.
ds.load(function(){build_profile();});
});
So what you are doing is reloading the datasource almost immediately before it finishes loading hence you are getting that error
cannot run the query (ds.load()) while processing its results
This is just a matter of timing. A setTimeout can take care of the issue. Just do the following:
// Same edit as in save_profile(), except that the reload is postponed by one
// second instead of being issued directly from the load callback.
ds.load(function onClientsLoaded() {
    // Reloads the datasource and rebuilds the profile once that finishes.
    function reloadAndRebuild() {
        ds.load(function () {
            build_profile();
        });
    }

    ds.selectIndex(data);
    console.log("editing:" + ds.item.CBID);
    ds.item.fname = dec.fname_edit.value;
    ds.item.sname = dec.sname_edit.value;
    setTimeout(reloadAndRebuild, 1000);
});
I have managed to find a solution to this particular issue. It requires manual saving, but it saves a lot of hassle because one of the built-in mechanisms can be used rather than relying on error handling or timeouts.
/**
 * Saves pending changes on SomeTable, reloads it, and only then runs the
 * server-side query, logging whatever the server returns.
 */
function client_query_and_result() {
    var table = app.datasources.SomeTable;

    // Success handler for the server call: just log the payload.
    function logResult(data) {
        console.log(data);
    }

    // Step 3: everything is saved and reloaded, so query the server.
    function queryServer() {
        google.script.run.withSuccessHandler(logResult).server_query_and_result();
    }

    // Step 2: refresh the datasource after the save has completed.
    function reloadThenQuery() {
        table.load(queryServer);
    }

    // Step 1: flush all pending changes.
    table.saveChanges(reloadThenQuery);
}
The Server side code is the exact same methods. To enable manual saving you can select the table in App Maker -> Datasources -> check "Manual save mode".
Hope this can be useful to someone else.

Intermittent failure in nodeJS timer function

I am not familiar with this function, however I am seeing intermittent failures, sometimes the timer function will execute and the newState variable switches, sometimes it doesn't. Please can you check my understanding of what this is doing?
/**
 * Reports "motion detected" through changeAction(true) and (re)arms a timer
 * that reports the opposite state after `this.window_seconds` seconds.
 *
 * Reads and writes `this.timer` and reads `this.window_seconds`, so the
 * result depends on what `this` is bound to at the call site.
 */
function motionHandler() {
    console.log('im in motionhandler func');
    var detected = true;
    changeAction(detected);

    // Cancel a countdown left over from an earlier trigger.
    var pending = this.timer;
    if (pending !== undefined) {
        clearTimeout(pending);
    }

    var holdMs = this.window_seconds * 1000;
    this.timer = setTimeout(function resetState() {
        changeAction(!detected);
    }, holdMs);
}
From what I understand, when this function executes I set the newState variable to true. I then execute changeAction which sets my motion detector to "true" (motion detected).
I then create a timer. If this.timer has something in it, then clear. I then create a timeout which will countdown from window_seconds * 1000 (ie. 5x1000 milliseconds = 5 seconds). Once that timeout is reached, I will execute the changeAction function and set newState to the opposite of what it currently is?
Assuming all of that is correct, sometimes newState gets reset, other times it doesn't.
I am executing the motionHandler function every time I receive a particular RF code from a transmitter. The timeout is there to reset the motion detector back to false when no codes are received.
The full code is actually a plugin for home bridge, and can be seen here:
https://github.com/mattnewham/homebridge-RFReceiver/blob/master/index.js
This is my first real foray into Javascript/NodeJS so I don't really know how to troubleshoot this (other than my console.logs!)
Full code:
// Homebridge HAP classes; assigned when the exported plugin function runs.
var Service;
var Characteristic;
// rpi-433 sniffer (receiver) and emitter (sender), created once at module load.
var rpi433 = require("rpi-433"),
rfSniffer = rpi433.sniffer({
pin: 2, //Sniff on GPIO 2 (or Physical PIN 13)
debounceDelay: 1000 //Wait 1000ms before reading another code
}),
rfEmitter = rpi433.emitter({
pin: 0, //Send through GPIO 0 (or Physical PIN 11)
pulseLength: 350 //Send the code with a 350 pulse length
});
var debug = require("debug")("RFReceiverAccessory");
var crypto = require("crypto");
module.exports = function(homebridge) {
Service = homebridge.hap.Service;
Characteristic = homebridge.hap.Characteristic;
homebridge.registerAccessory("homebridge-RFReceiver", "RFReceiver", RFReceiverAccessory);
}
/**
 * Accessory constructor: copies settings from `config`, substituting a
 * default whenever a value is missing or falsy.
 *
 * @param log    Logger handed in by the caller; stored as-is.
 * @param config Settings object with `name`, `rfcode`, `window_seconds`,
 *               `sensor_type` and `inverse`.
 */
function RFReceiverAccessory(log, config) {
    this.log = log;
    this.name = config.name;
    // Falsy values (including 0 and "") fall back to the defaults below.
    this.rfcode = config.rfcode || 4;
    this.window_seconds = config.window_seconds || 5;
    this.sensor_type = config.sensor_type || "m";
    this.inverse = config.inverse || false;
}
RFReceiverAccessory.prototype = {
    /**
     * Builds the accessory's services and subscribes to the RF sniffer.
     *
     * When a received code matches the configured `rfcode`, the sensor is set
     * to "detected" and a timer resets it after `window_seconds` seconds.
     *
     * The handler previously read `this.timer` and `this.window_seconds`, but
     * it was invoked as a plain function, so `this` was not the accessory: the
     * delay became NaN (or the call threw in strict mode). The accessory is
     * now captured in `self`.
     *
     * @returns {Array} [informationService, service]
     */
    getServices: function () {
        var self = this;

        // you can OPTIONALLY create an information service if you wish to override
        // the default values for things like serial number, model, etc.
        var informationService = new Service.AccessoryInformation();
        informationService
            .setCharacteristic(Characteristic.Name, this.name)
            .setCharacteristic(Characteristic.Manufacturer, "Homebridge")
            .setCharacteristic(Characteristic.Model, "RF Receiver")
            .setCharacteristic(Characteristic.SerialNumber, "12345");

        var service, changeAction;
        if (this.sensor_type === "c") {
            service = new Service.ContactSensor();
            changeAction = function (newState) {
                service.getCharacteristic(Characteristic.ContactSensorState)
                    .setValue(newState ? Characteristic.ContactSensorState.CONTACT_DETECTED : Characteristic.ContactSensorState.CONTACT_NOT_DETECTED);
            };
        } else {
            service = new Service.MotionSensor();
            changeAction = function (newState) {
                console.log('changing state');
                service.getCharacteristic(Characteristic.MotionDetected)
                    .setValue(newState);
            };
        }

        // Flags the sensor as triggered and (re)starts the reset countdown.
        function motionHandler() {
            console.log('im in motionhandler func');
            var newState = true;
            changeAction(newState);
            if (self.timer !== undefined) {
                clearTimeout(self.timer);
            }
            self.timer = setTimeout(function () {
                changeAction(!newState);
            }, self.window_seconds * 1000);
        }

        var code = this.rfcode;
        var name = this.name;
        rfSniffer.on('data', function (data) {
            console.log('Code received: ' + data.code + ' pulse length : ' + data.pulseLength);
            console.log(code);
            // Loose equality kept on purpose: the configured code may be a
            // string while the sniffer may report a number.
            if (data.code == code) {
                console.log("Motion Detected In" + name);
                motionHandler();
            }
        });
        return [informationService, service];
    }
};

Execute javascript (PhantomJS) after load full webpage [duplicate]

I'm using PhantomJS v1.4.1 to load some web pages. I don't have access to their server-side, I just getting links pointing to them. I'm using obsolete version of Phantom because I need to support Adobe Flash on that web pages.
The problem is many web-sites are loading their minor content async and that's why Phantom's onLoadFinished callback (analogue for onLoad in HTML) fired too early when not everything still has loaded. Can anyone suggest how can I wait for full load of a webpage to make, for example, a screenshot with all dynamic content like ads?
Another approach is to just ask PhantomJS to wait for a bit after the page has loaded before doing the render, as per the regular rasterize.js example, but with a longer timeout to allow the JavaScript to finish loading additional resources:
// Render one second after a successful load so late-loading content has time
// to arrive; exit straight away when the load fails.
page.open(address, function (status) {
    if (status === 'success') {
        window.setTimeout(function renderAndExit() {
            page.render(output);
            phantom.exit();
        }, 1000); // Change timeout as required to allow sufficient time
        return;
    }
    console.log('Unable to load the address!');
    phantom.exit();
});
I would rather periodically check for document.readyState status (https://developer.mozilla.org/en-US/docs/Web/API/document.readyState). Although this approach is a bit clunky, you can be sure that inside onPageReady function you are using fully loaded document.
var page = require("webpage").create();
var url = "http://example.com/index.html";

// Prints the document's outer HTML and quits.
function onPageReady() {
    var htmlContent = page.evaluate(function () {
        return document.documentElement.outerHTML;
    });
    console.log(htmlContent);
    phantom.exit();
}

page.open(url, function (status) {
    // Re-schedules itself with setTimeout (no explicit delay) until the
    // document reports readyState "complete".
    (function poll() {
        setTimeout(function () {
            var state = page.evaluate(function () {
                return document.readyState;
            });
            if ("complete" !== state) {
                poll();
                return;
            }
            onPageReady();
        });
    })();
});
Additional explanation:
Using nested setTimeout instead of setInterval prevents checkReadyState from "overlapping" and race conditions when its execution is prolonged for some random reasons. setTimeout has a default delay of 4ms (https://stackoverflow.com/a/3580085/1011156) so active polling will not drastically affect program performance.
document.readyState === "complete" means that document is completely loaded with all resources (https://html.spec.whatwg.org/multipage/dom.html#current-document-readiness).
EDIT 2022:
I created this response 8 years ago and I did not use PhantomJS since then. It is very probable it won't work now in some cases. Also now I think it is not possible to create a one-size-fits-all solution to be absolutely sure the page is loaded. This is because some pages may load additional resources after document is ready. For example, there might be some JS code on the website that waits for the document to be ready an then loads some additional assets (after document state changes to ready) - in this case the onPageReady will trigger and after that the page will start loading some more resources again.
I still think the above snippet is a good starting point and may work in most cases, but it may also be necessary to create specific solutions to handle specific websites.
You could try a combination of the waitfor and rasterize examples:
/**
 * See https://github.com/ariya/phantomjs/blob/master/examples/waitfor.js
 *
 * Wait until the test condition is true or a timeout occurs. Useful for waiting
 * on a server response or for a ui change (fadeIn, etc.) to occur.
 *
 * The poller is stopped BEFORE `onReady` runs (and before exiting on timeout),
 * so an `onReady` that throws can no longer be invoked again by the next tick.
 *
 * @param testFx javascript condition that evaluates to a boolean,
 * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')") or
 * as a callback function.
 * @param onReady what to do when testFx condition is fulfilled,
 * it can be passed in as a string (e.g.: "1 == 1" or "$('#bar').is(':visible')") or
 * as a callback function.
 * @param timeOutMillis the max amount of time to wait. If not specified, 3 sec is used.
 */
function waitFor(testFx, onReady, timeOutMillis) {
    // NOTE(review): string arguments are run through eval(); only pass trusted strings.
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timeout is 3s
        start = new Date().getTime(),
        condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()), //< defensive code
        interval = setInterval(function() {
            if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
                // If not time-out yet and condition not yet fulfilled
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
                return;
            }
            // Either outcome is final: stop polling first.
            clearInterval(interval);
            if (!condition) {
                // If condition still not fulfilled (timeout but condition is 'false')
                console.log("'waitFor()' timeout");
                phantom.exit(1);
                return;
            }
            // Condition fulfilled (timeout and/or condition is 'true')
            console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
            if (typeof(onReady) === "string") {
                eval(onReady);
            } else {
                onReady(); //< Do what it's supposed to do once the condition is fulfilled
            }
        }, 250); //< repeat check every 250ms
}
// Rasterize a URL to a file, rendering only once every tracked resource has
// reached the 'end' stage (waitFor is given up to 10 seconds).
var page = require('webpage').create();
var system = require('system');
var address, output, size;
var argCount = system.args.length;

if (argCount >= 3 && argCount <= 5) {
    address = system.args[1];
    output = system.args[2];

    // Optional paper size, honoured only when the output file is a PDF.
    if (argCount > 3 && system.args[2].substr(-4) === ".pdf") {
        size = system.args[3].split('*');
        if (size.length === 2) {
            // "width*height"
            page.paperSize = {
                width : size[0],
                height : size[1],
                margin : '0px'
            };
        } else {
            // Named format such as "A4" or "Letter"
            page.paperSize = {
                format : system.args[3],
                orientation : 'portrait',
                margin : {
                    left : "5mm",
                    top : "8mm",
                    right : "5mm",
                    bottom : "9mm"
                }
            };
        }
    }
    if (argCount > 4) {
        page.zoomFactor = system.args[4];
    }

    // resources[id] holds the latest stage seen for that request id.
    var resources = [];
    var recordStage = function (entry) {
        resources[entry.id] = entry.stage;
    };
    page.onResourceRequested = recordStage;
    page.onResourceReceived = recordStage;

    page.open(address, function (status) {
        if (status === 'success') {
            waitFor(function allResourcesEnded() {
                // True once every recorded resource (ids from 1) is at stage 'end'
                for (var i = 1; i < resources.length; ++i) {
                    if (resources[i] != 'end') {
                        return false;
                    }
                }
                return true;
            }, function renderAndExit() {
                page.render(output);
                phantom.exit();
            }, 10000);
        } else {
            console.log('Unable to load the address!');
            phantom.exit();
        }
    });
} else {
    console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
    console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
    phantom.exit(1);
}
Here is a solution that waits for all resource requests to complete. Once complete it will log the page content to the console and generate a screenshot of the rendered page.
Although this solution can serve as a good starting point, I have observed it fail so it's definitely not a complete solution!
I didn't have much luck using document.readyState.
I was influenced by the waitfor.js example found on the phantomjs examples page.
var system = require('system');
var webPage = require('webpage');
var page = webPage.create();
var url = system.args[1];

page.viewportSize = { width: 1280, height: 720 };

// Ids of requests that have been issued but have not reached stage 'end'.
var pendingIds = [];

page.onResourceRequested = function (requestData, networkRequest) {
    pendingIds.push(requestData.id);
};

page.onResourceReceived = function (response) {
    if (response.stage !== 'end') {
        return;
    }
    var position = pendingIds.indexOf(response.id);
    if (position > -1) {
        pendingIds.splice(position, 1);
    }
};

page.open(url, function (status) {
    // Check twice a second; once nothing is pending, dump and render the page.
    var poller = setInterval(function () {
        if (pendingIds.length !== 0) {
            return;
        }
        clearInterval(poller);
        var content = page.content;
        console.log(content);
        page.render('yourLoadedPage.png');
        phantom.exit();
    }, 500);
});
Maybe you can use the onResourceRequested and onResourceReceived callbacks to detect asynchronous loading. Here's an example of using those callbacks from their documentation:
var page = require('webpage').create();

// Logs `payload` as 4-space-indented JSON behind the given label.
function logResource(label, payload) {
    console.log(label + JSON.stringify(payload, undefined, 4));
}

page.onResourceRequested = function (request) {
    logResource('Request ', request);
};
page.onResourceReceived = function (response) {
    logResource('Receive ', response);
};
page.open(url);
Also, you can look at examples/netsniff.js for a working example.
In my program, I use some logic to judge if it was onload: watching it's network request, if there was no new request on past 200ms, I treat it onload.
Use this, after onLoadFinish().
/**
 * Invokes `callback(null, page)` once the page's network traffic has gone
 * quiet: a 200ms timer is restarted whenever a tracked response ends, and the
 * callback fires when the timer elapses with nothing pending (or after three
 * re-arms with requests still pending).
 *
 * Installs page.onResourceRequested, page.onResourceReceived and
 * page.onResourceError, replacing any handlers already set.
 *
 * Changes from the original: an exception thrown by `callback` is logged
 * instead of silently discarded, and the content-type filter is written as an
 * explicit prefix check (same result as the former indexOf multiplication).
 *
 * @param page     PhantomJS page object.
 * @param callback function(err, page); `err` is always null here.
 */
function onLoadComplete(page, callback) {
    var waiting = [];       // ids of requests still in flight
    var interval = 200;     // ms of quiet to wait for
    var max_retry = 3;      // re-arms allowed while requests are still pending
    var counter_retry = 0;
    var tlogger = {};       // debug: response start time per request id
    var timer = setTimeout(timeout, interval);

    function timeout() {
        if (waiting.length && counter_retry < max_retry) {
            timer = setTimeout(timeout, interval);
            counter_retry++;
            return;
        }
        try {
            callback(null, page);
        } catch (e) {
            // Best effort: do not rethrow, but do not hide the failure either.
            console.log('[onLoadComplete] callback threw: ', e);
        }
    }

    // Drops `id` from the pending list if it is there.
    function remove(id) {
        var i = waiting.indexOf(id);
        if (i >= 0) {
            waiting.splice(i, 1);
        }
    }

    // Only content types starting with 'application' or 'text' are awaited.
    function isTracked(contentType) {
        return contentType.indexOf('application') === 0 || contentType.indexOf('text') === 0;
    }

    page.onResourceRequested = function (req) {
        waiting.push(req.id);
    };

    page.onResourceReceived = function (res) {
        var cT = res.contentType;
        if (!cT) {
            console.log('[contentType] ', cT, ' [url] ', res.url);
            return remove(res.id);
        }
        if (!isTracked(cT)) return remove(res.id);
        if (res.stage === 'start') {
            console.log('!!received start: ', res.id);
            tlogger[res.id] = new Date();
        } else if (res.stage === 'end') {
            console.log('!!received end: ', res.id, (new Date() - tlogger[res.id]));
            remove(res.id);
            // Activity seen: restart the quiet-period timer.
            clearTimeout(timer);
            timer = setTimeout(timeout, interval);
        }
    };

    page.onResourceError = function (err) {
        remove(err.id);
        if (waiting.length === 0) {
            counter_retry = 0;
        }
    };
}
I found this approach useful in some cases:
// onConsoleMessage is a callback property: assign the handler to it rather
// than calling it as a method.
page.onConsoleMessage = function (msg) {
    // do something e.g. page.render
};
Then, if you own the page, put a script like this inside it:
<script>
// Writes 'page loaded' to the console once the window's load event fires.
// NOTE(review): presumably a console-message handler on the PhantomJS side reacts to this text — confirm.
window.onload = function(){
console.log('page loaded');
}
</script>
I found this solution useful in a NodeJS app.
I use it just in desperate cases because it launches a timeout in order to wait for the full page load.
The second argument is the callback function which is going to be called once the response is ready.
// Declared with `var`: the bare assignment created an implicit global and
// throws in strict mode.
var phantom = require('phantom');

/**
 * Opens `anUrl` in a new phantom page, waits a fixed 5 seconds after a
 * successful load, then passes the document's innerHTML to `callbackDone`.
 *
 * NOTE(review): when the page fails to open, the error is logged and phantom
 * exits, but `callbackDone` is never invoked.
 *
 * @param anUrl        Address to open.
 * @param callbackDone function(html) called once with the page's HTML.
 */
var fullLoad = function (anUrl, callbackDone) {
    phantom.create(function (ph) {
        ph.createPage(function (page) {
            page.open(anUrl, function (status) {
                if (status !== 'success') {
                    console.error("pahtom: error opening " + anUrl, status);
                    ph.exit();
                } else {
                    // timeOut
                    global.setTimeout(function () {
                        page.evaluate(function () {
                            return document.documentElement.innerHTML;
                        }, function (result) {
                            ph.exit(); // EXTREMLY IMPORTANT
                            callbackDone(result); // callback
                        });
                    }, 5000);
                }
            });
        });
    });
};
var callback = function (htmlBody) {
    // do smth with the htmlBody
};
fullLoad('your/url/', callback);
This is an implementation of Supr's answer. Also it uses setTimeout instead of setInterval as Mateusz Charytoniuk suggested.
Phantomjs will exit in 1000ms when there isn't any request or response.
// load the module
var webpage = require('webpage');

// Current time in milliseconds (Date.now() would work as well).
function getTimestamp() {
    return new Date().getTime();
}

// Time of the most recent request or response.
var lastTimestamp = getTimestamp();
var page = webpage.create();

// Any network activity, in either direction, refreshes the timestamp.
function markActivity() {
    lastTimestamp = getTimestamp();
}
page.onResourceRequested = function (request) {
    markActivity();
};
page.onResourceReceived = function (response) {
    markActivity();
};

page.open(html, function (status) {
    if (status === 'success') {
        // do something here
    } else {
        // exit if it fails to load the page
        phantom.exit(1);
    }
});

// Every 100ms, check whether the network has been silent for over a second.
function checkReadyState() {
    setTimeout(function () {
        var idleFor = getTimestamp() - lastTimestamp;
        if (idleFor > 1000) {
            // exit if there isn't request or response in 1000ms
            phantom.exit();
            return;
        }
        checkReadyState();
    }, 100);
}
checkReadyState();
This the code I use:
var system = require('system');
var page = require('webpage').create();
page.open('http://....', function () {
    console.log(page.content);
    var k = 0;
    // Poll every 200ms for the QR code image; stop after 50 attempts.
    var loop = setInterval(function () {
        var qrcode = page.evaluate(function (s) {
            // The element is missing until the page has built it, so guard
            // against null instead of dereferencing `.src` directly.
            var img = document.querySelector(s);
            return img ? img.src : null;
        }, '.qrcode img');
        k++;
        if (qrcode) {
            console.log('dataURI:', qrcode);
            clearInterval(loop);
            phantom.exit();
            return;
        }
        if (k === 50) { // 10 sec timeout
            clearInterval(loop);
            phantom.exit();
        }
    }, 200);
});
Basically, you are supposed to know that the page has fully downloaded when a given element appears in the DOM, so the script waits until that happens.
I use a personnal blend of the phantomjs waitfor.js example.
This is my main.js file:
'use strict';
var wasSuccessful = phantom.injectJs('./lib/waitFor.js');
var page = require('webpage').create();

page.open('http://foo.com', function (status) {
    if (status !== 'success') {
        console.log('error');
        phantom.exit(1);
        return;
    }

    var jqueryUrl = 'https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js';
    page.includeJs(jqueryUrl, function () {
        // Condition: the document reports readyState 'complete'.
        var documentComplete = function () {
            return page.evaluate(function () {
                return 'complete' === document.readyState;
            });
        };
        // Action: read #foo's text through jQuery, then quit.
        var readFooAndExit = function () {
            var fooText = page.evaluate(function () {
                return $('#foo').text();
            });
            phantom.exit();
        };
        waitFor(documentComplete, readFooAndExit);
    });
});
And the lib/waitFor.js file (which is just a copy and paste of the waifFor() function from the phantomjs waitfor.js example):
/**
 * Polls `testFx` every 250ms until it yields a truthy value or the timeout
 * elapses, then runs `onReady` (on success) or exits PhantomJS with code 1
 * (on timeout).
 *
 * The poller is stopped BEFORE `onReady` runs (and before exiting on timeout),
 * so an `onReady` that throws can no longer be invoked again by the next tick.
 *
 * @param testFx        condition, as a callback or as a string to eval
 * @param onReady       action, as a callback or as a string to eval
 * @param timeOutMillis max time to wait in ms; 3000 when omitted or falsy
 */
function waitFor(testFx, onReady, timeOutMillis) {
    // NOTE(review): string arguments are run through eval(); only pass trusted strings.
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timeout is 3s
        start = new Date().getTime(),
        condition = false,
        interval = setInterval(function() {
            if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
                // If not time-out yet and condition not yet fulfilled
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
                return;
            }
            // Either outcome is final: stop polling first.
            clearInterval(interval);
            if (!condition) {
                // If condition still not fulfilled (timeout but condition is 'false')
                console.log("'waitFor()' timeout");
                phantom.exit(1);
                return;
            }
            // Condition fulfilled (timeout and/or condition is 'true')
            if (typeof(onReady) === "string") {
                eval(onReady);
            } else {
                onReady(); //< Do what it's supposed to do once the condition is fulfilled
            }
        }, 250); //< repeat check every 250ms
}
This method is not asynchronous, but at least I am assured that all the resources were loaded before I try to use them.
This is an old question, but since I was looking for full page load but for Spookyjs (that uses casperjs and phantomjs) and didn't find my solution, I made my own script for that, with the same approach as the user deemstone .
What this approach does is, for a given quantity of time, if the page did not receive or started any request it will end the execution.
On casper.js file (if you installed it globally, the path would be something like /usr/local/lib/node_modules/casperjs/modules/casper.js) add the following lines:
At the top of the file with all the global vars:
// Quiet period in ms used when (re)arming the timeout below.
var waitResponseInterval = 500;
// Handle of the pending quiet-period timeout, or null when none is armed.
var reqResInterval = null;
// Flag consulted by the onLoadFinished guard; starts out false.
var reqResFinished = false;
// Placeholder; replaced with the real implementation once a page exists.
var resetTimeout = function () {};
Then inside function "createPage(casper)" just after "var page = require('webpage').create();" add the following code:
// (Re)starts the quiet-period countdown. When it elapses, the finished flag
// is set and the page's onLoadFinished handler is invoked with "success".
resetTimeout = function () {
    if (reqResInterval) {
        clearTimeout(reqResInterval);
    }
    reqResInterval = setTimeout(function signalFinished() {
        reqResFinished = true;
        page.onLoadFinished("success");
    }, waitResponseInterval);
};
resetTimeout();
Then inside "page.onResourceReceived = function onResourceReceived(resource) {" on the first line add:
// Restart the quiet-period countdown.
resetTimeout()
Do the same for "page.onResourceRequested = function onResourceRequested(requestData, request) {"
Finally, on "page.onLoadFinished = function onLoadFinished(status) {" on the first line add:
// Return early unless the finished flag has been set.
if(!reqResFinished)
{
return
}
// Clear the flag again before continuing.
reqResFinished = false
And that's it, hope this one helps someone in trouble like I was. This solution is for casperjs but works directly for Spooky.
Good luck !
this is my solution its worked for me .
// When the page logs the marker message, poll jQuery.active once a second;
// as soon as it reaches 0, wait one more second, render the screenshot and
// exit.
//
// The original called clearInterval() with no handle, so the poller kept
// running and could queue several renders. The handle is now kept and the
// poller is stopped as soon as the render is scheduled.
page.onConsoleMessage = function (msg, lineNum, sourceId) {
    if (msg == 'hey lets take screenshot') {
        var poller = window.setInterval(function () {
            try {
                var sta = page.evaluateJavaScript("function(){ return jQuery.active;}");
                if (sta === 0) {
                    clearInterval(poller);
                    window.setTimeout(function () {
                        page.render('test.png');
                        phantom.exit();
                    }, 1000);
                }
            } catch (error) {
                console.log(error);
                phantom.exit(1);
            }
        }, 1000);
    }
};
// After a successful load, re-set the page content with an extra <script>
// before </body> that logs the marker message from window.onload.
page.open(address, function (status) {
    if (status === "success") {
        var onloadHook = '<script>window.onload = function(){console.log(\'hey lets take screenshot\');}</script></body>';
        var patchedHtml = page.content.replace('</body>', onloadHook);
        page.setContent(patchedHtml, address);
        return;
    }
    console.log('Unable to load url');
    phantom.exit();
});
Sending mouse-move events while the page is loading should work:
// Click at page coordinates (200, 660), then keep sending mousemove events for as long as page.loading is truthy.
page.sendEvent('click',200, 660);
do { phantom.page.sendEvent('mousemove'); } while (page.loading);
UPDATE
When submitting the form, nothing was returned, so the program stopped. The program did not wait for the page to load as it took a few seconds for the redirect to begin.
telling it to move the mouse until the URL changes to the home page gave the browser as much time as it needed to change. then telling it to wait for the page to finish loading allowed the page to full load before the content was grabbed.
// Click the first element carrying the classes 'btn btn-primary btn-block' inside the page.
page.evaluate(function () {
document.getElementsByClassName('btn btn-primary btn-block')[0].click();
});
// Keep sending mousemove events until the page's location equals the home URL...
do { phantom.page.sendEvent('mousemove'); } while (page.evaluate(function()
{
return document.location != "https://www.bestwaywholesale.co.uk/";
}));
// ...then keep going until page.loading turns falsy.
do { phantom.page.sendEvent('mousemove'); } while (page.loading);

Iterate over IDs and generate a report pdf for each ID using PhantomJS

This code generates one pdf for the first employee with his id in the url address. I would like to iterate over many ids and generate several pdfs one per each employee with unique id.
The records of employees are is a CSV file which has been read and parsed somewhere else. Also for iteraring over ids I have created an array containing ids which is called idArray. (e.g. idArray = ['123', '127', '156']). Would you please help me create a pdf per id from idArray?
var page = require('webpage').create();
var system = require('system');
var id = system.args[1];

page.open('http://127.0.0.1:3000/report.html?id=' + id, function () {
    // Poll every 100ms until the page's `reportReady` is truthy, then render
    // the PDF and quit.
    var intervalHandle = setInterval(function pollReport() {
        var isReady = page.evaluate(function () {
            return reportReady;
        });
        if (!isReady) {
            console.log("Not ready yet");
            return;
        }
        clearInterval(intervalHandle);
        page.render('report-id.pdf');
        phantom.exit();
    }, 100);
});
The problem is that you can't simply iterate over the IDs. page.open() is asynchronous, so you would tell PhantomJS to load the page with the next ID before the previous one can finished loading.
The solution is to use recursion. Define a function that contains the logic to do one iteration and use that to string many callbacks together:
var page = require('webpage').create();
var system = require('system');
// Ids arrive as a single comma-separated command-line argument.
var idArray = system.args[1].split(",");

iterate(); // let it run
/**
 * Renders one report per id: takes the next id off `idArray`, opens its
 * report page, polls until the page's `reportReady` is truthy, renders the
 * PDF, then calls itself for the following id. Exits PhantomJS once no ids
 * are left.
 *
 * Each PDF is now named after its id ('report-<id>.pdf'); the original wrote
 * every report to 'report-id.pdf', so each one overwrote the previous one.
 */
function iterate() {
    if (idArray.length === 0) {
        // Nothing (left) to render.
        phantom.exit();
        return;
    }
    var id = idArray.shift(); // changes the idArray
    page.open('http://127.0.0.1:3000/report.html?id=' + id, function () {
        var intervalHandle;
        // poll until the report page flags itself as ready
        var ready = function () {
            var isReady = page.evaluate(function () {
                return reportReady;
            });
            if (isReady) {
                clearInterval(intervalHandle);
                page.render('report-' + id + '.pdf');
                if (idArray.length > 0) {
                    iterate();
                } else {
                    phantom.exit();
                }
            } else {
                console.log("Not ready yet");
            }
        };
        intervalHandle = setInterval(ready, 100);
    });
}
I assume that the IDs are passed in this way:
$ phantomjs script.js 4,8,15,16,23,42

How to trigger Ajax Request for filling a Form out and wait until DOM changes?

I am going to fill out a Form using JQuery through PhantomJS.
I have following script for doing that:
var page = require('webpage').create();

page.open('http://demo.opencart.com/index.php?route=account/register', function () {
    fillTheForm();
    phantom.exit();
});

// Sets both <select> values in a single pass inside the page, then takes a
// screenshot straight away.
function fillTheForm() {
    page.evaluate(function () {
        var selects = document.getElementsByTagName('select');
        var first = $(selects[0]);
        first.val("38");
        first.trigger('change');
        $(selects[1]).val('610');
    });
    page.render('form.png');
}
after running this script, I got following message inside the console!
Alert, JavaScript error
Also, the picture that I have, after trying to fill the Form out, tells me that the existing values for the second Select box have not changed yet and then PhantomJS could not assign the value to the second field.
Can someone please help me to solve this problem? How can I fill out this two fields using JQuery and PhantomJS?
This is the chained selects problem. The second one is dependent on the first one and is populated via AJAX. This means that it is asynchronous. You have to wait before you can set the second value. In PhantomJS this is also problematic, because you have two contexts (page context and phantom context) that have do be "synchronized".
For example you can use
/**
 * Sets the first <select> and fires its change event, then three seconds
 * later sets the second <select>, renders 'form.png' and exits PhantomJS.
 */
function fillTheForm() {
    // Step 1: choose the first value and trigger its change handler.
    page.evaluate(function () {
        var first = $(document.getElementsByTagName('select')[0]);
        first.val("38");
        first.trigger('change');
    });

    // Step 2, delayed: choose the second value, capture, quit.
    var finishForm = function () {
        page.evaluate(function () {
            var second = document.getElementsByTagName('select')[1];
            $(second).val('610');
        });
        page.render('form.png');
        phantom.exit(); // this has to be inside, because everything is asynchronous now
    };
    setTimeout(finishForm, 3000); // assuming 3 seconds are enough time for the request
}
A better and more robust way would be to use waitFor from the examples, because it is finished as soon as the data is available. Here I have some indicators when the second select is reloaded:
var page = require('webpage').create();

// Runs after fillTheForm has finished: capture the form, then quit.
function captureAndExit() {
    page.render('form.png');
    phantom.exit();
}

page.open('http://demo.opencart.com/index.php?route=account/register', function () {
    fillTheForm(captureAndExit);
});
/**
 * Sets the first <select>, waits (via waitFor, up to 5 seconds) until the
 * second <select>'s options differ from a snapshot taken beforehand, then
 * sets the second <select> and calls `done`.
 *
 * @param done callback invoked after the second value has been set
 */
function fillTheForm(done) {
    // Snapshot the second select, then change the first one.
    page.evaluate(function () {
        var selects = document.getElementsByTagName('select');
        var dependent = selects[1];
        // custom indicators, perhaps something more elaborate is needed for general selects
        // in this case it is ok
        window._chainedSelectChildrenLength = dependent.children.length;
        window._chainedSelectFirstChildText = dependent.children[0].innerText;
        var first = $(selects[0]);
        first.val("38");
        first.trigger('change');
    });

    // Condition: option count or first option's text differs from the snapshot.
    var optionsChanged = function testFx() {
        return page.evaluate(function () {
            var dependent = document.getElementsByTagName('select')[1];
            // use indicators
            if (window._chainedSelectChildrenLength !== dependent.children.length) {
                return true;
            }
            return window._chainedSelectFirstChildText !== dependent.children[0].innerText;
        });
    };

    // Action: set the second value and hand control back to the caller.
    var selectSecond = function onReady() {
        page.evaluate(function () {
            // continue
            var dependent = document.getElementsByTagName('select')[1];
            $(dependent).val('610');
        });
        done();
    };

    waitFor(optionsChanged, selectSecond, 5000); // 3 seconds is the default
}
/**
 * Polls `testFx` every 250ms until it yields a truthy value or the timeout
 * elapses, then runs `onReady` (on success) or exits PhantomJS with code 1
 * (on timeout).
 *
 * The poller is stopped BEFORE `onReady` runs (and before exiting on timeout),
 * so an `onReady` that throws can no longer be invoked again by the next tick.
 *
 * @param testFx        condition, as a callback or as a string to eval
 * @param onReady       action, as a callback or as a string to eval
 * @param timeOutMillis max time to wait in ms; 3000 when omitted or falsy
 */
function waitFor(testFx, onReady, timeOutMillis) {
    // NOTE(review): string arguments are run through eval(); only pass trusted strings.
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timeout is 3s
        start = new Date().getTime(),
        condition = false,
        interval = setInterval(function() {
            if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
                // If not time-out yet and condition not yet fulfilled
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
                return;
            }
            // Either outcome is final: stop polling first.
            clearInterval(interval);
            if (!condition) {
                // If condition still not fulfilled (timeout but condition is 'false')
                console.log("'waitFor()' timeout");
                phantom.exit(1);
                return;
            }
            // Condition fulfilled (timeout and/or condition is 'true')
            console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
            if (typeof(onReady) === "string") {
                eval(onReady);
            } else {
                onReady(); //< Do what it's supposed to do once the condition is fulfilled
            }
        }, 250); //< repeat check every 250ms
}

Categories

Resources