JavaScript/CasperJS handling of timeouts when looping over pages - javascript

I have a problem with this script. It is supposed to load some links (line by line) saved in prova.txt then pass the links one by one to CasperJS and get the html of the page. I know that there must be some problem with timeouts/JavaScript.
Here is the script:
var fs = require('fs');
var file_h = fs.open('prova.txt', 'r');
var line = file_h.readLine();
var links = new Array();
var casper = require('casper').create();
while(line) {
line = file_h.readLine();
links.push(line);
}
(function theLoop (i) {
console.log("LOOP");
casper.start(links[i], function() {
setTimeout(function () {
fs.write("stats" + i + ".html", this.getHTML() );
i = i + 1;
if (--i) {
theLoop(i);
}
}, 2000);
});
casper.run();
})(4);
Documentation that I used: http://scottiestech.info/2014/07/01/javascript-fun-looping-with-a-delay/

Don't call start and run multiple times on the same casper instance.
casper.start();
(function theLoop (i) {
console.log("LOOP");
casper.thenOpen(links[i], function() {
this.wait(2000, function () {
fs.write("stats" + i + ".html", this.getHTML() );
if (--i) {
theLoop(i);
}
});
});
})(4);
casper.run();
Additionally, it seems like you want to decrease i to 0, so you shouldn't increment (i = i + 1) and decrement it --i in the next line.
Keep in mind that if you use setTimeout in a CasperJS script, you're breaking out of the stepped control flow and have to catch the broken off execution somehow. Use CasperJS' capabilities until it becomes inevitable. For example, I replaced setTimeout(function(){}, x) with casper.wait(x, function(){}).

Related

Several setTimeout() functions [duplicate]

I'm trying to send emails with a 10 seconds delay between. I wrote this code:
$(document).ready(function() {
for (i = 0; i < 20; i++) {
setTimeout("SendEmail(" + i + ")", 5000);
}
});
function SendEmail(id) {
$.get("mimesender.php?id=" + id, function(data) {
var toAppend = "<span> " + data + "</span>"
$("#sentTo").append(toAppend);
});
}
server side code(php) gets the id and selects the email with the specified id from the database
$query="select email from clienti where id =".$id;
then sends the email, and sends back the current email
echo email;
However, something is wrong here. It seems like the the js function waits for 5 seconds, and then displays all the 20 email addresses at once.
Can you tell me what I'm doing wrong ? any "sleep "workaround will be greatly appreciated :)
Use interval instead of loop.
Working demo: http://jsfiddle.net/xfVa9/2/
$(document).ready(function() {
var tmr;
var i=0;
tmr=setInterval(function(){
if(i<20){
SendEmail(i);
alert("Sent "+i)
i++;
}else{
clearInterval(tmr);
}
},5000)
});
You should create a function which calls itself after 5 seconds
var i=0;
function sendEmailNow() {
SendEmail(i);
++i;
if(i<20) {
setTimeout(sendEmailNow, 5000);
}
}
What happens is that you call setTimeout 20 times, just after one another, with a timeout of 5 seconds. So naturally, all emails gets sent at once. You could change the loop to look like this:
for (i=0;i<20;i++) {
setTimeout("SendEmail("+ i + ")",(i+1)*5000);
}
There's alot of other options though, and they'd depend on just what suits your specific problem best.
First, pass a function to setTimeout.
Secondly, you'd be better off if you set the timeout for the next one in the queue after the current one is finished.
In the for loop:
sendEmail(0); // start sending first
and in the callback:
, function(data) {
if(id < 19) { // if next should be sent
setTimeout(function() {
SendEmail(id + 1);
}, 5000);
}
var toAppend = "<span> " + data + "</span>"
$("#sentTo").append(toAppend);
}
Your loop is setting up 20 timers to wait 5 seconds, then letting them all go at once.
Try something like this:
var email_count = 20;
var sendMails = function(){
SendEmail(email_count--);
if(email_count > 0){
setTimeout(sendMails, 5000);
}
}
setTimeout(sendMails, 5000)
Avoid jQuery. Learn JavaScript.
var i = 0; (otherwise leak into outer scope or runtime error)
Extra closure:
window.setTimeout(
(function (j) {
return function () {
sendEmail(j);
};
}(i)),
i * 10000);
sendEmail (code style: not a constructor)
You really want to escape $id in the server-side code to prevent SQL injection.

JavaScript URL looper

I am trying create a page that will run some given URLs, in time this will be a database populated list of URLs loaded into an array. I have it working using some code I have altered, however I don't want it to keep looping, I just want it to stop once the list is complete.
So basically loads first URL, waits .5 of a second after the pages loads, then moves onto the next until the list of URLS is finished.
However, my code keeps looping. How do I prevent this?
var urls1 =
[
'http://localhost:8500/SupportTools/t1.cfm',
'http://localhost:8500/SupportTools/t2.cfm',
'http://localhost:8500/SupportTools/t3.cfm',
'http://localhost:8500/SupportTools/t4.cfm',
'http://localhost:8500/SupportTools/t5.cfm'
];
function showUrl1(idx)
{
var f1 = document.getElementById("f1");
f1.onload = function()
{
var next = ++idx % urls1.length;
setTimeout(function()
{
showUrl1(next);
}, 500);
}
f1.src = urls1[idx];
}
function start()
{
showUrl1(0);
}
<iframe class="show1" id="f1" src="about:blank"></iframe>
The reason it's looping is because you're using the remainder operator %. Just check the to see if next is greater than or equal to the length and then don't call setTimeout(). I've changed the snippet so that you can see it output in the console but you should get the idea.
var urls1 = [
'http://localhost:8500/SupportTools/t1.cfm',
'http://localhost:8500/SupportTools/t2.cfm',
'http://localhost:8500/SupportTools/t3.cfm',
'http://localhost:8500/SupportTools/t4.cfm',
'http://localhost:8500/SupportTools/t5.cfm'
];
function showUrl1(idx) {
if (idx >= urls1.length) {
return;
}
console.log(urls1[idx]);
var next = ++idx;
setTimeout(function() {
showUrl1(next);
}, 500);
}
function start() {
showUrl1(0);
}
start();
Dont showUrl when the list ends
function showUrl1(idx)
{
var f1 = document.getElementById("f1");
f1.onload = function()
{
var next = idx === urls1.length? null: ++idx ;
setTimeout(function()
{ if(next != null){
showUrl1(next);
}
}, 500);
}
f1.src = urls1[idx];
}

How to break CasperJS' repeat function when a condition is fulfilled?

I want to know if i can break a casper.repeat loop.
I have this script which does this..:
Searches google for agent 001, agent 002, agent 003, agent 004, agent 005, agent 006..... ....'til agent 011.
I want it to stop looping after it finds the text "James Bond".
Now it finds it, prints it out, but i dont know if and how to stop the casper.repeat loop.
var casper = require("casper").create({
clientScript: ["jquery.min.js"],
verbose: true,
logLevel: "info"
});
var mouse = require("mouse").create(casper);
var x = require('casper').selectXPath;
var webPage = require('webpage');
var page = webPage.create();
casper.on("remote.message", function(msg){
this.echo("remote> " + msg);
var test = msg;
if( test.indexOf('James Bond') >= 0){
casper.echo("Am Gasit James Bond");
}
});
casper.userAgent('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36')
casper.start("https://www.google.com/ncr", function() {
this.echo("\nINCEPUTUL INCEPUTULUI TITLUL PAGINII IN START (LINIA DE MAI JOS)\n"+this.getTitle()+"\n");
}).viewport(1366,768);
casper.options.waitTimeout = 30000;
var variabila_mea = "agent ";
var numTimes = 11, count = 1;
casper.repeat(numTimes, function() {
if (count < 10) {
var i = "00"+count;
casper.waitForSelector(x('//*[#id="gbqfq"]'), function(){
this.evaluate(function(count, variabila_mea, i, numar) {
document.getElementsByClassName('gbqfif')[0].value=variabila_mea+i;
document.forms[0].submit();
nextPage(count);
}, ++count,variabila_mea , i,"00000");
console.log(variabila_mea);
casper.waitForSelector(x('//*[#id="gbqfq"]'), function(){
var inputValue = casper.evaluate(function () {
console.log("\n\n\n"+document.getElementsByClassName('rc')[0].outerHTML+"\n\n\n");
});
});
casper.wait(1000, function(){
console.log("\n_____________________");
casper.capture('aa'+i+'.png');
console.log("_____________________\n");
});
});
} else if (count < 100 && count > 9) {
var i = "0"+count;
casper.waitForSelector(x('//*[#id="gbqfq"]'), function(){
this.evaluate(function(count, variabila_mea, i, numar) {
document.getElementsByClassName('gbqfif')[0].value=variabila_mea+i;
document.forms[0].submit();
nextPage(count);
}, ++count,variabila_mea , i,"00000");
console.log(variabila_mea);
casper.waitForSelector(x('//*[#id="gbqfq"]'), function(){
var inputValue = casper.evaluate(function () {
console.log("\n\n\n"+document.getElementsByClassName('rc')[0].outerHTML+"\n\n\n");
});
});
casper.wait(1000, function(){
console.log("\n_____________________");
casper.capture('aa'+i+'.png');
console.log("_____________________\n");
});
});
} else {
var i = count;
casper.waitForSelector(x('//*[#id="gbqfq"]'), function(){
this.evaluate(function(count, variabila_mea, i, numar) {
document.getElementsByClassName('gbqfif')[0].value=variabila_mea+i;
document.forms[0].submit();
nextPage(count);
}, ++count,variabila_mea , i,"00000");
console.log(variabila_mea);
casper.waitForSelector(x('//*[#id="gbqfq"]'), function(){
var inputValue = casper.evaluate(function () {
console.log("\n\n\n"+document.getElementsByClassName('rc')[0].outerHTML+"\n\n\n");
});
});
casper.wait(1000, function(){
console.log("\n_____________________");
casper.capture('aa'+i+'.png');
console.log("\n_____________________");
});
});
}
});
casper.run();
The repeat loop is finished before the navigation has even begun. repeat is executed immediately by unrolling all the iterations into a queue before the execution is even triggered through casper.run. You cannot break a repeat loop when synchronous code is used and you certainly cannot break a repeat loop when steps are actually scheduled. But you can still achieve what you want to achieve by using recursion.
You need to define your looping body as a function and make it tail recursive.
function loopBody(index, numTimes){
if (conditionFailed || index >= numTimes) {
return;
}
this.then(function(){
// do something useful
});
this.then(function(){
loopBody.call(this, index+1, numTimes);
});
}
casper.start(url).then(function(){
loopBody.call(this, 0, numTimes);
}).run();
In your case a global variable is probably needed to track what is found in each "loop" iteration.
You should understand the working of casper.repeat();
Internal structure of repeat is below. Taken from git hub
/**
* Repeats a step a given number of times.
* #param Number times Number of times to repeat step
* #aram function then The step closure
* #return Casper
* #see Casper#then
*/
Casper.prototype.repeat = function repeat(times, then) {
"use strict";
for (var i = 0; i < times; i++) {
this.then(then);
}
return this;
};
From this you can see that once repeat is called ie repeat (numTimes,your function).
You can consider "then" parameter as what ever the function you are passing to repeat. So once the call is happened it will work inside a for loop. So Breaking the Repeat after the call is not possible. Use Casper.byPassIf Casper.byPassUnless inside your function which you are passing to the repeat in order to bypass the execution.
Using this you an skip the steps inside your function. Which means repeat will stop only after reaching it end count. But it won't perform any steps because you are bypassing those with above mentioned functions. Further explanation of bypass function not comes under this scope of question. Actually it is work around you can try. Hope it will be helpful. If needed further clarification please feel free to ask. Enjoy !!!
Example the below loop will execute 10 times but only print 1 to 5.
var i=1;
loopcount=10
casper.repeat(loopCount, function() {
casper.thenBypassIf(function() {
return i >5;
}, 1);
casper.then(function() {
this.echo (i);
});
casper.then(function() {
i++;
});
});
How about this?
casper.keep_looping = true;
casper.repeat(some_counter, function() {
if (casper.keep_looping)
{
if (i_no_longer_want_to_loop) {
casper.keep_looping = false;
}
}
});
Since there is no way to break the casper.repeat loop (at least I haven't found it in the docs), then maybe avoiding the execution of the in-loop code is enough. The code above will run the loop until the "some_counter" is exhausted but the code in the loop will not be executed if you set the "i_no_longer_want_to_loop" variable to true.

How to iterate through file system directories and files using javascript?

I'm using Javascript to write an application that will be used with Phonegap to make an Android application. I'm using the Phonegap File API to read directories and files. The relevant code is shown below:
document.addEventListener("deviceready", onDeviceReady, false);
// PhoneGap is ready
//
function onDeviceReady() {
window.requestFileSystem(LocalFileSystem.PERSISTENT, 0, onFileSystemSuccess, fail);
}
function onFileSystemSuccess(fileSystem) {
fileSystem.root.getDirectory("/sdcard", {create: false, exclusive: false}, getDirSuccess, fail);
}
function getDirSuccess(dirEntry) {
// Get a directory reader
var directoryReader = dirEntry.createReader();
// Get a list of all the entries in the directory
directoryReader.readEntries(readerSuccess,fail);
}
var numDirs = 0;
var numFiles = 0;
function readerSuccess(entries) {
var i;
for (i=0; i<entries.length; i++)
{
if(entries[i].isFile === true)
{
numFiles++;
entries[i].file(fileSuccess,fail);
}
else if (entries[i].isDirectory === true)
{
numDirs++;
getDirSuccess(entries[i]);
}
}
}
So as of now, the program works fine. The reader will read the contents of the /sdcard directory..if it encounters a file, it will call fileSuccess (which I've excluded in the code for brevity), and if it encounters another directory, it will call getDirSuccess again. My question is this: How can I know when the entire /sdcard directory is read? I can't think of a good way of accomplishing this without going through the /sdcard directory more than one time. Any ideas are appreciated, and thank you in advance!
+1 on a good question since I have to do this anyway myself. I would use the old setTimeout trick. Once the cancel doesn't occur anymore, you know you are done and can fire your event, but just ensure its only fired once.
Here's what I mean and I've named the variables long simply to be more readable (not my style)...
// create timeout var outside your "readerSuccess" function scope
var readerTimeout = null, millisecondsBetweenReadSuccess = 100;
function readerSuccess(entries) {
var i = 0, len = entries.length;
for (; i < len; i++) {
if (entries[i].isFile) {
numFiles++;
entries[i].file(fileSuccess,fail);
} else if (entries[i].isDirectory) {
numDirs++;
getDirSuccess(entries[i]);
}
if (readerTimeout) {
window.clearTimeout(readerTimeout);
}
}
if (readerTimeout) {
window.clearTimeout(readerTimeout);
}
readerTimeout = window.setTimeout(weAreDone, millisecondsBetweenReadSuccess);
}
// additional event to call when totally done
function weAreDone() {
// do something
}
So the logic in this is you keep cancelling the "weAreDone" function from being called as you are reading through stuff. Not sure if this is the best way or more efficient but it would not result in more than one loop given the appropriate "millisecondsBetweenReadSuccess".
Instead of using a setTimeout, which can fail if you have a very slow device, you can use a counter to see how many callbacks still need to be called. If the counter reaches zero, you're all done :)
This is the recursive code:
var fileSync = new function(){
this.filesystem = null;
this.getFileSystem = function(callback){
var rfs = window.requestFileSystem || window.webkitRequestFileSystem;
rfs(
1// '1' means PERSISTENT
, 0// '0' is about max. storage size: 0==we don't know yet
, function(filesystem){
fileSync.filesystem = filesystem;
callback(filesystem);
}
, function(e){
alert('An error occured while requesting the fileSystem:\n\n'+ e.message);
}
);
}
this.readFilesFromReader = function(reader, callback, recurse, recurseFinishedCallback, recurseCounter)
{
if (recurse && !recurseCounter)
recurseCounter = [1];
reader.readEntries(function(res){
callback(res);
if (recurse)
{
for (var i=0; i<res.length; i++) {
/* only handle directories */
if (res[i].isDirectory == true)
{
recurseCounter[0]++;
fileSync.readFilesFromReader(res[i].createReader(), callback, recurse, recurseFinishedCallback, recurseCounter);
}
}
}
/* W3C specs say: Continue calling readEntries() until an empty array is returned.
* You have to do this because the API might not return all entries in a single call.
* But... Phonegap doesn't seem to accommodate this, and instead always returns the same dir-entries... OMG, an infinite loop is created :-/
*/
//if (res.length)
// fileSync.readFilesFromReader(reader, callback, recurse, recurseFinishedCallback, recurseCounter);
//else
if (recurse && --recurseCounter[0] == 0)
{
recurseFinishedCallback();
}
}
, function(e){
fileSync.onError(e);
if (recurse && --recurseCounter[0] == 0)
recurseFinishedCallback();
});
};
this.onError = function(e){
utils.log('onError in fileSync: ' + JSON.stringify(e));
if (utils.isDebugEnvironment())
alert('onError in fileSync: '+JSON.stringify(e));
}
}
var utils = new function(){
this.log = function(){
for (var i=0;i<arguments.length;i++)
console.log(arguments[i]);
}
this.isDebugEnvironment = function(){ return true }// simplified
}
Example code to test this:
var myFiles = [];
var directoryCount = 0;
window.onerror = function(){ alert('window.onerror=\n\n' + arguments.join('\n')) }
var gotFilesCallback = function(entries)
{
for (var i=0;i<entries.length;i++)
{
if (entries[i].isFile == true)
myFiles.push(entries[i].fullPath)
else
++directoryCount;
}
}
var allDoneCallback = function(){
alert('All files and directories were read.\nWe found '+myFiles.length+' files and '+directoryCount+' directories, shown on-screen now...');
var div = document.createElement('div');
div.innerHTML = '<div style="border: 1px solid red; position: absolute;top:10px;left:10%;width:80%; background: #eee;">'
+ '<b>Filesystem root:</b><i>' + fileSync.filesystem.root.fullPath + '</i><br><br>'
+ myFiles.join('<br>').split(fileSync.filesystem.root.fullPath).join('')
+ '</div>';
document.body.appendChild(div);
}
/* on-device-ready / on-load, get the filesystem, and start reading files */
var docReadyEvent = window.cordova ? 'deviceready':'load';
document.addEventListener(docReadyEvent, function()
{
fileSync.getFileSystem(function(filesystem){
var rootDirReader = filesystem.root.createReader();
fileSync.readFilesFromReader(rootDirReader, gotFilesCallback, true, allDoneCallback);
})
}, false);

This javascript setTimeout interacts with ajax requests in a really weird way

I'm writing this script so that it displays the status of an import script. It's supposed to call a function, that runs a http request, every X seconds.
function progres_import() {
//if(import_status != 'finalizat') {
alert("progres_import");
setTimeout(function() { return update_progres_import(); }, 2000);
setTimeout(function() { return update_progres_import(); }, 4000);
setTimeout(function() { return update_progres_import(); }, 6000);
setTimeout(function() { return update_progres_import(); }, 8000);
//setTimeout(function() { progres_import(); }, 400);
//}
//else {
//}
}
this is what i used to test the functionality. I put the comments in too just to show what I intend to ultimately do with it. I tried all the possible setTimeout calls, with quotes, without quotes, with and without the anonymous function.
var xmlhttp_import_progres;
function update_progres_import() {
xmlhttp_import_progres=GetXMLHttpObject();
if (xmlhttp_import_progres==null) {
alert ("Browser does not support HTTP Request (xmlhttp_import_progres)");
return;
}
var url="crm/ferestre/import_progres.php";
url=url+"?sid="+Math.random();
xmlhttp_import_progres.onreadystatechange=function() {
if (xmlhttp_import_progres.readyState == 4) {
progres_resp = xmlhttp_import_progres.responseText;
progres = progres_resp.split('_');
import_nrc = progres[0];
import_nrt = progres[1];
import_status = progres[2];
mesaj = 'Progres import: ' + import_nrc + ' / ' + import_nrt;
//document.getElementById("corp_import_mesaj").innerHTML = mesaj;
alert(progres_resp);
}
};
xmlhttp_import_progres.open("POST",url,true);
xmlhttp_import_progres.send(null);
}
this is the business end of the progres_import function.
what happens is i get the alert("progress_import") in the first function right as the import process starts, but the alert(progres_resp) in the second one starts popping up only after the import process is over (it still maintains the 2 second interval so in that sense the setTimeouts worked).
the php script in the ajax request just takes some session variables that the import script sets and prints them for the javascript to use (x imports of y total, z failed, stuff like this)
Any idea why it behaves like this?
xmlhttp_import_progres.readyState == 4) is only true at the end of the request. Hence, your alert dialogs pop up after finishing the request.
Furthermore, you can't expect your function to show alerts after a 2 second interval, because the server may or may not respond as fast.
A final note: If you want to have a periodical update function, use setInterval(function(){...}, 2000).
EDIT
Also, add var in this way: var xmlhttp_import_progres = GetXMLHttpObject();. Currently, you're globally defining the HTTP object, causing only one instance of the HTTP object to be accessible.
Here, can you try to edit just a little:
Please consider the above answer, but this code will make clear for you:
function progres_import() {
//if(import_status != 'finalizat') {
alert("progres_import");
setTimeout(function() { return update_progres_import(0); }, 2000);
setTimeout(function() { return update_progres_import(1); }, 4000);
setTimeout(function() { return update_progres_import(2); }, 6000);
setTimeout(function() { return update_progres_import(3); }, 8000);
//setTimeout(function() { progres_import(); }, 400);
//}
//else {
//}
}
AND
var xmlhttp_import_progres = [];
function update_progres_import(i) {
xmlhttp_import_progres[i]= GetXMLHttpObject();
if (xmlhttp_import_progres[i]==null) {
alert ("Browser does not support HTTP Request (xmlhttp_import_progres)");
return;
}
var url="crm/ferestre/import_progres.php";
url=url+"?sid="+Math.random();
xmlhttp_import_progres[i].onreadystatechange=function() {
if (xmlhttp_import_progres[i].readyState == 4) {
progres_resp = xmlhttp_import_progres[i].responseText;
progres = progres_resp.split('_');
import_nrc = progres[0];
import_nrt = progres[1];
import_status = progres[2];
mesaj = 'Progres import: ' + import_nrc + ' / ' + import_nrt;
//document.getElementById("corp_import_mesaj").innerHTML = mesaj;
alert(progres_resp);
}
};
xmlhttp_import_progres[i].open("POST",url,true);
xmlhttp_import_progres[i].send(null);
}

Categories

Resources