CasperJS Load next page in loop - javascript

I've been working on a script which collates the scores for a list of users from a website. One problem, though: I'm trying to load the next page in the while loop, but the function is not being loaded...
// Casper step that reads usernames.json, opens one results page per username,
// and records a count (or 0 / null) for each user in the `leaderboard` object.
casper.then(function () {
var fs = require('fs');
json = require('usernames.json');
var length = json.username.length;
leaderboard = {};
for (var ii = 0; ii < length; ii++) {
var currentName = json.username[ii];
// The callback is produced by a wrapper function that takes the loop index as `id`.
// NOTE(review): in this excerpt the wrapper is never closed or invoked with `ii` — confirm against the full script.
this.thenOpen("http://www.url.com?ul=" + currentName + "&sortdir=desc&sort=lastfound", function (id) {
return function () {
this.capture("Screenshots/" + json.username[id] + ".png");
// Only continue when the page does not show the "username does not exist" message.
if (!casper.exists(x("//*[contains(text(), 'That username does not exist in the system')]"))) {
// NOTE(review): `#id` in this XPath looks like a garbled `@id` — confirm.
if (casper.exists(x('//*[#id="ctl00_ContentBody_ResultsPanel"]/table[2]'))) {
this.thenEvaluate(tgsagc.tagNextLink);
// Reset the per-user counter and the paging flag before walking the result pages.
tgsagc.cacheCount = 0;
tgsagc.
continue = true;
this.echo("------------ " + json.username[id] + " ------------");
// NOTE(review): nothing in this loop body itself changes tgsagc.continue; only the step
// queued with this.then() (presumably via tgsagc.checkFinds) could — confirm how the loop ends.
while (tgsagc.
continue) {
this.then(function () {
this.evaluate(tgsagc.tagNextLink);
var findDates, pageNumber;
pageNumber = this.evaluate(tgsagc.pageNumber);
findDates = this.evaluate(tgsagc.getFindDates);
this.echo("Found " + findDates.length + " on page " + pageNumber);
tgsagc.checkFinds(findDates);
this.echo(tgsagc.cacheCount + " Caches for " + json.username[id]);
this.echo("Continue? " + tgsagc["continue"]);
// Move on to the next results page.
this.click("#tgsagc-link-next");
});
}
// NOTE(review): steps queued with this.then() run later, so this likely reads
// cacheCount before those steps have executed — confirm.
leaderboard[json.username[id]] = tgsagc.cacheCount;
console.log("Final Count: " + leaderboard[json.username[id]]);
console.log(JSON.stringify(leaderboard));
} else {
// User exists but the results table is missing: record zero.
this.echo("------------ " + json.username[id] + " ------------");
this.echo("0 Caches Found");
leaderboard[json.username[id]] = 0;
console.log(JSON.stringify(leaderboard));
}
} else {
// Unknown username: record null (distinct from 0 for a known user with no results).
this.echo("------------ " + json.username[id] + " ------------");
this.echo("No User found with that Username");
leaderboard[json.username[id]] = null;
console.log(JSON.stringify(leaderboard));
}
});

// While the paging flag is set, queue a step that tags the "next" link, reads the
// current page number and find dates, updates the running count, and clicks through.
while (tgsagc["continue"]) {
  this.then(function () {
    this.evaluate(tgsagc.tagNextLink);

    var currentPage = this.evaluate(tgsagc.pageNumber);
    var datesOnPage = this.evaluate(tgsagc.getFindDates);

    this.echo("Found " + datesOnPage.length + " on page " + currentPage);
    tgsagc.checkFinds(datesOnPage);
    this.echo(tgsagc.cacheCount + " Caches for " + json.username[id]);
    this.echo("Continue? " + tgsagc["continue"]);

    return this.click("#tgsagc-link-next");
  });
}
Ok, looking at this code I can suggest a couple of changes you should make:
I don't think you should be calling return from within your function within then(). This may be terminating the function prematurely. Looking at the CasperJS documentation, the examples don't return anything either.
Within your while loop, what sets "tgsagc.continue" to false?
Don't use "continue" as a variable name. It is a reserved word in JavaScript, used for skipping to the next iteration of a loop. In your case this shouldn't be a problem, but it's bad practice anyhow.
Don't continually re-define the method within your call to the then() function. Refactor your code so that it is defined once elsewhere.

We ended up having to scope the function, so it loads the next page in the loop.
This is mainly because CasperJS is not designed to calculate scores, and it tries to asynchronously do the calculation, missing the required functions

Related

Want to populate datatable row one by one using asynchronous in JavaScript

I am facing a problem where I want to return all the rows one by one without interrupting the current function. The issue is intermittent: sometimes I get all of the datatable's data, but sometimes I do not.
Then I started investigating and realised after checking the developer tool in network section that it throws error after page 10. And the error is 429 too many requests. Also, realised that the script was synchronous json call.
429 too many requests
I tried using below code in my script but the spin is not working. Also, found that this is not recommended way.
Can someone help me with solution? Thank you
// Set the global configs to synchronous
// With async: false, every subsequent jQuery Ajax call made with these defaults
// blocks until its response arrives.
$.ajaxSetup({
async: false
});
My Script
// Fetch paged data between sDate and eDate (eDate is extended by one day in the query).
// The first request asks for page 1; its .done handler then requests pages 1..pData.Number.
// The `....` lines mark code elided from this excerpt.
function getMed(sDate, eDate) {
var pData;
var firstURL = mUrl + "?sDate=" + moment(sDate).format(dateFormat) + "&eDate=" + moment(eDate).add(1, 'day').format(dateFormat) + "&pNum=1";
....
}).done(function (data) {
if (pData.Number > 0) {
var counter = 0;
// Set the global configs to synchronous
// From here on each $.getJSON call below blocks until it completes.
$.ajaxSetup({
async: false
});
var requestsProcessed = 0;
// One request per page, issued back to back with no delay between them.
for (var i = 1; i <= pData.Number; i++) {
$("#iconSpin").css('display', 'block');
var pURL = mUrl + "?sDate=" + moment(sDate).format(dateFormat) + "&eDate=" + moment(eDate).add(1, 'day').format(dateFormat) + "&pNum=" + i;
console.log("calling: " + pURL);
var pageRequest = $.getJSON(pURL, function (data) {
requestsProcessed++;
// NOTE(review): $("#progress") is a jQuery object, so assigning .innerHTML sets a plain
// property on the wrapper and does not update the element; .text(...) or .html(...) is likely intended.
$("#progress").innerHTML = "Fetching batch " + requestsProcessed + " of " + pData.Number + " batches.";
....
}).fail(function () {
// Hide the spinner and log which page URL failed.
$("#iconSpin").css('display', 'none');
console.log("fail for " + pURL + " in " + new Date());
});
console.log("completed for " + pURL + " in " + new Date());
}
} else {
alert("There is no data.");
}
}).fail(function () {
// The first request failed: just hide the spinner.
$("#iconSpin").css('display', 'none');
});
}

Sending multiple HTTP GET requests to api with a loop

I'm looking for a way to send many requests to an api using a different api url each time.
An example url for my project is:
http://api.bandsintown.com/artists/Hippo%20Campus/events.json?lapi_version=2.0&app_id=music_matcher
I'm using an HTTP request to pull the JSON info into my script and works perfectly...the first time. However, I want to be able to call it 50-100 ish times (max) in a loop with different artist names in the url (I'm using the BandsInTown API). For some reason, when I try to use a loop to call the http request multiple times, only one output appears and it is unpredictable which element in the order it will be (it's usually the output associated with the first or second element in the array). This is what my code looks like:
/**
 * HTTP GET call to the BandsInTown API (or any URL returning JSON).
 *
 * Issues an asynchronous GET and, once the request has completed with
 * status 200, parses the body as JSON and passes the result to `callback`.
 * An alert is shown only when the finished request failed (non-200 status
 * or a body that is not valid JSON) — not for the intermediate readyState
 * changes that every request goes through.
 *
 * @param {string} theUrl - URL or path to a file to request.
 * @param {function(*)} [callback] - Receives the parsed JSON on success.
 */
function httpGetAsync(theUrl, callback) {
  var httpRequest = new XMLHttpRequest();

  httpRequest.onreadystatechange = function () {
    // The handler fires for readyState 1-3 while the request is in flight;
    // only readyState 4 (DONE) tells us the outcome.
    if (httpRequest.readyState !== 4) {
      return;
    }
    if (httpRequest.status !== 200) {
      alert("error loading JSON doc");
      return;
    }

    var data;
    try {
      data = JSON.parse(httpRequest.responseText);
    } catch (parseError) {
      // A 200 response with a malformed body is still a load failure.
      alert("error loading JSON doc");
      return;
    }

    if (callback) {
      callback(data);
    }
  };

  httpRequest.open('GET', theUrl, true);
  httpRequest.send(null);
}
/**
 * Extracts event data from the API for one artist and writes each event's
 * venue, coordinates, first artist name, date and index to the document.
 *
 * @param {string} artist - Raw (un-encoded) artist name, e.g. "Mac Demarco".
 */
function parseEvent(artist) {
  // Encode the name so spaces and characters such as "/", "&" or "?" cannot
  // break out of the path segment and change the request.
  var url = "http://api.bandsintown.com/artists/" + encodeURIComponent(artist) + "/events.json?lapi_version=2.0&app_id=music_matcher";
  httpGetAsync(url, function(data) {
    var numEvents = Object.keys(data).length;
    for (var j = 0; j < numEvents; j++) {
      document.write(data[j].venue.name + "-> ");
      document.write("LAT:" + data[j].venue.latitude + " " + "LNG:" + data[j].venue.longitude);
      document.write("ARTIST: " + data[j].artists[0].name);
      document.write("DATE: " + data[j].datetime);
      document.write(" " + j + " ");
    }
  });
}
// Start one lookup per artist; the separator is written right after each
// request is started, not after its response arrives.
var artists = ["Drake", "Mac Demarco", "Hippo Campus", "STRFKR"];
var artistIndex = 0;
while (artistIndex < artists.length) {
  parseEvent(artists[artistIndex]);
  document.write(" ---NEXT ARTIST--- ");
  artistIndex += 1;
}
So I can't tell exactly what's going on, but things are acting weird with my current code. I don't have a whole lot of JavaScript and web development experience yet, so any help is appreciated! Preferably, I am looking for a way to implement this with pure JavaScript. I have had trouble figuring out how to handle Node.js and/or jQuery in Eclipse Neon (the IDE I am using).
You have implemented the closure pretty well, so clearly this isn't a problem of the success callback of one call overwriting the response of all the others. But when you look at document.write() it all becomes clear: this function first wipes your whole content clean, then writes whatever you told it to. That's why you hardly see anyone use it.
`document.write('a');`
`document.write('b');`
`document.write('c');` // a and b are gone you would see only 'c'
So after the loop is over you would only see the output of the last call. Though it's mostly random which call finishes last, it is mostly biased towards some particular value due to the way servers are tuned.
So better approach is to use some <div> or something and pour your results into it like this one
<div id="op"></div>
and
/**
 * Fetches the events for one artist and appends them to the element with
 * id "op" as a heading followed by one group of lines per event.
 *
 * The markup is built in a string and written to the element once, and all
 * values coming from the API response are HTML-escaped before insertion.
 *
 * @param {string} artist - Raw (un-encoded) artist name, e.g. "Mac Demarco".
 */
function parseEvent(artist) {
  // Neutralise HTML metacharacters in API-supplied values.
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  }

  // Encode the name so it stays a single path segment of the URL.
  var url = "http://api.bandsintown.com/artists/" + encodeURIComponent(artist) + "/events.json?lapi_version=2.0&app_id=music_matcher";

  httpGetAsync(url, function(data) {
    // Anything other than an array (e.g. an error object) yields no events.
    var events = Array.isArray(data) ? data : [];
    var op = document.getElementById('op');
    var html = " <br><br> <h2>---NEXT ARTIST---</h2> <br>";

    for (var j = 0; j < events.length; j++) {
      html += "<br>" + escapeHtml(events[j].venue.name) + "-> ";
      html += "<br>" + "LAT:" + escapeHtml(events[j].venue.latitude) + " " + "LNG:" + escapeHtml(events[j].venue.longitude);
      html += "<br>" + "ARTIST: " + escapeHtml(events[j].artists[0].name);
      html += "<br>" + "DATE: " + escapeHtml(events[j].datetime);
      html += "<br>" + " " + j + " <br>";
    }

    // Single write: avoids re-parsing the container's markup for every field.
    op.innerHTML = op.innerHTML + html;
  });
}
// Start one lookup per artist, in list order.
var artists = ["Drake", "Hippo Campus", "STRFKR","Mac Demarco"];
var artistIndex = 0;
while (artistIndex < artists.length) {
  parseEvent(artists[artistIndex]);
  artistIndex += 1;
}

Understanding variable mistakes within closures?

I am learning about closures. This example is given as a common mistake made when making a closure:
/**
 * Closure-pitfall example (kept intentionally as given): returns a function
 * that greets `name`, or undefined when `name` is not in `passengerArray`.
 *
 * The returned function reads the shared loop counter when it is called,
 * i.e. after the loop has finished, so the torpedo number it reports is
 * passengerArray.length + 1 rather than the matching position.
 */
function assignTorpedo(name, passengerArray) {
  var announce;
  var slot = 0;
  while (slot < passengerArray.length) {
    if (passengerArray[slot] == name) {
      announce = function () {
        var greeting = "Ahoy, " + name + "!\n";
        alert(greeting + "Man your post at Torpedo #" + (slot + 1) + "!");
      };
    }
    slot++;
  }
  return announce;
}
Since the for loop completes before the closure is returned, the i value will not match with the name. So, I understand that the loop continues on before the return happens.
My question comes from this, an example of the correct way to do things:
/**
 * Builds a function bound to `passengerArray`. When called with a name, the
 * returned function scans the whole array and alerts once for every position
 * whose entry loosely equals that name, reporting position + 1.
 */
function makeTorpedoAssigner(passengerArray) {
  return function (name) {
    var seat = 0;
    while (seat < passengerArray.length) {
      var isMatch = passengerArray[seat] == name;
      if (isMatch) {
        var greeting = "Ahoy, " + name + "!\n";
        alert(greeting + "Man your post at Torpedo #" + (seat + 1) + "!");
      }
      seat += 1;
    }
  };
}
I don't understand why, in the above example, the for loop wouldn't also continue past the first time it finds a match, which would result in another mismatched i. I understand that return stops a function, but I don't understand the connection between the return and that first match, since they don't happen together (visually). I would understand how the code knew to stop if that return were within the if statement or the for loop.
I don't understand why in the above example the for loop wouldn't also continue past the first time it finds a match
It would.
which would result in another mismatched i.
It wouldn’t, because it checks if (passengerArray[i] == name) every time. That’s wasteful, though; it’s an unusual fix. A better way would be to pass the index:
/**
 * Returns a function that, given a name, alerts a greeting with torpedo
 * number i + 1. The index is received as a parameter, so each returned
 * function keeps its own value. `passengerArray` is accepted but not read.
 */
function makeTorpedoAssigner(passengerArray, i) {
  return function (name) {
    var greeting = "Ahoy, " + name + "!\n";
    var order = "Man your post at Torpedo #" + (i + 1) + "!";
    alert(greeting + order);
  };
}
/**
 * Finds the first position of `name` in `passengerArray` and returns a
 * function that announces the torpedo assignment for that position, or
 * undefined when the name is not present.
 *
 * The returned function may be called with no arguments, in which case it
 * greets the `name` that was looked up; passing an argument explicitly still
 * overrides the greeted name, as makeTorpedoAssigner's result allows.
 *
 * @param {*} name - Passenger to look up (compared with ==).
 * @param {Array} passengerArray - Passengers in torpedo order.
 * @returns {function(*=)|undefined}
 */
function assignTorpedo(name, passengerArray) {
  for (var i = 0; i < passengerArray.length; i++) {
    if (passengerArray[i] == name) {
      var assigner = makeTorpedoAssigner(passengerArray, i);
      // Remember the looked-up name so a zero-argument call does not greet "undefined".
      return function (overrideName) {
        assigner(arguments.length > 0 ? overrideName : name);
      };
    }
  }
}
What happens here is :
assignTorpedo() returns a function based on name, So, every time it
checks for name in passengerArray and returns a function, but before
assignTorpedo could return torpedoAssignment, value of i would have
changed to the last value (length-1 of passengerArray), as loop will continue executing.
/**
 * Annotated version of the closure mistake (behavior kept as given).
 * Returns a greeter for `name`, or undefined if `name` is not found.
 */
function assignTorpedo(name, passengerArray) {
  var greeter;
  for (var pos = 0; pos < passengerArray.length; pos++) {
    if (passengerArray[pos] != name) {
      continue;
    }
    greeter = function () {
      // `pos` is read here at call time, not captured at match time.
      alert("Ahoy, " + name + "!\n" + "Man your post at Torpedo #" + (pos + 1) + "!");
    };
  }
  // The loop has run to completion, so `pos` now equals the array length.
  return greeter;
}
Right approach explained :
Here you are returning a function which takes a name and checks each
time in the array, the concept of closure here is that, even though
function(name) is returned, it would remember passengerArray (if you
will see passengerArray is not passed everytime, but no error is
thrown. This is closure.)
/**
 * Returns a lookup function that closes over `passengerArray`: the array is
 * supplied once here and remembered for every later call. Each call checks
 * every entry against the given name and alerts for each match.
 */
function makeTorpedoAssigner(passengerArray) {
  var announceFor = function (name) {
    for (var pos = 0; pos < passengerArray.length; pos++) {
      if (passengerArray[pos] != name) {
        continue;
      }
      // `pos` is the index being examined right now, so the number is correct.
      alert("Ahoy, " + name + "!\n" + "Man your post at Torpedo #" + (pos + 1) + "!");
    }
  };
  return announceFor;
}

Using casperjs and phantomjs to scrape multiple pages

I'm trying to scrape a number of pages that have a standard format. I've been able to use Phantomjs to successfully scrape a single page, but when I try to iterate over multiple ones, the asynchronous processing makes things hang up. What's the proper way to tell Casper/Phantom to wait?
// PhantomJS script: load an EPA facility page, inject jQuery, pull fields out of
// the ".result" table rows and append them as one line to lat_long.txt.
var page = require('webpage').create();
var fs = require('fs');
// Forward console output from the loaded page to this script's console.
page.onConsoleMessage = function(msg) {
phantom.outputEncoding = "utf-8";
console.log(msg);
};
// this overwrites the previous output file
f = fs.open("lat_long.txt", "w");
f.write("--");
f.close();
// this is the unique identifier for the locations. For now, I just have three datapoints
var EPAID = ["KYD980501076","ME8170022018", "MEN000103584"];
/// this code will be used to loop through the different locations. For now, set to look at only one.
// As written the loop body runs once (q < 1) and always uses EPAID[0], not EPAID[q].
for (q= 0; q < 1; q++) {
var processing = false;
//we construct the target url
var url = "http://iaspub.epa.gov/enviro/efsystemquery.cerclis?fac_search=site_epa_id&fac_value=" + EPAID[0] + "&fac_search_type=Beginning+With&postal_code=&location_address=&add_search_type=Beginning+With&city_name=&county_name=&state_code=&program_search=1&report=2&page_no=1&output_sql_switch=TRUE&database_type=CERCLIS" ;
page.open(url);
// The load handler is assigned after page.open() has been called, and is reassigned on every iteration.
page.onLoadFinished = function(status) {
if ( status === "success" ) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
// This function is evaluated inside the page; only its returned string comes back.
var str = page.evaluate(function() {
$value = [];
$Object = $(".result tr");
// Collect two cells per step (first td, and the one two siblings on) for 10 steps.
for (i =0 ; i < 10; i++) {
$value.push($Object.find('td').html(),$Object.find('td').next().next().html() );
$Object = $Object.next();
}
$string = "{ EPAID: "+ $value[0] + ", " +
"Name: "+ $value[1] + ", " +
"City: "+ $value[4] + ", " +
"State: "+ $value[6] + ", " +
"ZipCode: "+ $value[8] + ", " +
"Latitude: "+ $value[14] + ", " +
"Longitude: "+ $value[16] + " }" ;
return $string;
});
// Append the extracted record, then end the whole PhantomJS process.
f = fs.open("lat_long.txt", "a");
f.write(str);
f.close();
processing = true;
console.log("writing to file");
phantom.exit();
});
}
// right here it should delay until the previous page is completed
// while (!processing) {
// setTimeout(function(){ console.log("waiting....");},1000);
// }
};
}
// Logged immediately after the loop, before any page load has finished.
console.log("finished all pages");
If you switched to using casperJS, it is as simple as changing your page.open() into page.thenOpen(). (This CasperJS - How to open up all links in an array of links question looks very similar to yours?)
If you wanted to stick with PhantomJS you need to start the next page load in the onSuccess callback of the previous load. This is tedious, and needs care to avoid large memory usage. (I did it once or twice, but now simply use CasperJS.)
An alternative approach is to create the page object inside the loop. However, that does not quite answer your question, as the pages will then run in parallel. But you could use setTimeout to stagger each one, to avoid a burst of activity if you have hundreds of URLs!
Here is the code that ultimately works (using the timeout approach since I wasn't able to get the success callback to work better).
With casperjs installed, I named this file "process.js" and was able to run it from the command line as "casperjs process.js"
// Setup for the timer-driven version: one shared page object, the list of ids,
// a freshly truncated output file, and the counters used by yourFunction/process.
var page = require('webpage').create();
var fs = require('fs');
// Forward console output from the loaded page to this script's console.
page.onConsoleMessage = function(msg) {
phantom.outputEncoding = "utf-8";
console.log(msg);
};
// this is the unique identifier for the locations.
// (the `...` below marks ids elided from this listing)
var EPAID = ["NED981713837",... , "FLD049985302", "NJD986643153"];
// this overwrites the previous output file
f = fs.open("lat_long.txt", "w");
f.write("-<>-");
f.close();
// count: index of the next record to start; target: number of records to process.
var count = 0;
var target = 1400;
// written[n] becomes true once record n has been appended to the file.
var written = [];
/**
 * Timer-driven scheduler: starts the record at index `count`, advances
 * `count`, and re-arms itself to run again in 5000 ms. Once `count` is no
 * longer below `target` it logs "exiting" and ends the PhantomJS process.
 */
function yourFunction() {
  var moreToDo = count < target;

  if (!moreToDo) {
    console.log("exiting");
    phantom.exit();
    return;
  }

  process(count);
  count++;
  setTimeout(yourFunction, 5000);
}
// Load the page for EPAID[counter] in the shared `page` object, extract the record
// fields with jQuery inside the page, and append them to lat_long.txt at most once
// per counter value (tracked in `written`).
function process(counter){
var processing = false;
console.log("Beginning record #" + counter);
//we construct the target url
var url = "http://iaspub.epa.gov/enviro/efsystemquery.cerclis?fac_search=site_epa_id&fac_value=" + EPAID[counter] + "&fac_search_type=Beginning+With&postal_code=&location_address=&add_search_type=Beginning+With&city_name=&county_name=&state_code=&program_search=1&report=2&page_no=1&output_sql_switch=TRUE&database_type=CERCLIS" ;
page.open(url);
// Each call replaces the previous load handler on the shared page object.
page.onLoadFinished = function(status) {
if ( status === "success" ) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
// This function is evaluated inside the page; only its returned string comes back.
var str = page.evaluate(function() {
$value = [];
$Object = $(".result tr");
// Collect two cells per step (first td, and the one two siblings on) for 10 steps.
for (i =0 ; i < 10; i++) {
$value.push($Object.find('td').html(),$Object.find('td').next().next().html() );
$Object = $Object.next();
}
// Build a JSON-like object literal; string fields are quoted, latitude/longitude are not.
$string = "{ \"EPAID\": \""+ $value[0] + "\", " +
"\"Name\": \""+ $value[1] + "\", " +
"\"City\": \""+ $value[4] + "\", " +
"\"State\": \""+ $value[6] + "\", " +
"\"ZipCode\": \""+ $value[8] + "\", " +
"\"Latitude\": "+ $value[14] + ", " +
"\"Longitude\": "+ $value[16] + " }," ;
return $string;
});
// Guard against writing the same record twice if the handler fires more than once.
if (written[counter] === undefined) {
f = fs.open("lat_long.txt", "a");
f.write(str);
f.close();
written[counter] = true;
console.log("Writing to file #"+ counter);
}
});
}
};
}
// Entry point: log the start and kick off the first scheduler tick.
console.log("Start...");
yourFunction();

Function Scope issue when declaring an array of objects

This is the code that I currently have, one problem that is happening is I cannot use test() because presets[index].name and value are not visible outside of their function scope, how should I declare my array of objects in the global scope in order for me to be able to access these two variables in other functions?
// Shared results: one { name, value } object per preset slot.
var presets = [];
// Zero-based position of the slot most recently worked on by CreatePresetArray.
var index;

/**
 * Walks up to 40 SNMP variables starting at the OID of (AMib, AVar) and
 * appends one entry per step to the global `presets` array.
 *
 * Every step gets its own fresh object, so entries no longer alias a single
 * shared object. A step that fails is logged and leaves an empty object in
 * its slot. The walk stops early after storing the first OID that does not
 * start with the parent OID prefix.
 *
 * @param {*} AMib - MIB handle passed through to snmp.getOID.
 * @param {string} AVar - Variable name passed through to snmp.getOID.
 */
function CreatePresetArray(AMib, AVar) {
  var parentpresetStringOID = snmp.getOID(AMib, AVar);
  var presetStringOID = parentpresetStringOID;
  // Drop the last two characters to get the prefix shared by the walked OIDs.
  parentpresetStringOID = parentpresetStringOID.substring(0, parentpresetStringOID.length - 2);
  log.error("parentpresetStringOID is " + parentpresetStringOID);

  var vb;
  var presetStringVal;
  var preset;

  for (var i = 1; i < 41; i++) {
    // Set before the try block so the failure log below names the current slot.
    index = i - 1;
    // A new object per slot; pushing one shared object made all slots identical.
    preset = {};
    presets.push(preset);
    try {
      log.error("presetStringOID before getNextVB= " + presetStringOID);
      vb = snmp.getNextVB(presetStringOID);
      presetStringOID = vb.oid;
      log.error("presetStringOID after getnextVB= " + presetStringOID);
      presetStringVal = snmp.get(presetStringOID);
      log.error("presetStringVal= " + presetStringVal);
      preset.name = presetStringOID;
      preset.value = presetStringVal;
      log.error("preset array's OID at position [" + index + "] is" + preset.name + " and the value stored is " + preset.value);
      if (presetStringOID.indexOf(parentpresetStringOID) != 0) {
        break;
      }
    } catch (ie) {
      log.error("couldn't load preset array " + index);
    }
  }
}
// Populate the global presets array from the "presetString" variable of presetMib.
CreatePresetArray(presetMib, "presetString");
/**
 * Logs the name and value of every entry currently stored in the global
 * `presets` array, one log line per entry.
 *
 * Uses its own loop counter to index the array (the original looped on `i`
 * but read `presets[index]`, logging the same entry each time) and is
 * bounded by the array's actual length instead of a fixed 40 iterations.
 */
function test() {
  for (var i = 0; i < presets.length; i++) {
    log.error("test" + presets[i].name + " " + presets[i].value);
  }
}
// Dump the stored presets to the log.
test();
The for loop in your function test iterates over i but uses index inside the loop. Perhaps you meant to use
// Index presets with the loop counter itself instead of the global `index`.
for (i = 0; i < 40; i++) { // 1 lower as you were using `index = i - 1` before
log.error("test" + presets[i].name + " " + presets[i].value);
}
Re-wrote your code. I don't think I made that much by way of change. If this doesn't clear up your problem, consider: Is the catch happening each iteration? Is the problem actually coming from a different method which is only visible here? Also, consider logging the whole presets Array when debugging to see what it looks like.
// Global list of preset entries; rebuilt from scratch by CreatePresetArray.
var presets = [];

/**
 * Rebuilds `presets` by walking up to 40 SNMP variables, starting from the
 * OID of (AMib, AVar). Each successful step appends a { name, value } object;
 * a step that fails before appending leaves a null placeholder so positions
 * stay aligned. The walk ends early after storing the first OID that does
 * not begin with the parent OID prefix.
 */
function CreatePresetArray(AMib, AVar) {
  var startOID = snmp.getOID(AMib, AVar);
  // The parent prefix is the starting OID without its last two characters.
  var parentPrefix = startOID.substring(0, startOID.length - 2);
  var cursorOID = startOID;
  var slot = 0;

  log.error("parentPresetOID is " + parentPrefix);
  presets = []; // start from an empty list on every call

  while (slot < 40) {
    try {
      var entry = {};

      // Advance to the next variable and remember its OID.
      var nextVB = snmp.getNextVB(cursorOID);
      cursorOID = nextVB.oid;
      log.error("presetOID after getnextVB= " + cursorOID);

      // Read the value stored at that OID.
      var fetched = snmp.get(cursorOID);
      log.error("presetValue= " + fetched);

      entry.name = cursorOID;
      entry.value = fetched;
      presets.push(entry);

      log.error("preset array's OID at position [" + slot + "] is" + presets[slot].name + " and the value stored is " + presets[slot].value);

      if (cursorOID.indexOf(parentPrefix) !== 0) {
        break;
      }
    } catch (ie) {
      log.error("couldn't load preset array " + slot);
      // Only pad when this slot was not already filled before the failure.
      if (presets.length !== slot + 1) {
        presets.push(null);
      }
    }
    slot += 1;
  }
}
Two options come to mind immediately:
you could pass the preset array as a argument to test().
You could put both CreatePresetArray() and test() inside a wrapper function and declare preset array at the top of your wrapper. That would give them both access to the variable.
It's generally considered Bad Form to declare globals if it can be avoided. Pollutes the namespace.

Categories

Resources