Can't pass array items to function in PhantomJS - javascript

I am trying to pull the source code to several webpages at once. The links are fed into the array via a source text file. I am able to iterate through the array and print out the links and confirm they are there, but when trying to pass them through a function, they become undefined after the first iteration.
My ultimate goal is to have it save the source of each page to its own document. It does the first page correctly, but subsequent attempts are undefined. I've searched for hours but would appreciate it if someone could point me in the right direction.
// Read input.txt and split it on newlines into the list of page URLs.
var fs = require('fs');
var pageContent = fs.read('input.txt');
var arrdata = pageContent.split(/[\n]/);
var system = require('system');
// One webpage object is created here and shared by every page.open() call below.
var page = require('webpage').create();
// NOTE(review): `args`, this top-level `imagelink` and `content` are not read
// anywhere in the visible script (the functions below shadow `imagelink` with
// their own parameter).
var args = system.args;
var imagelink;
var content = " ";
// Opens `file` in the shared `page` and, when the load callback fires, writes
// page.content to the path given by `imagelink`.
function handle_page(file, imagelink){
page.open(file,function(){
// NOTE(review): `js` is never read after this assignment.
var js = page.evaluate(function (){
return document;
});
fs.write(imagelink, page.content, 'w');
// next_page() is invoked right here with no argument; setTimeout only receives
// its return value. Inside next_page the missing argument is falsy, so it
// calls phantom.exit(0).
setTimeout(next_page(),500);
});
}
// Calls phantom.exit(0) when `imagelink` is falsy, then passes the same value to
// handle_page as both the URL to open and the output path.
function next_page(imagelink){
var file = imagelink;
// NOTE(review): there is no `return` after phantom.exit(0), so the
// handle_page call below is not skipped by this branch — confirm intent.
if(!file){phantom.exit(0);}
handle_page(file, imagelink);
}
// Calls next_page for every key of arrdata in a synchronous loop, i.e. without
// waiting for any page.open callback; all calls use the same `page` object.
for(var i in arrdata){
next_page(arrdata[i]);
}
I realize now that the for loop will only iterate once, and that the other two functions then form their own loop, so that makes sense, but I am still having issues getting it running.

PhantomJS's page.open() is asynchronous (that's why there is a callback). The other thing is that page.open() is a long operation. If two such calls are made the second will overwrite the first one, because you're operating on the same page object.
The best way would be to use recursion:
/**
 * Loads the URLs in `arrdata` one after another, starting at index `i`, and
 * saves each successfully loaded page to "file_<index>.html".
 *
 * The next URL is only opened from inside the previous load's callback, so the
 * single shared `page` object never has two loads in flight at once.
 * PhantomJS exits once the end of the list is reached.
 *
 * @param {number} i Index in `arrdata` at which to start (or continue).
 */
function handle_page(i){
    var index = i;
    // Splitting a text file on "\n" typically leaves an empty last entry (and
    // "\r" remnants for CRLF files); skip entries that are blank after trimming.
    while (index < arrdata.length && !arrdata[index].trim()) {
        index++;
    }
    if (index >= arrdata.length) {
        phantom.exit();
        return;
    }
    var imageLink = arrdata[index].trim();
    page.open(imageLink, function(status){
        // page.open reports 'success' or 'fail'; only persist pages that loaded,
        // otherwise page.content does not belong to the requested URL.
        if (status === 'success') {
            fs.write("file_"+index+".html", page.content, 'w');
        } else {
            console.log("Failed to load " + imageLink + " (status: " + status + ")");
        }
        // Continue with the next entry whether or not this one loaded.
        handle_page(index+1);
    });
}
// Kick off the chain at the first entry; every later index is requested from
// inside the previous load's callback.
handle_page(0);
Couple of other things:
setTimeout(next_page(),500); immediately invokes next_page() without waiting. You wanted setTimeout(next_page, 500);, but then it also wouldn't work, because without an argument next_page simply exits.
fs.write(imagelink, page.content, 'w') that imagelink is probably a URL in which case, you probably want to define another way to devise a filename.
While for(var i in arrdata){ next_page(arrdata[i]); } works here be aware that this doesn't work on all arrays and array-like objects. Use dumb for loops like for(var i = 0; i < length; i++) or array.forEach(function(item, index){...}) if it is available.
page.evaluate() is sandboxed and provides access to the DOM, but everything that is not JSON serializable cannot be passed out of it. You will have to put that into a serializable format before passing it out of evaluate().

Related

how to pass javascript variable from one js to another js without jquery?

I am a beginner in web development. I am trying to send a variable from one JS file to another JS file.
I researched a lot before posting this question, but I couldn't find any particular method by which I can do this.
My script which defines variable is
/**
 * Builds the Eventbrite event-search URL from the checked inputs named "events".
 *
 * Only the first three checked inputs are used. Their values are joined with
 * commas, so selecting fewer than three no longer leaves a trailing comma in
 * the `categories` parameter.
 *
 * @returns {string} The complete search URL, so other scripts can consume it.
 */
function getEventData(){
    var MAX_CATEGORIES = 3;
    var eventList = document.getElementsByName("events");
    var selected = [];
    // Stop scanning as soon as the category limit is reached.
    for (var i = 0; i < eventList.length && selected.length < MAX_CATEGORIES; i++) {
        if (eventList[i].checked) {
            selected.push(eventList[i].value);
        }
    }
    var url = "https://www.eventbriteapi.com/v3/events/search/?categories=";
    url += selected.join(",");
    url += "&token=************";
    return url;
}
The url variable is perfect. I want to use it in another script which is
// Fetches the event list and renders each event name as an <h4> heading inside
// the element with id "events".
// NOTE(review): 'url' below is a string literal, not a variable — confirm which
// address is actually meant to be requested.
var jsonData = getJSON('url').then(function (data) {
    var sjd = "";
    for (var i = 0; i < data.events.length; i++) {
        sjd += '<h4>' + "<br/>" + data.events[i].name.text + '<br>' + '</h4>';
    }
    // NOTE(review): name.text is inserted as raw HTML; escape it unless the
    // API response is trusted.
    document.getElementById("events").innerHTML = sjd;
});
As seen i want to use the url variable from first js to second which is used in getJSON function.
I found local storage and cookies as possible ways. But local storage doesn't work, even when I use the link stated in the comment, and I can't find any good resource where I can learn about cookies. Is there any other possible way?
As suggested, use localStorage to do the job.
// localStorage stores strings, so serialise the value (e.g. with JSON.stringify) before saving it.
localStorage.setItem("myvar", "your json serialised string of variable");
and on the other page use it
// Read back the string stored under the "myvar" key.
var myvar = localStorage.getItem("myvar");
Again, deserialise on the other page. JSON.stringify is something you should know already. Also jquery has jStore plugin for this.

Cant access array elements outside a function

I am trying to hash the name together with the path of all the files from a specific directory. After hashing I am saving the hash into the array(hash_orig). Here is what I have done.
var fs = require('fs');
var Hashes = require('jshashes');
// Directory to scan, taken from process.argv[2].
var path = process.argv[2];
// Filled inside the readdir callback: the joined paths and one hash per path.
var path_elements = new Array();
var hash_orig = new Array();
fs.readdir(path, function(err, items) {
// NOTE(review): `err` is never checked before `items` is used.
console.log('\nNumber of files in the directory: '+items.length);
console.log('Hashing algorithm: SHA1')
for (var i=0; i<items.length; i++) {
path_elements.push(path+"/"+items[i])
// The hash input is the path string itself, not the file's contents.
// presumably .b64() returns the digest base64-encoded — verify against jshashes.
var SHA1 = new Hashes.SHA1().b64(path_elements[i])
console.log([i+1]+':\t'+items[i]+"\t" + SHA1)
hash_orig.push(SHA1);
}
});
// This statement is outside the readdir callback, so nothing orders it after
// the hash_orig.push calls above.
console.log(hash_orig)
The problem:
The problem is that when I am pushing the hash into the hash_orig array and trying to access it outside the function fs.readdir the array hash_orig is empty. (console.log(hash_orig))
I need to access it outside the callback in order to perform some further comparison operations that determine whether a hash has changed, so that I can verify the integrity of the files' names and paths.
What I am doing wrong?
Thank you.
fs.readdir is an async function. At the time console.log(hash_orig) is reached, the callback of readdir has not been called yet.
Put the log-statement at the end of your callback and you will see the results.
Yes, you are simply missing that fs.readdir is asynchronous and delivers its result through a callback.
Thus, when you call console.log(hash_orig) from outside, the callback has in fact not completed yet.
Use a timeout and your array will be filled:
// Print hash_orig once, 500 ms from now.
setTimeout(function logHashes() {
    console.log(hash_orig);
}, 500);

Writing to filesystem from within phantomjs sandboxed environment

I need to traverse forms on a site and save intermediate results to files. I'm using phantomjs' page.evaluate, but I'm having trouble accessing the filesystem from within page.evaluate's sandboxed environment. I have something like this:
// Sketch of the traversal: three nested loops over option1, option2 and
// option3, with a file write from the innermost loop. The "..." line is a
// placeholder for omitted logic, not runnable code.
for (var i = 0; i<option1.length; i++){
for (var ii = 0; ii<option2.length; ii++){
for (var iii = 0; iii<option3.length; iii++){
...
//I found what I want to save
fs.write("someFileName", someData);
}
}
}
Obviously, I don't have access to nodejs' fs from within page.evaluate, so the above does not work. I seem to have a few options:
Store everything I need to write to an array, and return that from the page.evaluate context into the outer, nodejs context, then save it from there. This would require memory I don't have.
Break up the above logic into smaller page.evaluate methods that return single pieces of data to save to the filesystem.
Somehow pass into the page.evaluate a magic function to write to the filesystem. This seems to not be possible (if I try to pass in a function that calls fs.writeFile for example, I get that fs is undefined, even if fs is a free variable in the function I passed?)
Return an iterator which, when pulled, yields the next piece of data to be written
Set up a trivial web server on the localhost that simply accepts POST requests and writes their contents into the filesystem. The page.evaluate code would then make those requests to the localhost. I almost tried this, but I'm not sure whether I'll be affected by the same-origin policy.
What are my options here?
Your evaluation is sound, but you forgot one type: onCallback. You can register to the event handler in the phantom context and push your data from page context to a file through this callback:
// Handler registered on the page object for payloads of the form
// { file, str, mode }. Writes `str` to `file` using `mode`.
// A falsy `file` falls back to "defaultFilename.txt" and a falsy `mode` to "w".
// The defaults are applied to local variables so the incoming payload object
// is left unmodified.
page.onCallback = function(data) {
    var file = data.file || "defaultFilename.txt";
    var mode = data.mode || "w";
    fs.write(file, data.str, mode);
};
...
// Same nested traversal as in the question, now run through page.evaluate.
// Each result is handed to window.callPhantom as a { file, str, mode } object
// instead of calling fs.write directly. The "..." line is a placeholder for
// omitted logic.
page.evaluate(function(){
for (var i = 0; i<option1.length; i++){
for (var ii = 0; ii<option2.length; ii++){
for (var iii = 0; iii<option3.length; iii++){
...
// save data
// Only call out when callPhantom is actually available as a function.
if (typeof window.callPhantom === 'function') {
window.callPhantom({ file: "someFileName", str: someData, mode: "a" }); // append
}
}
}
}
});
Note that PhantomJS does not run in Node.js. Although, there are bridges between Node.js and PhantomJS. See also my answer here.

getting pictures into an array in javascript

I'm kind of new to JavaScript; I mean, I know the syntax but not so much the libraries.
I need some help getting some files (pictures) from a folder into an array, let's say:
var[] pictures = ?;
(the folder is in my project and contains some pictures)
so I can loop over them and display them on the page. I did some searching but I didn't find any guide on how to do this.
I really want to understand how to do this for future projects; even a link to a known guide would be a big help.
If it helps, I'm using ASP.NET.
Well, there are a lot of ways to approach the problem, to me what you can do is (if you don't know the location of the images beforehand) make a service that returns the src of every image, store that in an array, and then show them in the page.
I believe you are using jQuery so you can make an ajax request like this:
// Skeleton request to the getSources method of Service.asmx; only the url
// option is filled in.
jQuery.ajax({
url: /*path to*/"Service.asmx/getSources"
// further options go here; see the jQuery.ajax documentation
});
then, from asp, make a new service (Service.asmx in my case) and create a method that returns the location of the pictures (in my case the method is called getSources)
I recommend you use JSON (and jQuery.getJSON() method) so you can return a List<string>.
Lastly you can iterate or store the sources in an array, I'll put an example with the getJSON method
// Image sources collected from the service; filled when the JSON response arrives.
var sources = [];
jQuery.getJSON("Service.asmx/getSources", function (data) {
    // Append every returned source to the shared array, preserving order.
    var total = data.length;
    var idx = 0;
    while (idx < total) {
        sources.push(data[idx]);
        idx += 1;
    }
});
once you have the sources you can display them like this fiddle
Tell me if it helped or if you need another solution.
If you want an array of pictures just to display them later, you can simply use:
// Known image locations.
var sources = [
    "path/to/yourImage1.jpg",
    "path/to/yourImage2.jpg",
    // ...
    "path/to/yourImageN.jpg",
];
// One Image object per source, in the same order as `sources`.
var pics = sources.map(function (src) {
    var img = new Image();
    img.src = src;
    return img;
});

Finding javascript's origin

Is it possible from a Javascript code to find out "where" it came from?
I needed this to provide scripts that could run folder-agnostic, e.g.:
http://web1/service-a/script.js
http://web2/some-folder/service-b/script.js
And they're linked in:
http://web1/page1.html
http://web2/page2.html
The file script.js is identical in both locations but I would like them to be able to find out where it originates from so it can call the right web service methods (script.js from service-a should call service-a.method while script.js that is served from service-b should call service-b.method)
Is there a way to find out where script.js came from without using any server-side code?
Well, it's kind of a hack, you could grab all the <script> tags in the document, look at which one has the file name of the file, and do the logic from there.
document.getElementsByTagName('script'); is pretty much all you need. The rest is basic JS.
Even more interesting than looping through all of the returned elements (although that's probably safest), is that we can simply only look at the last element returned by the call above, as Javascript guarantees that must be the <script> tag we're in at the time of it being parsed (with the exception of deferred scripts). So this code:
// While this file is being executed, the last <script> element in the document
// is the one that included it (deferred scripts excepted).
var all_scripts = document.getElementsByTagName('script');
// Index the collection fetched above; there is no variable named `scripts`.
var current_script = all_scripts[all_scripts.length-1];
alert(current_script.src);
Will alert the source of the script tag used to include the current Javascript file.
You can analyze source of the html where script.js is included for tag and retrieve path of the script.js from there. Include next function in script.js and use it to retrieve the path.
/**
 * Looks through the document's <script> elements for the first one whose src
 * contains "script.js".
 *
 * @returns {?string} That element's src, or null when no script tag matches.
 */
function getPath() {
    var tags = document.getElementsByTagName("script");
    var idx = 0;
    while (idx < tags.length) {
        var src = tags.item(idx).src;
        // Tags with an empty or missing src cannot match.
        if (src && src.indexOf("script.js") !== -1) {
            return src;
        }
        idx += 1;
    }
    return null;
}

Categories

Resources