document is undefined in Google URL harvester - javascript

I have code taken from Google to harvest URLs from Google. I saved it as filename.js. When I run the file it showed "'document' is undefined". The part of the code which is showing problem is
// ==UserScript==
// #name Google URL Harvester
// #namespace http://userscripts.org/scripts/show/42998
// #description Harvests URLs from a Google Search
// #include http://www.google.co.uk/
// #include http://www.google.com/
// ==/UserScript==
var btn_container;
var inputs = document.getElementsByTagName("input");
for (var i = 0; i < inputs.length; i++) {
if (inputs[i].name == "btnG")
btn_container = inputs[i].parentNode;
}
function find_next_link(html) {
var url = html.match(/(<a href="[^"]+">)\s*<span[^>]+style="[^"]*background-position:\s?-76px\s/);
if (url == null)
return false;
var div = document.createElement("div");
div.innerHTML = url[1];
return div.firstChild.href;
}
function harvest(query_url, callback) {
ajax(query_url, function(e){
var als = e.match(/<a[^>]+class=l[^>]*>/g);
for (var i = 0; i < als.length; i++) {
urls.push(als[i].match(/href="([^"]+)"/)[1]);
}
var next_url = find_next_link(e);
if (next_url)
harvest(next_url, callback);
else
callback();
});
}
function ajax(url, callback) {
var req = new XMLHttpRequest();
req.onreadystatechange = function() {
if (req.readyState == 4 && req.status == 200) {
callback(req.responseText);
}
}
req.open("GET", url, true);
req.send("");
}
var new_button = document.createElement("input");
new_button.type = "button";
new_button.value = "Harvest URLs";
new_button.setAttribute("onsubmit", "return false;");
btn_container.appendChild(new_button);
var urls = [];
new_button.addEventListener("click", function(){
var query_url = unsafeWindow.document.forms[0].action + "?num=100&q="+escape(unsafeWindow.document.forms[0].q.value);
document.body.innerHTML = "<img src='http://oneworldwebsites.com/images/wheel%20throbber.gif' />";
harvest(query_url, function() {
document.body.innerHTML = urls.join("<br/>");
});
},false);
Here I have not defined document(if it is necessary). Can anybody please rectify the error in this code. Operating system is Windows 7.
I am saving this document to my desktop as harv.js and running it. Am I doing anything wrong?

Google like most websites updates their structure over time.
for (var i = 0; i < inputs.length; i++) {
if (inputs[i].name == "btnG")
btn_container = inputs[i].parentNode;
}
Needs to become
for (var i = 0; i < inputs.length; i++) {
if (inputs[i].name == "btnK") //<<------G to K
btn_container = inputs[i].parentNode;
}

I can just guess that the error is thrown because you try to access the "input" element, before creating it later. Moreover you shouldn't use a html element as a identifier in JavaScript. You could also use Firebug to pinpoint the error.

Related

How to dynamically load photos to dynamically loaded divs?

I wrote this code, which creates divs depending on the amount of text files in local directory.
Then I tried to write additional code, which appends photos to each of these divs. Unfortunately, this code doesn't append any photos...
function liGenerator() {
var xmlhttp = new XMLHttpRequest();
xmlhttp.onreadystatechange = function() {
if (xmlhttp.readyState == XMLHttpRequest.DONE) {
if (xmlhttp.status == 200) {
var n = (xmlhttp.responseText.match(/txt/g) || []).length;
for (var i = 1; i < n; i++) {
$.get("projects/txt/"+i+".txt", function(data) {
var line = data.split('\n');
var num = line[0]-"\n";
var clss = line[1];
var title = line[2];
var price = line[3];
var content = line[4];
$("#list-portfolio").append("<li class='item "+clss+" show' onclick='productSelection('"+num+"')'><img src='projects/src/"+num+"/title.jpg'/><div class='title'><h1>"+title+"</h1><h2>"+price+"</h2></div><article>"+content+"</article></li>");
$("#full-size-articles").append("<li class='product "+num+"'><div><div class='photo_gallery'><div id='fsa_img "+num+"'><div width='100%' class='firstgalleryitem'></div></div></div><article class='content'><h1 class='header_article'>"+title+"</h1><h2 class='price_article'>"+price+"</h2><section class='section_article'>"+content+"</section></article></div></li>");
});
}
}
}
};
xmlhttp.open("GET", "projects/txt/", true);
xmlhttp.send();
}
function pushPhotos() {
var list = document.getElementById("full-size-articles").getElementsByTagName("li");
var amount = list.length;
for(var i=1;i<=amount;i++) {
var divID = "#fsa_img "+i;
var where = "projects/src/"+i+"/";
var fx = ".jpg";
loadPhotos(where, fx, divID);
}
}
function loadPhotos(dir, fileextension, div) {
$.ajax({
url: dir,
success: function (data) {
$(data).find("a:contains(" + fileextension + ")").each(function () {
var filename = this.href.replace(window.location, "").replace("http://", "");
$(div).append("<img src='"+dir+filename+"' class='mini_photo'/>");
});
}
});
}
Any ideas on why this code is not working as intended?
The main issue is the space between "#fsa_img" and "i". When I changed it to '"#fsa_img_"+i', the code started work as intended.

'Juggling Async' - Why does my solution not return anything at all?

After asking a question and getting a very helpful answer on what the 'Async Juggling' assignment in learnyounode was asking me to do, I set out to implement it myself.
The problem is, my setup isn't having any success! Even though I've referred to other solutions out there, my setup simply isn't returning any results when I do a learnyounode verify myscript.js.
GIST: jugglingAsync.js
var http = require('http');
var app = (function () {
// Private variables...
var responsesRemaining,
urls = [],
responses = [];
var displayResponses = function() {
for(var iterator in responses) {
console.log(responses[iterator]);
}
};
// Public scope...
var pub = {};
pub.main = function (args) {
responsesRemaining = args.length - 2;
// For every argument, push a URL and prep a response.
for(var i = 2; i < args.length; i++) {
urls.push(args[i]);
responses.push('');
}
// For every URL, set off an async request.
for(var iterator in urls) {
var i = iterator;
var url = urls[i];
http.get(url, function(response) {
response.setEncoding('utf8');
response.on('data', function(data) {
if(response.headers.host == url)
responses[i] += data;
});
response.on('end', function() {
if(--responsesRemaining == 0)
displayResponses();
});
});
}
};
return pub;
})();
app.main(process.argv);
Question: What am I doing wrong?
This line
for(var iterator in urls) {
doesn't do what you think it does. It actually loops over the properties of urls (see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...in). Instead, you have to do something like
for(var i = 0; i < urls.length; i++) {
var url = urls[i];
...
}
or
urls.forEach(function(url, index) {
...
});
In addition to not properly looping through the arrays inside the app module, I was also not properly concatenating data returned from the response.on('data') event. Originally I was doing...
responses[index] += data;
Instead, the correct thing to do was:
responses[index] = responses[index] + data;
Changing that, as well as the things noted by #arghbleargh got the 'Async Juggling' to fully verify!
I have tested my code and it all worked:
~ $ node juggling_async.js site1 site2 site3 site4 ...
The JS code does not limit only to three sites.
var http = require('http');
// Process all the site-names from the arguments and store them in sites[].
// This way does not limit the count to only 3 sites.
var sites = [];
(function loadSites() {
for(var i = 2, len = process.argv.length; i < len; ++i) {
var site = process.argv[i];
if(site.substr(0, 6) != 'http://') site = 'http://' + site;
sites.push(site);
}
})();
var home_pages = [];
var count = 0;
function httpGet(index) {
var home_page = '';
var site = sites[index];
http.get(site, function(res) {
res.setEncoding('utf8');
res.on('data', function(data) {
home_page += data;
});
res.on('end', function() {
++count;
home_pages[index] = home_page;
if(count == sites.length) {
// Yahoo! We have reached the last one.
for(var i = 0; i < sites.length; ++i) {
console.log('\n############ Site #' + (+i+1) + ': ' + sites[i]);
console.log(home_pages[i]);
console.log('============================================\n');
}
}
});
})
.on('error', function(e) {
console.log('Error at loop index ' + inddex + ': ' + e.message);
})
;
}
for(var i = 0; i < sites.length; ++i) {
httpGet(i);
}

Asynchronous execution of javascript code

I am studying javascript and json but I've some problems: I have a script that works with json but the performances of what I wrote aren't that good. The code works only if I do a debug step by step with firebug or other tools and that makes me think that the execution of the code (or a part of it ... the one that creates the table as you'll see) requires too much time so the browser stops it.
The code is:
var arrayCarte = [];
var arrayEntita = [];
var arraycardbyuser = [];
function displayArrayCards() {
var richiestaEntity = new XMLHttpRequest();
richiestaEntity.onreadystatechange = function() {
if(richiestaEntity.readyState == 4) {
var objectentityjson = {};
objectentityjson = JSON.parse(richiestaEntity.responseText);
arrayEntita = objectentityjson.cards;
}
}
richiestaEntity.open("GET", "danielericerca.json", true);
richiestaEntity.send(null);
for(i = 0; i < arrayEntita.length; i++) {
var vanityurla = arrayEntita[i].vanity_urls[0] + ".json";
var urlrichiesta = "http://m.airpim.com/public/vurl/";
var richiestaCards = new XMLHttpRequest();
richiestaCards.onreadystatechange = function() {
if(richiestaCards.readyState == 4) {
var objectcardjson = {};
objectcardjson = JSON.parse(richiestaCards.responseText);
for(j = 0; j < objectcardjson.cards.length; j++)
arrayCarte[j] = objectcardjson.cards[j].__guid__; //vettore che contiene i guid delle card
arraycardbyuser[i] = arrayCarte;
arrayCarte = [];
}
}
richiestaCards.open("GET", vanityurla, true);
richiestaCards.send(null);
}
var wrapper = document.getElementById('contenitoro');
wrapper.innerHTML = "";
var userTable = document.createElement('table');
for(u = 0; u < arrayEntita.length; u++) {
var userTr = document.createElement('tr');
var userTdcard = document.createElement('td');
var userTdinfo = document.createElement('td');
var br = document.createElement('br');
for(c = 0; c < arraycardbyuser[u].length; c++) {
var cardImg = document.createElement('img');
cardImg.src = "http://www.airpim.com/png/public/card/" + arraycardbyuser[u][c] + "?width=292";
cardImg.id = "immaginecard";
userTdcard.appendChild(br);
userTdcard.appendChild(cardImg);
}
var userdivNome = document.createElement('div');
userdivNome.id = "diverso";
userTdinfo.appendChild(userdivNome);
var userdivVanity = document.createElement('div');
userdivVanity.id = "diverso";
userTdinfo.appendChild(userdivVanity);
var nome = "Nome: ";
var vanityurl = "Vanity Url: ";
userdivNome.innerHTML = nome + arrayEntita[u].__title__;
userdivVanity.innerHTML = vanityurl + arrayEntita[u].vanity_urls[0];
userTr.appendChild(userTdcard);
userTr.appendChild(userTdinfo);
userTable.appendChild(userTr);
}
wrapper.appendChild(userTable);
}
The problem is that the code that should make the table doesn't wait for the complete execution of the code that works with the json files. How can I fix it? I would prefer,if possible, to solve that problem with something easy, without jquery and callbacks (I am a beginner).
You'll have to move som code around to make that work. at first, split it up in some functions, then it is easier to work with. I dont know if it works, but the idea is that first it loads the arrayEntita. When that is done, it fills the other 2 arrays. And when the last array has been filled, it builds the table.
var arrayCarte = [];
var arrayEntita = [];
var arraycardbyuser = [];
function displayArrayCards() {
var richiestaEntity = new XMLHttpRequest();
richiestaEntity.onreadystatechange = function () {
if (richiestaEntity.readyState == 4) {
var objectentityjson = {};
objectentityjson = JSON.parse(richiestaEntity.responseText);
arrayEntita = objectentityjson.cards;
BuildArrayEntita();
}
}
richiestaEntity.open("GET", "danielericerca.json", true);
richiestaEntity.send(null);
}
function BuildArrayEntita() {
for (i = 0; i < arrayEntita.length; i++) {
var vanityurla = arrayEntita[i].vanity_urls[0] + ".json";
var urlrichiesta = "http://m.airpim.com/public/vurl/";
var richiestaCards = new XMLHttpRequest();
richiestaCards.onreadystatechange = function () {
if (richiestaCards.readyState == 4) {
var objectcardjson = {};
objectcardjson = JSON.parse(richiestaCards.responseText);
for (j = 0; j < objectcardjson.cards.length; j++)
arrayCarte[j] = objectcardjson.cards[j].__guid__; //vettore che contiene i guid delle card
arraycardbyuser[i] = arrayCarte;
arrayCarte = [];
//If it is the last call to populate arraycardbyuser, build the table:
if (i + 1 == arrayEntita.length)
BuildTable();
}
}
richiestaCards.open("GET", vanityurla, true);
richiestaCards.send(null);
}
}
function BuildTable() {
var wrapper = document.getElementById('contenitoro');
wrapper.innerHTML = "";
var userTable = document.createElement('table');
for (u = 0; u < arrayEntita.length; u++) {
var userTr = document.createElement('tr');
var userTdcard = document.createElement('td');
var userTdinfo = document.createElement('td');
var br = document.createElement('br');
for (c = 0; c < arraycardbyuser[u].length; c++) {
var cardImg = document.createElement('img');
cardImg.src = "http://www.airpim.com/png/public/card/" + arraycardbyuser[u][c] + "?width=292";
cardImg.id = "immaginecard";
userTdcard.appendChild(br);
userTdcard.appendChild(cardImg);
}
var userdivNome = document.createElement('div');
userdivNome.id = "diverso";
userTdinfo.appendChild(userdivNome);
var userdivVanity = document.createElement('div');
userdivVanity.id = "diverso";
userTdinfo.appendChild(userdivVanity);
var nome = "Nome: ";
var vanityurl = "Vanity Url: ";
userdivNome.innerHTML = nome + arrayEntita[u].__title__;
userdivVanity.innerHTML = vanityurl + arrayEntita[u].vanity_urls[0];
userTr.appendChild(userTdcard);
userTr.appendChild(userTdinfo);
userTable.appendChild(userTr);
}
wrapper.appendChild(userTable);
}
i dont know if this check:
if (i + 1 == arrayEntita.length)
BuildTable();
but else you have to check if alle responseses have returned before executing buildtable();
AJAX requests are asynchronous. They arrive at an unknown period during execution and JavaScript does not wait for the server to reply before proceeding. There is synchronous XHR but it's not for ideal use. You'd lose the whole idea of AJAX if you do so.
What is usually done is to pass in a "callback" - a function that is executed sometime later, depending on when you want it executed. In your case, you want the table to be generated after you receive the data:
function getData(callback){
//AJAX setup
var richiestaEntity = new XMLHttpRequest();
//listen for readystatechange
richiestaEntity.onreadystatechange = function() {
//listen for state 4 and ok status (200)
if (richiestaEntity.readyState === 4 && richiestaEntity.status === 200) {
//execute callback when data is received passing it
//what "this" is in the callback function, as well as
//the returned data
callback.call(this,richiestaEntity.responseText);
}
}
richiestaEntity.open("GET", "danielericerca.json"); //third parameter defaults to true
richiestaEntity.send();
}
function displayArrayCards() {
//this function passed to getData will be executed
//when data arrives
getData(function(returnedData){
//put here what you want to execute when getData receives the data
//"returnedData" variable inside this function is the JSON returned
});
}
As soon as you have made the ajax call, put all of the rest of the code inside the readystatechange function. This way, it will execute everything in order.
Edit: #Dappergoat has explained it better than I can.

How do get param from a url

As seen below I'm trying to get #currentpage to pass client params
Can someone help out thanks.
$(document).ready(function() {
window.addEventListener("load", windowLoaded, false);
function windowLoaded() {
chrome.tabs.getSelected(null, function(tab) {
document.getElementById('currentpage').innerHTML = tab.url;
});
}
var url = $("currentpage");
// yes I relize this is the part not working.
var client = jQuery.param("currentpage");
var page = jQuery.param("currentpage");
var devurl = "http://#/?clientsNumber=" + client + "&pageName=" + page ;
});
This is a method to extract the params from a url
function getUrlParams(url) {
var paramMap = {};
var questionMark = url.indexOf('?');
if (questionMark == -1) {
return paramMap;
}
var parts = url.substring(questionMark + 1).split("&");
for (var i = 0; i < parts.length; i ++) {
var component = parts[i].split("=");
paramMap [decodeURIComponent(component[0])] = decodeURIComponent(component[1]);
}
return paramMap;
}
Here's how to use it in your code
var url = "?c=231171&p=home";
var params = getUrlParams(url);
var devurl = "http://site.com/?c=" + encodeURIComponent(params.c) + "&p=" + encodeURIComponent(params.p) + "&genphase2=true";
// devurl == "http://site.com/?c=231171&p=home&genphase2=true"
See it in action http://jsfiddle.net/mendesjuan/TCpsD/
Here's the code you posted with minimal changes to get it working, it also uses $.param as it's intended, that is to create a query string from a JS object, this works well since my suggested function returns an object from the url
$(document).ready(function() {
// This does not handle arrays because it's not part of the official specs
// PHP and some other server side languages support it but there's no official
// consensus
function getUrlParams(url) {
var paramMap = {};
var questionMark = url.indexOf('?');
if (questionMark == -1) {
return paramMap;
}
var parts = url.substring(questionMark + 1).split("&");
for (var i = 0; i < parts.length; i ++) {
var component = parts[i].split("=");
paramMap [decodeURIComponent(component[0])] = decodeURIComponent(component[1]);
}
return paramMap;
}
// no need for the extra load listener here, jquery.ready already puts
// your code in the onload
chrome.tabs.getSelected(null, function(tab) {
document.getElementById('currentpage').innerHTML = tab.url;
});
var url = $("currentpage");
var paramMap = getUrlParams(url);
// Add the genphase parameter to the param map
paramMap.genphase2 = true;
// Use jQuery.param to create the url to click on
var devurl = "http://site.com/?"+ jQuery.param(paramMap);
$('#mydev').click( function(){
window.open(devurl);
});
});

Intermittent behavior in my AJAX, Greasemonkey script

I've a small Greasemonkey script that doesn't include any random part, but its results change with each page reload.
I'm a noob and I'm probably doing something wrong, but I don't know what. I hope you'll be able to help me.
The code is too large and too poorly written to be reproduced here, so I'll try to sum up my situation:
I have a list of links which have href=javascript:void(0) and onclick=f(link_id).
f(x) makes an XML HTTP request to the server, and returns the link address.
My script is meant to precompute f(x) and change the href value when the page loads.
I have a function wait() that waits for the page to load, then a function findLinks() that gets the nodes that are to be changed (with xpath).
Then a function sendRequest() that sends the xhr to the server. And, finally handleRequest() that asynchronously (r.onreadystatechange) retrieves the response, and sets the nodes previously found.
Do you see anything wrong with this idea?
Using a network analyzer, I can see that the request is always sent fine, and the response also.
Sometimes the href value is changed, but sometimes for some links it isn't and remains javascript:void(0).
I really don't see why it works only half the time...
function getUrlParameterFromString(urlString, name) {
name = name.replace(/[\[]/, "\\\[").replace(/[\]]/, "\\\]");
var regexS = "[\\?&]" + name + "=([^&#]*)";
var regex = new RegExp(regexS);
var results = regex.exec(urlString);
if (results == null) {
return "";
} else {
return results[1];
}
}
function getUrlParameter(name) {
return getUrlParameterFromString(window.location.href, name);
}
function wait() {
var findPattern = "//a";
var resultLinks = document.evaluate(findPattern, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
if (resultLinks == null || resultLinks.snapshotLength == 0) {
return setTimeout(_wait, 100);
} else {
for (var i = 0, len = resultLinks.snapshotLength; i < len; i++) {
var node = resultLinks.snapshotItem(i);
var s = node.getAttribute('onclick');
var linkId = s.substring(2, s.length - 1); // f(x)->x
sendRequest(linkId, node);
}
}
}
function sendRequest(linkId, nodeToModify) {
window.XMLHttpRequest ? r = new XMLHttpRequest : window.ActiveXObject && (r = new ActiveXObject("Microsoft.XMLHTTP"));
if (r) {
r.open("POST", "some_url", !0);
r.onreadystatechange = function () {
handleRequest(nodeToModify, linkId, r);
}
r.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
r.send(linkId);
}
}
function handleRequest(nodeToModify, num, r) {
if (r.readyState == 4) {
if (r.status == 200) {
console.log('handleRequest() used');
var a = r.responseText;
if (a == null || a.length < 10) {
sendRequest(num, nodeToModify);
} else {
var url = unescape((getUrlParameterFromString(a, "url")).replace(/\+/g, " "));
nodeToModify.setAttribute('href', url);
nodeToModify.setAttribute('onclick', "");
}
} else {
alert("An error occurred: " + r.statusText)
}
}
}
wait();
It looks like that script will change exactly 1 link. Look-up "closures"; this loop:
for (var i = 0, len = resultLinks.snapshotLength; i < len; i++) {
var node = resultLinks.snapshotItem(i);
var s = node.getAttribute('onclick');
var linkId = s.substring(2, s.length - 1); // f(x)->x
sendRequest(linkId, node);
}
needs a closure so that sendRequest() gets the correct values. Otherwise, only the last link will be modified.
Try:
for (var i = 0, len = resultLinks.snapshotLength; i < len; i++) {
var node = resultLinks.snapshotItem(i);
var s = node.getAttribute('onclick');
var linkId = s.substring(2, s.length - 1); // f(x)->x
//-- Create a closure so that sendRequest gets the correct values.
( function (linkId, node) {
sendRequest (linkId, node);
}
)(linkId, node);
}

Categories

Resources