I am calling an async function on all the HTML of a page 5000 characters at a time, and then want to replace the 5000 characters in place as I hear back from the API. However I am having trouble replacing the returned text in place.
Note that this is async, so they can come back in any order. The returned text should just 'pop-in' as the appropriate API function returns.
var textbody = document.body.innerHTML;
var loops = textbody.length/5000;
var translatedHTML = '';
for (var i = 0; i < loops; i++) {
(function (i) {
var translatetext = document.body.innerHTML.substring(5000*i, (5000*i)+5000);
//call an Async API to translatetext
//callback for async API call
{
var translation = xmlhttp2.responseText;
var refreshHTML = document.body.innerHTML;
//WHAT AM I DOING HERE?! NEED TO REPLACE TEXT IN PLACE
translatedHTML = refreshHTML.slice(5000*(i-1), 5000*i) + arraytrans + refreshHTML.slice(5000*i+5000);
document.body.innerHTML = translatedHTML;
}
})(i);
}
});
Update: I tried linostar's approach and while it does replace the HTML as expected, it completely screws up the formatting of the page. Is there a different approach I should take instead?
(function (i) {
var translatetext = document.body.innerHTML.substring(5000*i, (5000*i)+5000);
//call an Async API to translatetext
//callback for async API call
{
var translation = xmlhttp2.responseText;
var refreshHTML = document.body.innerHTML;
if (i == 0)
translatedHTML = arraytrans + refreshHTML.substring(5000*i+5000);
else if (i == loops - 1)
translatedHTML = refreshHTML.substring(0, 5000*i) + arraytrans;
else
translatedHTML = refreshHTML.substring(0, 5000*i) + arraytrans + refreshHTML.substring(5000*i+5000);
document.body.innerHTML = translatedHTML;
}
})(i);
Why are you using the slice function on refreshHTML? slice is used to cut portions from an array, and you are not using it on an array here.
Use the following instead:
if (i == 0)
translatedHTML = arraytrans + refreshHTML.substring(5000*i+5000);
else if (i == loops - 1)
translatedHTML = refreshHTML.substring(0, 5000*i) + arraytrans;
else
translatedHTML = refreshHTML.substring(0, 5000*i) + arraytrans + refreshHTML.substring(5000*i+5000);
Related
I wonder if is possible to get the text inside of a PDF file by using only Javascript?
If yes, can anyone show me how?
I know there are some server-side java, c#, etc libraries but I would prefer not using a server.
thanks
Because pdf.js has been developing over the years, I would like to give a new answer. That is, it can be done locally without involving any server or external service. The new pdf.js has a function: page.getTextContent(). You can get the text content from that. I've done it successfully with the following code.
What you get in each step is a promise. You need to code this way: .then( function(){...}) to proceed to the next step.
PDFJS.getDocument( data ).then( function(pdf) {
pdf.getPage(i).then( function(page){
page.getTextContent().then( function(textContent){
What you finally get is a string array, textContent.bidiTexts[]. You concatenate its entries to get the text of one page. The text blocks' coordinates are used to judge whether a newline or a space needs to be inserted. (This may not be totally robust, but from my tests it seems OK.)
The input parameter data needs to be either a URL or ArrayBuffer type data. I used the ReadAsArrayBuffer(file) function in FileReader API to get the data.
Note: According to some other user, the library has updated and caused the code to break. According to the comment by async5 below, you need to replace textContent.bidiTexts with textContent.items.
/**
 * Extracts the plain text of a PDF with pdf.js (expects the global `PDFJS`).
 *
 * `complete` holds the number of pages finished by the most recent
 * `pdfToText` call.
 */
function Pdf2TextClass(){
    var self = this;
    this.complete = 0;

    /**
     * Returns the two coordinates used by the whitespace heuristic.
     * Blocks that expose `x`/`y` are used as-is; blocks that only carry a
     * `transform` array fall back to its last two entries.
     */
    function blockPosition(block){
        if( block.x != null || block.y != null )
            return { x: block.x, y: block.y };
        var t = block.transform;
        // NOTE(review): index 5 drives the newline test and index 4 the
        // space test; confirm against the pdf.js text item documentation.
        return t ? { x: t[5], y: t[4] } : { x: undefined, y: undefined };
    }

    /**
     * Concatenates the text blocks of one page, inserting "\r\n" or ' '
     * between blocks based on how their coordinates change.
     */
    function blocksToText(blocks){
        var page_text = "";
        var last_block = null;
        var last_pos = null;
        for( var k = 0; k < blocks.length; k++ ){
            var block = blocks[k];
            var pos = blockPosition(block);
            // Only consider adding whitespace when the previous block does
            // not already end with a space.
            if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
                if( pos.x < last_pos.x )
                    page_text += "\r\n";
                else if ( last_pos.y != pos.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
                    page_text += ' ';
            }
            page_text += block.str;
            last_block = block;
            last_pos = pos;
        }
        return page_text;
    }

    /**
     *
     * @param data ArrayBuffer of the pdf file content, or a URL string
     * @param callbackPageDone To inform the progress each time
     *        when a page is finished. The callback function's input parameters are:
     *        1) number of pages done;
     *        2) total number of pages in file.
     *        May be omitted.
     * @param callbackAllDone The input parameter of callback function is
     *        the result of extracted text from pdf file. May be omitted.
     *
     */
    this.pdfToText = function(data, callbackPageDone, callbackAllDone){
        console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
        var notifyPage = typeof callbackPageDone == 'function' ? callbackPageDone : function(){};
        var notifyDone = typeof callbackAllDone == 'function' ? callbackAllDone : function(){};
        // Reset so the instance can be reused for another document.
        self.complete = 0;
        PDFJS.getDocument( data ).then( function(pdf) {
            var total = pdf.numPages;
            var done = 0; // per-call counter; mirrored into self.complete
            var layers = {}; // page number -> page text, pages may finish out of order
            notifyPage( 0, total );
            if( total == 0 ){
                notifyDone("");
                return;
            }
            for (var i = 1; i <= total; i++){
                pdf.getPage(i).then( function(page){
                    var n = page.pageNumber;
                    page.getTextContent().then( function(textContent){
                        // Older builds expose `bidiTexts`, newer ones `items`.
                        var blocks = textContent.bidiTexts != null ? textContent.bidiTexts : textContent.items;
                        if( blocks != null ){
                            console.log("page " + n + " finished.");
                            layers[n] = blocksToText(blocks) + "\n\n";
                        }
                        self.complete = ++done;
                        notifyPage( done, total );
                        if (done == total){
                            // Every page has been stored by now, so the text
                            // can be assembled immediately, in page order.
                            var full_text = "";
                            for( var j = 1; j <= total; j++)
                                full_text += layers[j] != null ? layers[j] : "";
                            notifyDone(full_text);
                        }
                    }); // end of page.getTextContent().then
                }); // end of page.then
            } // of for
        });
    }; // end of pdfToText()
} // end of class
I couldn't get gm2008's example to work (the internal data structure on pdf.js has changed apparently), so I wrote my own fully promise-based solution that doesn't use any DOM elements, queryselectors or canvas, using the updated pdf.js from the example at mozilla
It eats a file path for the upload since i'm using it with node-webkit.
You need to make sure you have the cmaps downloaded and pointed somewhere, and you need pdf.js and pdf.worker.js to get this working.
/**
 * Extract text from PDFs with PDF.js
 * Uses the demo pdf.js from https://mozilla.github.io/pdf.js/getting_started/
 *
 * Resolves with the text of every page; the items of a page are joined
 * with a single space and the pages are joined with "\r\n".
 */
this.pdfToText = function(data) {
    PDFJS.workerSrc = 'js/vendor/pdf.worker.js';
    PDFJS.cMapUrl = 'js/vendor/pdfjs/cmaps/';
    PDFJS.cMapPacked = true;

    // Text of one page (1-based page number).
    function readPageText(pdf, pageNumber) {
        return pdf.getPage(pageNumber).then(function(page) {
            return page.getTextContent();
        }).then(function(textContent) {
            var strings = textContent.items.map(function(item) {
                return item.str;
            });
            return strings.join(' ');
        });
    }

    return PDFJS.getDocument(data).then(function(pdf) {
        var pending = [];
        for (var pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
            pending.push(readPageText(pdf, pageNumber));
        }
        return Promise.all(pending);
    }).then(function(pageTexts) {
        return pageTexts.join("\r\n");
    });
};
usage:
self.pdfToText(files[0].path).then(function(result) {
console.log("PDF done!", result);
})
Just leaving here a full working sample.
<html>
<head>
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
</head>
<body>
<input id="pdffile" name="pdffile" type="file" />
<button id="btn" onclick="convert()">Process</button>
<div id="result"></div>
</body>
</html>
<script>
/**
 * Reads the file chosen in the #pdffile input as a data URL, runs it through
 * Pdf2TextClass and appends the extracted text to the #result element.
 */
function convert() {
    const reader = new FileReader();
    const extractor = new Pdf2TextClass();
    const appendText = (text) => {
        document.getElementById('result').innerText += text;
    };
    reader.onload = () => {
        // No per-page progress callback is supplied.
        extractor.pdfToText(reader.result, null, appendText);
    };
    const selectedFile = document.getElementById('pdffile').files[0];
    reader.readAsDataURL(selectedFile);
}
/**
 * Extracts the plain text of a PDF with pdf.js (expects the global `pdfjsLib`).
 *
 * `complete` holds the number of pages finished by the most recent
 * `pdfToText` call.
 */
function Pdf2TextClass() {
    var self = this;
    this.complete = 0;

    /**
     * Returns the two coordinates used by the whitespace heuristic.
     * Entries of `textContent.items` have no `x`/`y` fields, so the last two
     * entries of their `transform` array are used instead; `x`/`y` are still
     * honoured when present.
     */
    function blockPosition(block) {
        if (block.x != null || block.y != null) {
            return { x: block.x, y: block.y };
        }
        var t = block.transform;
        // NOTE(review): index 5 drives the newline test and index 4 the
        // space test; confirm against the pdf.js text item documentation.
        return t ? { x: t[5], y: t[4] } : { x: undefined, y: undefined };
    }

    /**
     * Concatenates the text items of one page, inserting "\r\n" or ' '
     * between items based on how their coordinates change.
     */
    function blocksToText(blocks) {
        var page_text = "";
        var last_block = null;
        var last_pos = null;
        for (var k = 0; k < blocks.length; k++) {
            var block = blocks[k];
            var pos = blockPosition(block);
            // Only consider adding whitespace when the previous item does
            // not already end with a space.
            if (last_block != null && last_block.str[last_block.str.length - 1] != ' ') {
                if (pos.x < last_pos.x)
                    page_text += "\r\n";
                else if (last_pos.y != pos.y && (last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
                    page_text += ' ';
            }
            page_text += block.str;
            last_block = block;
            last_pos = pos;
        }
        return page_text;
    }

    /**
     * @param data ArrayBuffer of the pdf file content, or a URL / data-URL string
     * @param callbackPageDone Optional. Called with (pages done, total pages)
     *        once before the first page and once after every finished page.
     * @param callbackAllDone Optional. Called with the extracted text of the
     *        whole document, pages in order.
     */
    this.pdfToText = function (data, callbackPageDone, callbackAllDone) {
        console.assert(data instanceof ArrayBuffer || typeof data == 'string');
        var notifyPage = typeof callbackPageDone == 'function' ? callbackPageDone : function () {};
        var notifyDone = typeof callbackAllDone == 'function' ? callbackAllDone : function () {};
        // Reset so the instance can be reused for another document.
        self.complete = 0;
        var loadingTask = pdfjsLib.getDocument(data);
        loadingTask.promise.then(function (pdf) {
            var total = pdf.numPages;
            var done = 0; // per-call counter; mirrored into self.complete
            var layers = {}; // page number -> page text, pages may finish out of order
            notifyPage(0, total);
            if (total == 0) {
                notifyDone("");
                return;
            }
            for (var i = 1; i <= total; i++) {
                pdf.getPage(i).then(function (page) {
                    var n = page.pageNumber;
                    page.getTextContent().then(function (textContent) {
                        if (null != textContent.items) {
                            console.log("page " + n + " finished.");
                            layers[n] = blocksToText(textContent.items) + "\n\n";
                        }
                        self.complete = ++done;
                        notifyPage(done, total);
                        if (done == total) {
                            // Every page has been stored by now, so the text
                            // can be assembled immediately, in page order.
                            var full_text = "";
                            for (var j = 1; j <= total; j++)
                                full_text += layers[j] != null ? layers[j] : "";
                            notifyDone(full_text);
                        }
                    }); // end of page.getTextContent().then
                }); // end of page.then
            } // of for
        });
    }; // end of pdfToText()
} // end of class
</script>
Here's some JavaScript code that does what you want using Pdf.js from http://hublog.hubmed.org/archives/001948.html:
var input = document.getElementById("input");
var processor = document.getElementById("processor");
var output = document.getElementById("output");

// React to messages posted by the processor frame; anything from another
// source is ignored.
window.addEventListener("message", function (message) {
    if (message.source != processor.contentWindow) {
        return;
    }

    if (message.data === "ready") {
        // The processor is ready: download the PDF as binary data and hand
        // it over to the processor frame.
        var request = new XMLHttpRequest();
        request.open('GET', input.getAttribute("src"), true);
        request.responseType = "arraybuffer";
        request.onload = function () {
            processor.contentWindow.postMessage(request.response, "*");
        };
        request.send();
        return;
    }

    // Any other message is the extracted text; collapse whitespace runs.
    output.textContent = message.data.replace(/\s+/g, " ");
}, true);
...and here's an example:
http://git.macropus.org/2011/11/pdftotext/example/
Note: This code assumes you're using nodejs. That means you're parsing a local file instead of one from a web page since the original question doesn't explicitly ask about parsing pdfs on a web page.
#gm2008's answer was a great starting point (please read it and its comments for more info), but needed some updates (08/19) and had some unused code. I also like examples that are more full. There's more refactoring and tweaking that could be done (e.g. with await), but for now it's as close to that original answer as it could be.
As before, this uses Mozilla's PDFjs library. The npmjs package is at https://www.npmjs.com/package/pdfjs-dist.
In my experience, this doesn't do well in finding where to put spaces, but that's a problem for another time.
[Edit: I believe the update to the use of .transform has restored the whitespace as it originally behaved.]
// This file is called myPDFfileToText.js and is in the root folder
let PDFJS = require('pdfjs-dist');
let pathToPDF = 'path/to/myPDFfileToText.pdf';
let toText = Pdf2TextObj();
let onPageDone = function() {}; // don't want to do anything between pages
let onFinish = function(fullText) { console.log(fullText) };
toText.pdfToText(pathToPDF, onPageDone, onFinish);
/**
 * Factory for a PDF-to-text helper built on pdf.js (expects `PDFJS` in scope).
 *
 * Returns a fresh object, so it works whether it is called as
 * `Pdf2TextObj()` or `new Pdf2TextObj()`. `complete` holds the number of
 * pages finished by the most recent `pdfToText` call.
 */
function Pdf2TextObj() {
  // A plain object instead of `this`: the factory is invoked without `new`,
  // where `this` is either undefined (strict mode) or the global object.
  const self = { complete: 0 };

  /**
   * Concatenates the text items of one page, inserting "\r\n" or ' '
   * between items based on how their `transform` entries change.
   */
  function itemsToText(items) {
    let page_text = "";
    let last_item = null;
    for (const item of items) {
      // Only consider adding whitespace when the previous item does not
      // already end with a space.
      if (last_item != null && last_item.str[last_item.str.length - 1] != ' ') {
        // NOTE(review): transform[5] / transform[4] are presumably the
        // vertical / horizontal translation of the item; confirm against
        // the pdf.js text item documentation.
        const lineCoord = item.transform[5];
        const lastLineCoord = last_item.transform[5];
        const inlineCoord = item.transform[4];
        const lastInlineCoord = last_item.transform[4];
        if (lineCoord < lastLineCoord)
          page_text += "\r\n";
        else if (inlineCoord != lastInlineCoord && (last_item.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null))
          page_text += ' ';
      }
      page_text += item.str;
      last_item = item;
    }
    return page_text;
  }

  /**
   *
   * @param path Path to the pdf file.
   * @param callbackPageDone To inform the progress each time
   *        when a page is finished. The callback function's input parameters are:
   *        1) number of pages done.
   *        2) total number of pages in file.
   *        3) the `page` object itself or null.
   *        May be omitted.
   * @param callbackAllDone Called after all text has been collected. Input parameters:
   *        1) full text of parsed pdf.
   *        May be omitted.
   *
   */
  self.pdfToText = function(path, callbackPageDone, callbackAllDone) {
    const notifyPage = typeof callbackPageDone == 'function' ? callbackPageDone : function() {};
    const notifyDone = typeof callbackAllDone == 'function' ? callbackAllDone : function() {};
    // Reset so the same object can process several files.
    self.complete = 0;
    PDFJS.getDocument(path).promise.then(function(pdf) {
      const total = pdf.numPages;
      let done = 0; // per-call counter; mirrored into self.complete
      // Pages do not necessarily finish in consecutive order, so they are
      // stored by page number and combined once all are done.
      const pages = {};
      notifyPage(0, total, null);
      if (total == 0) {
        notifyDone("");
        return;
      }
      for (let pagei = 1; pagei <= total; pagei++) {
        pdf.getPage(pagei).then(function(page) {
          const pageNumber = page.pageNumber;
          page.getTextContent().then(function(textContent) {
            if (null != textContent.items) {
              console.log("page " + pageNumber + " finished.");
              pages[pageNumber] = itemsToText(textContent.items) + "\n\n";
            } // ends if has items
            self.complete = ++done;
            notifyPage(done, total, page);
            // Every page has been stored once the counter reaches the total,
            // so the text is assembled right away, in page order.
            if (done == total) {
              let full_text = "";
              for (let pageNum = 1; pageNum <= total; pageNum++)
                full_text += pages[pageNum] != null ? pages[pageNum] : "";
              notifyDone(full_text);
            }
          }); // ends page.getTextContent().then
        }); // ends page.then
      } // ends for every page
    });
  }; // Ends pdfToText()

  return self;
} // Ends object factory
Run in the terminal:
node myPDFfileToText.js
Updated 02/2021
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
<script>
/**
 * Extracts the plain text of a PDF with pdf.js (expects the global `pdfjsLib`)
 * and logs it to the console.
 *
 * `complete` holds the number of pages finished by the most recent
 * `pdfToText` call.
 */
function Pdf2TextClass(){
    var self = this;
    this.complete = 0;

    /**
     * Returns the two coordinates used by the whitespace heuristic.
     * Entries of `textContent.items` have no `x`/`y` fields, so the last two
     * entries of their `transform` array are used instead; `x`/`y` are still
     * honoured when present.
     */
    function positionOf(block){
        if( block.x != null || block.y != null )
            return { x: block.x, y: block.y };
        var t = block.transform;
        // NOTE(review): index 5 drives the newline test and index 4 the
        // space test; confirm against the pdf.js text item documentation.
        return t ? { x: t[5], y: t[4] } : { x: undefined, y: undefined };
    }

    /**
     * Concatenates the text items of one page, inserting "\r\n" or ' '
     * between items based on how their coordinates change.
     */
    function pageToText(blocks){
        var page_text = "";
        var last_block = null;
        var last_pos = null;
        for( var k = 0; k < blocks.length; k++ ){
            var block = blocks[k];
            var pos = positionOf(block);
            // Only consider adding whitespace when the previous item does
            // not already end with a space.
            if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
                if( pos.x < last_pos.x )
                    page_text += "\r\n";
                else if ( last_pos.y != pos.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
                    page_text += ' ';
            }
            page_text += block.str;
            last_block = block;
            last_pos = pos;
        }
        return page_text;
    }

    /**
     * @param data ArrayBuffer of the pdf file content, or a URL string
     * @param callbackPageDone Optional. Called with (pages done, total pages)
     *        once before the first page and once after every finished page.
     * @param callbackAllDone Optional. Called with the extracted text of the
     *        whole document after it has been logged.
     */
    this.pdfToText = function(data, callbackPageDone, callbackAllDone){
        console.assert( data instanceof ArrayBuffer || typeof data == 'string' );
        var notifyPage = typeof callbackPageDone == 'function' ? callbackPageDone : function(){};
        var notifyDone = typeof callbackAllDone == 'function' ? callbackAllDone : function(){};
        // Reset so the instance can be reused for another document.
        self.complete = 0;
        var loadingTask = pdfjsLib.getDocument(data);
        loadingTask.promise.then(function(pdf) {
            var total = pdf.numPages;
            var done = 0; // per-call counter; mirrored into self.complete
            var layers = {}; // page number -> page text, pages may finish out of order
            notifyPage( 0, total );
            if( total == 0 ){
                console.log("");
                notifyDone("");
                return;
            }
            for (var i = 1; i <= total; i++){
                pdf.getPage(i).then( function(page){
                    var n = page.pageNumber;
                    page.getTextContent().then( function(textContent){
                        if( null != textContent.items ){
                            console.log("page " + n + " finished.");
                            layers[n] = pageToText(textContent.items) + "\n\n";
                        }
                        self.complete = ++done;
                        notifyPage( done, total );
                        if (done == total){
                            // Every page has been stored by now, so the text
                            // can be assembled immediately, in page order.
                            var full_text = "";
                            for( var j = 1; j <= total; j++)
                                full_text += layers[j] != null ? layers[j] : "";
                            console.log(full_text);
                            notifyDone(full_text);
                        }
                    }); // end of page.getTextContent().then
                }); // end of page.then
            } // of for
        });
    }; // end of pdfToText()
} // end of class
var pdff = new Pdf2TextClass();
pdff.pdfToText('PDF_URL');
</script>
For all the people who actually want to use it on a node server:
/**
* Created by velten on 25.04.16.
*/
"use strict";
// Downloads a PDF over HTTP, pipes it into pdf2json and logs how many text
// entries were found across all pages.
let pdfUrl = "http://example.com/example.pdf";
let request = require('request');
var pdfParser = require('pdf2json');
// encoding:null is passed to request for the PDF download.
// NOTE(review): the value piped into is the export of 'pdf2json' itself, not
// an object created from it; confirm this matches the pdf2json version in use.
let pdfPipe = request({url: pdfUrl, encoding:null}).pipe(pdfParser);
// Parse failures are written to stderr.
pdfPipe.on("pdfParser_dataError", err => console.error(err) );
pdfPipe.on("pdfParser_dataReady", pdf => {
//optionally:
//let pdf = pdfParser.getMergedTextBlocksIfNeeded();
// Running total of `Texts` entries over every page of the parsed document.
let count1 = 0;
//get text on a particular page
for (let page of pdf.formImage.Pages) {
count1 += page.Texts.length;
}
console.log(count1);
pdfParser.destroy();
});
It is possible but:
you would have to use the server anyway, there's no way you can get content of a file on user computer without transferring it to server and back
I don't think anyone has written such a library yet
So if you have some free time you can learn pdf format and write such a library yourself, or you can just use server side library of course.
From my API provider I have some code that's supposed to generate an HMAC key.
<html>
<head>
</head>
<body>
<p id="demo"></p>
<script>var BuckarooHmac = (function () {
// Module object returned by this immediately-invoked function; only
// GetAuthHeader is attached to it.
// NOTE(review): CryptoJS is used below but no script include for it is
// visible in this page.
var self = {};
// Returns the Base64-encoded MD5 digest of `content`, or `content` itself
// unchanged when it is falsy (e.g. an empty string).
function getEncodedContent(content) {
if (content) {
var md5 = CryptoJS.MD5(content);
var base64 = CryptoJS.enc.Base64.stringify(md5);
return base64;
}
return content;
}
// Concatenates websiteKey, httpMethod, requestUri, timeStamp, nonce and the
// encoded content (in that order), signs the result with HMAC-SHA256 using
// secretKey, and returns the signature Base64-encoded.
function getHash(websiteKey, secretKey, httpMethod, nonce, timeStamp, requestUri, content) {
var encodedContent = getEncodedContent(content);
var rawData = websiteKey + httpMethod + requestUri + timeStamp + nonce + encodedContent;
var hash = CryptoJS.HmacSHA256(rawData, secretKey);
var hashInBase64 = CryptoJS.enc.Base64.stringify(hash);
return hashInBase64;
}
// Current time in whole seconds since the Unix epoch.
function getTimeStamp() {
return Math.floor((new Date).getTime() / 1000);
}
// Builds a 16-character string of letters and digits picked with Math.random().
function getNonce() {
var text = "";
var possible = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
for (var i = 0; i < 16; i++) {
text += possible.charAt(Math.floor(Math.random() * possible.length));
}
return text;
}
// Returns the header value "hmac <websiteKey>:<hash>:<nonce>:<timeStamp>".
// The request URI is URI-component-encoded and lower-cased before hashing;
// a falsy `content` is treated as the empty string.
self.GetAuthHeader = function (requestUri, websiteKey, secretKey, content, httpMethod) {
var nonce = getNonce();
var timeStamp = getTimeStamp();
content = content ? content : "";
var url = encodeURIComponent(requestUri).toLowerCase();
return "hmac " + websiteKey + ":" + getHash(websiteKey, secretKey, httpMethod, nonce, timeStamp, url, content) + ":" + nonce + ":" + timeStamp;
}
return self;
// NOTE(review): the statement below follows `return self;` and is therefore
// never executed; it also calls GetAuthHeader without any arguments.
document.getElementById("demo").innerHTML = self.GetAuthHeader();
}());
</script>
</body>
</html>
I'm not used to javascript. I'm trying to figure out how to print the generated key on my screen. I tried this :
document.getElementById("demo").innerHTML = self.GetAuthHeader();
I know i must be doing this wrong. I just need a push in the right direction now. Anyone that could help me ?
You’re trying to perform an action after the function’s return statement. That code will never be reached because the function has returned.
Instead, do it before:
document.getElementById("demo").innerHTML = self.GetAuthHeader();
return self;
Or, even better, if this code is provided by a vendor then you probably shouldn’t edit it. Updates would remove your edits, and vendor support would be compromised. Instead, perform your action outside the code entirely:
var BuckarooHmac = (function () {
// vendor code
}());
document.getElementById("demo").innerHTML = BuckarooHmac.GetAuthHeader();
I'm working on using Twitch's API (from an alternate link because of CORS). The HTML comes back empty when this is run. Is it because of the callback, and what can I do to get the data? I can confirm that the link works and have tried stepping through it with no luck.
// For every username, requests the stream info and appends a status line to
// `html`, then writes `html` into #data_display if it is non-empty.
let usernames = ['freecodecamp'];
let api = '';
let html = '';
for(let i = 0; i < usernames.length; i++) {
api = 'https://wind-bow.gomix.me/twitch-api/streams/' + usernames[i] + '?callback=?';
// The function passed to $.getJSON is a callback that receives the response data.
$.getJSON(api, function(data) {
// NOTE(review): `online` is true when data.stream is null/undefined, and
// that case is reported as 'Offline' below.
let online = data.stream == null;
if(online) {
html += usernames[i] + '\nStatus: Offline';
}
else {
html += usernames[i] + '\nStatus: Online';
}
});
}
// NOTE(review): this check sits outside the $.getJSON callback, so it is
// evaluated right after the loop has issued the requests; presumably `html`
// is still '' at that point. Confirm against the $.getJSON documentation.
if(html != '') {
$('#data_display').html('<h1>' + html + '</h1>');
}
Put your last append html block inside the getJSON :
$.getJSON(api, function(data) {
let online = data.stream == null;
if(online) {
html = usernames[i] + '\nStatus: Offline';
}
else {
html = usernames[i] + '\nStatus: Online';
}
$('#data_display').append('<h1>' + html + '</h1>');
});
And also use .append instead of .html , otherwise it will replace the html inside of #data_display.
There is no need to do html += ....
I think javascript promise will help with your situation, take jQuery deferred API as an example:
/**
 * Looks up whether a Twitch user is currently streaming.
 *
 * @param {string} username Twitch user name; it is URL-encoded before being
 *        placed into the request path.
 * @returns A jQuery promise that resolves with 'Offline' when the response
 *          has no `stream` entry and with 'Online' otherwise. It is rejected
 *          with an Error when the request itself fails, so callers are not
 *          left waiting on a promise that never settles.
 */
function fetchUserStatus(username) {
  var defer = jQuery.Deferred();
  var api = 'https://wind-bow.gomix.me/twitch-api/streams/' + encodeURIComponent(username) + '?callback=?';
  $.getJSON(api, function(data) {
    // A null/undefined `stream` means the user is not streaming right now.
    var isOffline = data.stream == null;
    defer.resolve(isOffline ? 'Offline' : 'Online');
  }).fail(function() {
    defer.reject(new Error('Could not fetch stream status for ' + username));
  });
  return defer.promise();
}
// Start one status lookup per user name, then render all results together
// once every lookup has resolved.
let usernames = ['freecodecamp'];
let promises = usernames.map(function (name) {
  return fetchUserStatus(name);
});
$.when.apply($, promises).then(function () {
  // The resolved values arrive as positional arguments, in request order.
  let statuses = Array.prototype.slice.call(arguments);
  let html = statuses.map(function (status, position) {
    return usernames[position] + '\nStatus: ' + status;
  }).join('');
  $('#data_display').append('<h1>' + html + '</h1>');
});
The jQuery when does not support passing an array of promises, that's why the apply function is used, and at the then callback, you'll need to use the arguments to refer to all the resolved value from the promises.
Hope this helps.
I'm using a synchronous XMLHttpRequest, and while the request is being fulfilled, I hide everything on the page and show a progress message. This works fine in Mozilla Firefox but does not in Chrome.
Mozilla firefox:
For chrome:
[after submit][3]
here is my javascript code:
if (valid) {
document.getElementById('contentwrapper').style.display="none";
document.getElementById('contentwrapper').style.visibility="hidden";
var cboxes = document.getElementsByName('items[]');
var len = cboxes.length;
var res1;
var res2;
var res3;
var res4;
var arrResult;
for (var i = 0; cboxes[i]; ++i) {
if (cboxes[i].checked) {
checkedValue = cboxes[i].value;
arrResult = checkedValue.split(',');
mobileNo=arrResult[0];
if(arrResult[0].trim().startsWith('0')){
res1 = arrResult[0].substring(1,arrResult[0].length);
}else{
res1 = arrResult[0];
}
res2 = arrResult[1];
res3 = arrResult[2];
res4 = arrResult[3];
requestSender(1, res1, res2, res3, res4);
}
}
// Builds the query string for ../Comman/requestForwarder.jsp from the given
// recharge details and sends it as a GET request; the third argument to
// http.open is false.
// The '<%= ... %>' fragments inside string literals are presumably JSP
// expressions filled in on the server before the script reaches the browser.
// NOTE(review): reqCode, source, destination, clienttype, vendorcode,
// httpBuffer and reqCodeBuffer are assigned without a declaration in this
// function; presumably they are globals read elsewhere (e.g. by
// responseHandler). Verify before changing them.
function requestSender(code, mobile, operator, productCode, amount) {
var http = getHTTPObject();
var enterKeyHandler = true;
var reqNum = 0;
var seqNum = 0;
var dataCount = 0;
var Values = "";
// Re-declares the `code` parameter; the parameter's value is kept.
var code;
var comments = "";
reqCode = code;
if (code == 1) {
var agentcode = '<%=payeeaccountno%>';
var pin = document.getElementById("mpin").value;
source = agentcode; // source and agent code both are same.
operator = operator;
destination = mobile;
productCode = productCode;
// Strip leading zeros from the destination number.
// NOTE(review): the query string below sends `mobile`, not `destination`.
if (destination.charAt(0) == '0') {
destination = destination.replace(/^0+/, "");
}
// `amount` may carry extra data after a '#'; only the part before the
// first '#' is kept as the amount.
var amt = amount;
var amtPrd = new Array();
amtPrd = amt.split("#");
if (amtPrd.length > 0)
{
amount = amtPrd[0];
// productCode = amtPrd[1];
}
else
{
amount = 0;
}
clienttype = "SELFCARE";
vendorcode = '<%=vendorcode%>';
dataCount = '<%=cryptHandler.encrypt("8",session.getId()+session.getId())%>';
reqNum = '<%=cryptHandler.encrypt("1",session.getId()+session.getId())%>';
seqNum = '<%=cryptHandler.encrypt("2",session.getId()+session.getId())%>';
// Request parameters var1..var8, each passed through escape().
Values = "&var1=" + escape(agentcode) + "&var2=" + escape(pin) + "&var3=" + escape(amount) + "&var4=" + escape(mobile) + "&var5=" + escape(productCode) + "&var6=" + escape(clienttype) + "&var7=" + escape(operator) + "&var8=" + escape(vendorcode);
}
var varReq = "../Comman/requestForwarder.jsp?page=" + escape("<%=reqPageName%>") + "&reqNum=" + escape(reqNum) + "&seqNum=" + escape(seqNum) + "&dataCount=" + escape(dataCount) + Values;
// Replace every literal '+' in the URL with %2B.
varReq = varReq.replace(/\+/g, "%2B");
http.open("GET", varReq, false);
httpBuffer = http;
reqCodeBuffer = reqCode;
http.onreadystatechange = responseHandler;
http.send(null);
}
You should actually post some code, or at least print the messages shown in the developer console (Ctrl + Shift + J).
However, the stem of your issue is probably the fact that you are trying to use a synchronous xmlhttpRequest on the "main" execution thread.
It's a bad practice (since it blocks all code from running until the server "answers" your request) and browsers are starting to phase it out. So, if you are using a newer version of Chrome, it's likely not running the code and complaining about the sync request.
Either use a Web Worker (js version of thread) to run the synchronous XMLHTTPRequest(s) or, better yet, if possible, use the asynchronous version.
Further reading:
https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/Synchronous_and_Asynchronous_Requests
After asking a question and getting a very helpful answer on what the 'Async Juggling' assignment in learnyounode was asking me to do, I set out to implement it myself.
The problem is, my setup isn't having any success! Even though I've referred to other solutions out there, my setup simply isn't returning any results when I do a learnyounode verify myscript.js.
GIST: jugglingAsync.js
var http = require('http');
// Module that fetches every URL given on the command line and prints the
// collected responses once all of them have ended.
var app = (function () {
// Private variables...
var responsesRemaining,
urls = [],
responses = [];
// Logs every entry of `responses`.
var displayResponses = function() {
for(var iterator in responses) {
console.log(responses[iterator]);
}
};
// Public scope...
var pub = {};
// args: the argument array; entries from index 2 onward are used as URLs.
pub.main = function (args) {
responsesRemaining = args.length - 2;
// For every argument, push a URL and prep a response.
for(var i = 2; i < args.length; i++) {
urls.push(args[i]);
responses.push('');
}
// For every URL, set off an async request.
for(var iterator in urls) {
// NOTE(review): `i` and `url` are declared with `var`, so there is one
// binding of each for the whole function; the callbacks below read them
// when they run, not when they are created.
var i = iterator;
var url = urls[i];
http.get(url, function(response) {
response.setEncoding('utf8');
response.on('data', function(data) {
// NOTE(review): data is appended only when the response's `host` header
// equals `url`; confirm that this comparison can ever be true.
if(response.headers.host == url)
responses[i] += data;
});
response.on('end', function() {
// The last response to end triggers the output.
if(--responsesRemaining == 0)
displayResponses();
});
});
}
};
return pub;
})();
app.main(process.argv);
Question: What am I doing wrong?
This line
for(var iterator in urls) {
doesn't do what you think it does. It actually loops over the properties of urls (see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...in). Instead, you have to do something like
for(var i = 0; i < urls.length; i++) {
var url = urls[i];
...
}
or
urls.forEach(function(url, index) {
...
});
In addition to not properly looping through the arrays inside the app module, I was also not properly concatenating data returned from the response.on('data') event. Originally I was doing...
responses[index] += data;
Instead, the correct thing to do was:
responses[index] = responses[index] + data;
Changing that, as well as the things noted by #arghbleargh got the 'Async Juggling' to fully verify!
I have tested my code and it all worked:
~ $ node juggling_async.js site1 site2 site3 site4 ...
The JS code does not limit only to three sites.
var http = require('http');
// Process all the site-names from the arguments and store them in sites[].
// This way does not limit the count to only 3 sites.
var sites = [];

/**
 * Returns `site` with an 'http://' scheme prepended unless it already starts
 * with 'http://' or 'https://' (case-insensitive).
 */
function normalizeSite(site) {
    return /^https?:\/\//i.test(site) ? site : 'http://' + site;
}

// Collect every command-line argument after the script name into sites[],
// so the number of sites is not limited.
(function loadSites() {
    for(var i = 2, len = process.argv.length; i < len; ++i) {
        sites.push(normalizeSite(process.argv[i]));
    }
})();
var home_pages = [];
var count = 0;
/**
 * Fetches sites[index] and stores its body in home_pages[index].
 *
 * Once every site has either finished or failed, all pages are printed in
 * the order of sites[]. A failed request is logged, stored as an empty page
 * and still counted, so one unreachable site does not suppress the output.
 *
 * @param index Position of the site in sites[].
 */
function httpGet(index) {
    var home_page = '';
    var site = sites[index];
    var settled = false; // guards against counting the same request twice

    // Records the result for this site and prints everything when it is the
    // last outstanding request.
    function finish(body) {
        if(settled) return;
        settled = true;
        ++count;
        home_pages[index] = body;
        if(count == sites.length) {
            // Yahoo! We have reached the last one.
            for(var i = 0; i < sites.length; ++i) {
                console.log('\n############ Site #' + (+i+1) + ': ' + sites[i]);
                console.log(home_pages[i]);
                console.log('============================================\n');
            }
        }
    }

    http.get(site, function(res) {
        res.setEncoding('utf8');
        res.on('data', function(data) {
            home_page += data;
        });
        res.on('end', function() {
            finish(home_page);
        });
    })
    .on('error', function(e) {
        console.log('Error at loop index ' + index + ': ' + e.message);
        finish('');
    })
    ;
}
for(var i = 0; i < sites.length; ++i) {
httpGet(i);
}