I'm new to JavaScript and promises. I have a requirement where I need to get the HTTP status of my URLs (I have 10 URLs) all at once (not sequentially).
So I wrote the code below:
var request = require('request');

var fun = function(i) {
    request('http://myapp' + i + '.com', function(error, response, body) {
        console.log(response && response.statusCode, i);
    });
};

for (i = 0; i < 10; i++) {
    fun(i);
}
but I'm getting status1, then status2, then status3, and so on. My requirement is to print all the statuses at one time.
Then I tried the code below:
var request = require('request');

var fun = function(myapp) {
    return new Promise(function(resolve, reject) {
        request('http://' + myapp + '.com', function(error, response, body) {
            resolve(response && response.statusCode);
        });
    });
};

for (i = 0; i < 10; i++) {
    fun('myapp' + i).then(function(val1) {
        console.log(val1);
    });
}
But still, I'm getting status1, then status2, then status3, and so on.
Any help is appreciated.
You could collect all the promises in an array, then use Promise.all on that to get an array of results:
const promises = [];

for (i = 0; i < 10; i++) {
    promises.push(fun('myapp' + i));
}

Promise.all(promises)
    .then(console.log, console.error);
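As a side note, Promise.all resolves with the results in the same order as the input promises, regardless of which request finishes first, so you can pair each status back with the name that produced it. A minimal sketch reusing the fun helper from the question (names is just a hypothetical variable for the app names):

const names = [];
for (let i = 0; i < 10; i++) {
    names.push('myapp' + i);
}

Promise.all(names.map(fun)).then(function (statuses) {
    // statuses[k] corresponds to names[k], even if request k finished last
    statuses.forEach(function (status, k) {
        console.log(names[k] + ': ' + status);
    });
}, console.error);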
Your code is correct. It executes asynchronously, in parallel.
However, you are confused by seeing the output printed in sequence (or what looks like a sequence). This is normal: whether the requests run in parallel or synchronously, the output (as coded) is printed one line after the other, though not necessarily in the same order.
If you want to output everything at once when finished, do something like the following:
var request = require('request');

var finished = new Array(10);
var numFinished = 0;

var fun = function(i) {
    request('http://myapp' + i + '.com', function(error, response, body) {
        finished[i] = response && response.statusCode ? response.statusCode : 'No response';
        numFinished++;
    });
};

for (i = 0; i < 10; i++) {
    fun(i);
}

var timer = setInterval(function() {
    if (10 <= numFinished) {
        clearInterval(timer);
        console.log(finished.join(',')); // print all at once
    }
}, 500);
Or, if you use promises, you can do:
var request = require('request');

var fun = function(myapp) {
    return new Promise(function(resolve, reject) {
        request('http://' + myapp + '.com', function(error, response, body) {
            resolve(response && response.statusCode ? response.statusCode : 'No response');
        });
    });
};

var promises = new Array(10);

for (i = 0; i < 10; i++) {
    promises[i] = fun('myapp' + i);
}

Promise.all(promises).then(function(results) {
    console.log(results.join(',')); // print all at once
});
I am trying to build a file of JSON data from repeated calls to a REST API. The final file to be written is the sum of the data received from all the calls. At present the file is being written with the contents of the first call, then overwritten by the contents of the first + second call (see the console output below the code).
As I have to make many calls, once the code is working I would like to write the file only once the requests have finished and the JSON string has been built. Does anyone know how I would go about doing this? Maybe with a callback (which I still don't have the hang of) that runs once the requests have finished or the JSON string has finished being built.
"use strict";
const fs = require('fs');
const request = require('request');
var parse = require('csv-parse');
const path = "../path tocsv.csv";
const pathJSON = "../pathtoJSON.json";
var shapes = "https://url";
var options = {
url: '',
method: 'GET',
accept: "application/json",
json: true,
};
var csvData = [];
var jsonData = "[";
fs.createReadStream(path)
.pipe(parse({delimiter: ','}))
.on('data', function(data) {
csvData.push(data[1]);
})
.on('end',function() {
var start = Date.now();
var records = csvData.length //2212 objects
console.log(records);
var dataLength = 2 //set low at moment
for (var i = 0; i < dataLength; i += 1) {
var url = shapes + csvData[i];
options.url = url; //set url query
request(options, function(error, response, body) {
var time = Date.now() - start;
var s = JSON.stringify(body.response);
console.log( '\n' + (Buffer.byteLength(s)/1000).toFixed(2)+
" kilobytes downloaded in: " + (time/1000) + " sec");
console.log(i)
buildJSON(s);
});
}
function buildJSON(s) {
var newStr = s.substring(1, s .length-1);
jsonData += newStr + ',';
writeFile(jsonData);
}
function writeFile(jsonData) {
fs.writeFile(pathJSON, jsonData, function(err) {
if (err) {
return console.log(err);
} else {
console.log("file complete")
}
});
}
});
128.13 kilobytes downloaded in: 2.796 sec
2
file complete
256.21 kilobytes downloaded in: 3.167 sec
2
file complete
Perhaps writing to the file after all requests are complete will help. In the current code, the writeFile function is called each time a request completes (which overwrites the file each time).
A quick way to fix this is to count successful requests (and failures) and write to the file only after all the requests are complete.
"use strict";
const fs = require('fs');
const request = require('request');
var parse = require('csv-parse');
const path = "../path tocsv.csv";
const pathJSON = "../pathtoJSON.json";
var shapes = "https://url";
var options = {
url: '',
method: 'GET',
accept: "application/json",
json: true,
};
var csvData = [];
var jsonData = "[";
fs.createReadStream(path)
.pipe(parse({
delimiter: ','
}))
.on('data', function (data) {
csvData.push(data[1]);
})
.on('end', function () {
var start = Date.now();
var records = csvData.length //2212 objects
console.log(records);
var dataLength = 2 //set low at moment
var jsonsDownloaded = 0; // Counter to track complete JSON requests
var jsonsFailed = 0; // Counter to handle failed JSON requests
for (var i = 0; i < dataLength; i += 1) {
var url = shapes + csvData[i];
options.url = url; //set url query
request(options, function (error, response, body) {
if(error){
jsonsFailed++;
writeFile(jsonData);
return;
}
jsonsDownloaded++;
var time = Date.now() - start;
var s = JSON.stringify(body.response);
console.log('\n' + (Buffer.byteLength(s) / 1000).toFixed(2) +
" kilobytes downloaded in: " + (time / 1000) + " sec");
console.log(i)
buildJSON(s);
});
}
function buildJSON(s) {
var newStr = s.substring(1, s.length - 1);
jsonData += newStr + ',';
writeFile(jsonData);
}
function writeFile(jsonData) {
if(dataLength - (jsonsDownloaded + jsonsFailed) > 0){
return;
}
fs.writeFile(pathJSON, jsonData, function (err) {
if (err) {
return console.log(err);
} else {
console.log("file complete")
}
});
}
});
Note:
In my experience, firing requests in quick succession (e.g. 2000 requests in a for loop) does not work well; try batching them, as in the sketch below. Also, doing it this way does not guarantee order (if that is important in your use case).
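A minimal sketch of one way to batch: a hypothetical runInBatches helper (not part of the original code) that starts the next batch only after the previous one has settled:

// Hypothetical helper: run makePromise over items in batches of batchSize,
// waiting for each batch to settle before starting the next one.
function runInBatches(items, batchSize, makePromise) {
    var batches = [];
    for (var i = 0; i < items.length; i += batchSize) {
        batches.push(items.slice(i, i + batchSize));
    }
    return batches.reduce(function (chain, batch) {
        return chain.then(function (results) {
            return Promise.all(batch.map(makePromise))
                .then(function (batchResults) {
                    return results.concat(batchResults);
                });
        });
    }, Promise.resolve([]));
}

// Example usage: fetch the CSV rows 50 at a time, assuming a hypothetical
// promise-returning requestRow helper that wraps request for a single row.
// runInBatches(csvData, 50, requestRow).then(function (allResponses) { /* ... */ });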
An alternative would be to open the file in append mode. You can do this by passing an extra options object, with its flag property set to 'a', to your fs.writeFile call.
fs.writeFile(pathJSON, jsonData, {
    flag: 'a'
}, function (err) {
    if (err) {
        return console.log(err);
    }
});
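With the 'a' flag, each fs.writeFile call appends to the file instead of replacing its contents, so earlier results are not lost; note that the order of the appended chunks still depends on which request happens to finish first.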
References:
fs.writeFile Docs
File system flags
I'm having fun with promises in JS and trying to craft a simple XPath website parser, but I am struggling with the logic for finishing the overall parsing process. My code is:
var request = require('request');
var xpath = require('xpath');
var dom = require('xmldom').DOMParser;

var olxMain = 'https://www.some.site/';
var xpathRoot = '//a[contains(@href, "https://www.some.site/mask/")]';
var linksXpath = '//a';

var allGlobalLinks = [];

var getLink = function (node) {
    for (key in node['attributes']) {
        if (node['attributes'][key]['name'] === 'href') {
            return node['attributes'][key]['value'];
        }
    }
};

var getData = function (url, xpathPattern) {
    return new Promise(function (resolve, reject) {
        console.log("Opening " + url);
        var processResponse = function (error, response, body) {
            var doc = new dom().parseFromString(body);
            var childNodes = xpath.select(xpathPattern, doc);
            var links = childNodes.map(function (n) {
                return getLink(n);
            });
            resolve(links);
        };
        request({url: url}, processResponse);
    });
};

var arrayUnique = function (x, i, a) {
    return a.indexOf(x) == i;
};

var main = function () {
    getData(olxMain, xpathRoot).then(function (links) {
        links = links.filter(arrayUnique);
        var maxThreads = 10, n = 0;
        var chunks = [];
        for (k in links) {
            var url = links[k];
            n++;
            if (n <= maxThreads)
                chunks.push(url);
            else {
                n = 0;
                // console.log(chunks);
                Promise.all(chunks.map(function (url) {
                    return getData(url, linksXpath);
                })).then(function (links) {
                    // add these links to global scope list here
                });
                console.log("Finished mappings iteration");
                chunks = [];
            }
        }
    });
};

main();
So what I want is basically some kind of thread pool with promises: how do I manage these 10 promises so that when they have all finished, I spawn another 10, until the list is exhausted and all the promises have finished?
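One possible shape for that pooling logic, as a minimal sketch (processChunks is a hypothetical helper name; it assumes the getData and linksXpath definitions above): slice the links into chunks and start the next chunk only from the .then of the previous Promise.all:

var processChunks = function (links, chunkSize, done) {
    var index = 0;
    var allGathered = [];
    var next = function () {
        if (index >= links.length) {
            done(allGathered); // every chunk has been processed
            return;
        }
        var chunk = links.slice(index, index + chunkSize);
        index += chunkSize;
        Promise.all(chunk.map(function (url) {
            return getData(url, linksXpath);
        })).then(function (results) {
            results.forEach(function (pageLinks) {
                allGathered = allGathered.concat(pageLinks);
            });
            next(); // start the next batch only after this one has resolved
        });
    };
    next();
};

// Example usage inside the .then in main():
// processChunks(links, 10, function (all) { console.log('Collected ' + all.length + ' links'); });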
I'm scraping a website using the request library with Node.js. I have an array of URLs that I loop through, doing a request on each. The problem is that the requests are asynchronous and I need to do something ONLY after all the requests have completed. Here is the code:
for (var i = 0; i < urls.length; i++) {
    request(urls[i], function (err, resp, body) {
        if (!err && resp.statusCode == 200) {
            var $ = cheerio.load(body);
            var string = $('.author .mini').text();
            var regExp = /(\+971|00971|05)\d{1,12}/g;
            if (string.match(regExp)) { mobilePhones.push(string.match(regExp)[0]); }
        }
    });
}
So when all the requests are done, I just want to console.log(mobilePhones);
This would be much easier to accomplish using Promise.all(). Since request itself does not return a promise, wrap each call in one:
var handleBody = function (body) {
    var $ = cheerio.load(body);
    var string = $('.author .mini').text();
    var regExp = /(\+971|00971|05)\d{1,12}/g;
    if (string.match(regExp)) { mobilePhones.push(string.match(regExp)[0]); }
};

// Wrap each request in a Promise so Promise.all can wait on all of them
var promises = urls.map(function (url) {
    return new Promise(function (resolve) {
        request(url, function (err, resp, body) {
            if (!err && resp.statusCode == 200) handleBody(body);
            resolve(); // resolve even on error so Promise.all always settles
        });
    });
});

Promise.all(promises).then(function () {
    console.log(mobilePhones);
});
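Resolving (rather than rejecting) on a failed request means Promise.all fires once every request has either succeeded or failed; if you would rather abort on the first error, call reject(err) in the error branch and add a .catch to the Promise.all chain.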
var count = urls.length;
for (var i = 0; i < urls.length; i++) {
    request(urls[i], function (err, resp, body) {
        if (!err && resp.statusCode == 200) {
            var $ = cheerio.load(body);
            var string = $('.author .mini').text();
            var regExp = /(\+971|00971|05)\d{1,12}/g;
            if (string.match(regExp)) { mobilePhones.push(string.match(regExp)[0]); }
        }
        count--;
        if (count === 0) doSomething(); // all requests are done
    });
}
I'm new to node.js. The problem is reading from 2 (or more) streams which have sorted data, and producing a "sorted merge" of them.
For example:
Stream A: 1 5 6 8
Stream B: 2 3 4 7
========================
Result: 1 2 3 4 5 6 7 8
In C++/Java/C# this has a pretty obvious solution, something like:
BufferedReader[] readers = new BufferedReader[2];
String[] lines = new String[2];
// fill lines with initial values from both readers
// ...
while (true) {
    int earliestIndex = -1;
    // ....
    // determine earliestIndex by looping over lines and comparing them
    if (earliestIndex < 0) break;
    String line = lines[earliestIndex];
    // do something with line
    System.out.println(line);
    // advance reader
    lines[earliestIndex] = readers[earliestIndex].readLine();
}
But in node, this seems rather difficult. Any ideas?
Here's a solution I eventually came up with. I'm using node-line-reader to read from a stream line by line (a file stream here, but this can be changed easily):
var LineReader = require('node-line-reader').LineReader;

var files = ['c:\\temp\\1.txt', 'c:\\temp\\2.txt'];
var readers = [];
var lines = [];

var readWhile = function (done) {
    // find the reader whose current line holds the smallest value
    var earliestIndex = -1;
    var earliest = Number.MAX_VALUE;
    for (i = 0; i < lines.length; i++) {
        var l = lines[i];
        var value = parseInt(l);
        if (value < earliest) {
            earliest = value;
            earliestIndex = i;
        }
    }
    if (earliestIndex < 0) {
        done();
        return;
    }
    var line = lines[earliestIndex];
    console.log('Read from ' + files[earliestIndex] + ': ' + line);
    // advance only the reader that produced the earliest line
    readers[earliestIndex].nextLine(function (err, line) {
        if (err) throw err;
        lines[earliestIndex] = line;
        process.nextTick(function () {
            readWhile(done);
        });
    });
};

new Promise(function (success, error) {
    // open all files and read the first line from each
    for (i = 0; i < files.length; i++) {
        var reader = new LineReader(files[i]);
        readers.push(reader);
        new Promise(function (success, failure) {
            reader.nextLine(function (err, line) {
                if (err) failure(err);
                lines.push(line);
                success();
            });
        }).then(function (data) {
            if (lines.length == files.length) success();
        });
    }
}).then(function (data) {
    return new Promise(function (success, failure) {
        readWhile(success);
    });
}).then(function () {
    console.log('All done');
}, function (err) {
    console.log('Error: ' + err);
});
After asking a question and getting a very helpful answer on what the 'Async Juggling' assignment in learnyounode was asking me to do, I set out to implement it myself.
The problem is, my setup isn't having any success! Even though I've referred to other solutions out there, my setup simply isn't returning any results when I do a learnyounode verify myscript.js.
GIST: jugglingAsync.js
var http = require('http');

var app = (function () {
    // Private variables...
    var responsesRemaining,
        urls = [],
        responses = [];

    var displayResponses = function() {
        for (var iterator in responses) {
            console.log(responses[iterator]);
        }
    };

    // Public scope...
    var pub = {};

    pub.main = function (args) {
        responsesRemaining = args.length - 2;

        // For every argument, push a URL and prep a response.
        for (var i = 2; i < args.length; i++) {
            urls.push(args[i]);
            responses.push('');
        }

        // For every URL, set off an async request.
        for (var iterator in urls) {
            var i = iterator;
            var url = urls[i];
            http.get(url, function(response) {
                response.setEncoding('utf8');
                response.on('data', function(data) {
                    if (response.headers.host == url)
                        responses[i] += data;
                });
                response.on('end', function() {
                    if (--responsesRemaining == 0)
                        displayResponses();
                });
            });
        }
    };

    return pub;
})();

app.main(process.argv);
Question: What am I doing wrong?
This line
for(var iterator in urls) {
doesn't do what you think it does. It actually loops over the properties of urls (see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...in). Instead, you have to do something like
for (var i = 0; i < urls.length; i++) {
    var url = urls[i];
    ...
}
or
urls.forEach(function(url, index) {
    ...
});
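For completeness, the forEach version also sidesteps a closure problem in the original code: because var is function-scoped, every http.get callback ends up sharing the same final values of i and url, whereas forEach gives each callback its own url and index. A minimal sketch of the request loop rewritten that way (assuming the same responses, responsesRemaining, and displayResponses from the question):

urls.forEach(function (url, index) {
    http.get(url, function (response) {
        response.setEncoding('utf8');
        response.on('data', function (data) {
            // index is fixed per callback here, unlike the shared i
            // from the original for loop
            responses[index] += data;
        });
        response.on('end', function () {
            if (--responsesRemaining == 0)
                displayResponses();
        });
    });
});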
In addition to not properly looping through the arrays inside the app module, I was also not properly concatenating data returned from the response.on('data') event. Originally I was doing...
responses[index] += data;
Instead, the correct thing to do was:
responses[index] = responses[index] + data;
Changing that, as well as the things noted by @arghbleargh, got the 'Async Juggling' assignment to fully verify!
I have tested my code and it all worked:
~ $ node juggling_async.js site1 site2 site3 site4 ...
The JS code is not limited to only three sites.
var http = require('http');

// Process all the site names from the arguments and store them in sites[].
// This way does not limit the count to only 3 sites.
var sites = [];
(function loadSites() {
    for (var i = 2, len = process.argv.length; i < len; ++i) {
        var site = process.argv[i];
        if (site.substr(0, 7) != 'http://') site = 'http://' + site;
        sites.push(site);
    }
})();

var home_pages = [];
var count = 0;

function httpGet(index) {
    var home_page = '';
    var site = sites[index];
    http.get(site, function(res) {
        res.setEncoding('utf8');
        res.on('data', function(data) {
            home_page += data;
        });
        res.on('end', function() {
            ++count;
            home_pages[index] = home_page;
            if (count == sites.length) {
                // Yahoo! We have reached the last one.
                for (var i = 0; i < sites.length; ++i) {
                    console.log('\n############ Site #' + (+i + 1) + ': ' + sites[i]);
                    console.log(home_pages[i]);
                    console.log('============================================\n');
                }
            }
        });
    })
    .on('error', function(e) {
        console.log('Error at loop index ' + index + ': ' + e.message);
    });
}

for (var i = 0; i < sites.length; ++i) {
    httpGet(i);
}