I need to resize some images. The problem is that the library I'm using does only one resize per callback.
I want to resize several images so I put it in a loop:
// Question's original attempt, kept as posted so the answers below still apply.
// Starts one gm resize per loop iteration and sends 200 from a write callback
// when the loop counter equals images.length.
exports.resizeImages = function (req, res) {
var images = fs.readdirSync('uploads/');
// NOTE(review): `files` is not declared anywhere in this snippet; the array read above is `images`.
for (var n = 0; n < files.length; n++) {
// `tgt` is used both as the gm() input path and as the write() output path.
var tgt = 'uploads/resized/' + images[n];
gm(tgt).resize(150).write(tgt, function (err) {
if (err) {
console.log('resizing failed');
res.status(400).send('failed to resize');
return;
}
// `n` is the function-scoped `var` counter shared by every callback created in this loop.
if (n == images.length) {
res.status(200).send();
}
});
}
}
I'm aware I can't do it like this. I need to make the loop wait until the callback responds somehow. I've seen some examples but I can't get them to work.
Any ideas?
You could also use the node async module
Something like this:
var async = require('async');
exports.resizeImages = function (req, res) {
var images = fs.readdirSync('uploads/');
async.each(images, function(file, callback) {
var tgt = 'uploads/resized/' + file;
gm(tgt).resize(150).write(tgt, callback);
}, function(err) {
if(err) {
console.log('resizing failed');
return res.status(400).send('failed to resize');
} else {
//no error
return res.status(200).send();
}
});
}
You need to write a for loop using promises. Pick your favorite promise library. The details are covered in this question or this question. Or in this blog post:
var Promise = require('bluebird');
/**
 * Asynchronous `while` loop: repeats `action` for as long as `condition()` is truthy.
 *
 * Uses the Promise constructor instead of the deprecated `Promise.defer()` /
 * `Promise.cast()` helpers, so it works with Bluebird as well as native promises.
 *
 * @param {function(): boolean} condition - checked before every iteration.
 * @param {function(): (Promise|*)} action - loop body; may return a promise.
 * @returns {Promise} resolves (with undefined) once `condition()` is falsy,
 *   rejects with whatever `condition` or `action` throws or rejects with.
 */
var promiseWhile = function (condition, action) {
  return new Promise(function (resolve, reject) {
    var loop = function () {
      try {
        if (!condition()) return resolve();
        // Promise.resolve() accepts plain values and thenables alike.
        Promise.resolve(action()).then(loop, reject);
      } catch (err) {
        // A synchronous throw must reject the loop instead of escaping
        // as an uncaught exception from the nextTick / then callback.
        reject(err);
      }
    };
    // Start asynchronously so callers always observe async behaviour.
    process.nextTick(loop);
  });
};
Promises are great for this, indeed. There are other libraries that wrap callbacks into this kind of abstractions, but since Promises are standard it's better to learn just one thing.
If you want to keep it bare, you can use an external counter:
// Bare-bones variant: start every resize at once and count the finished ones.
// Expects `res` (the HTTP response), `fs` and `gm` to be in scope.
var images = fs.readdirSync('uploads/');
var processed = 0;
// Guards against answering twice when several resizes fail, or when one fails
// after another has already triggered the reply.
var responded = false;

if (images.length === 0) {
  // Nothing to resize: no callback would ever fire, so answer right away.
  responded = true;
  res.status(200).send();
}

images.forEach(function (image) {
  var tgt = 'uploads/resized/' + image;
  gm(tgt).resize(150).write(tgt, function (err) {
    if (responded) {
      return;
    }
    if (err) {
      responded = true;
      console.log('resizing failed');
      res.status(400).send('failed to resize');
      return;
    }
    processed++;
    // Only the callback that completes the set sends the success reply.
    if (processed === images.length) {
      responded = true;
      res.status(200).send();
    }
  });
});
That is assuming that you only send 200 OK if all images have been correctly resized.
Like others have mentioned you could use promises, but if you don't want to start using promises in your project async.js is perfect for this kind of thing.
Your code rewritten using async.js:
exports.resizeImages = function (req, res) {
async.waterfall([
function readImages_step(done) {
readdir('uploads/', done);
},
function uploadImages_step(images, done) {
async.each(images, function(image, cb) {
var target = 'uploads/resized/' + image;
gm(target).resize(150).write(target, cb);
}, done);
}
], function (err) {
if (err) {
console.log('resizing failed');
return res.status(400).send('failed to resize');
}
return res.status(200).send();
}
};
I changed your `readdirSync` call to be asynchronous. `async.each` will run the resizes in parallel.
Related
I'm building a simple scraper using Request.js and Cheerio.js within Express. Right now I'm only looking for the title of each site. Instead of scraping the websites one by one, I put the list in an array. I loop through them and then use Cheerio.js to find the title of each website. When I console.log the titles, they come out fine, but I ultimately want to show them on an HTML page. Please note, I'm very new to programming, so if you could provide detailed feedback, that would be incredibly helpful (below is the code I've been working on). Thanks in advance!
// Question's attempt: requests every URL and pushes each page's <title> text into an array.
// Kept as posted, including the brace problem that a later answer points out.
function parseSites(urls) {
var parsedSites = [];
urls.forEach(function(site) {
// The titles are pushed from inside this callback, which request() invokes with (err, res, body).
request(site, function(err, res, body) {
if(err) {
console.log(err);
} else {
var $ = cheerio.load(body);
parsedSites.push($('title').text());
}
// NOTE(review): this closing brace has no matching opener in the snippet.
}
});
});
// Returned directly after the forEach; nothing here waits for the request callbacks.
return parsedSites;
}
Please refer to the below code for a working implementation
var request = require('request-promise')
var cheerio = require("cheerio")
/**
 * Fetches every URL in parallel and extracts the page titles.
 *
 * The previous version returned a local array that was never filled and
 * dropped the promise, so a failed request could not be observed by callers.
 * The callback contract is unchanged; the promise is now returned as well.
 *
 * @param {string[]} urls - pages to fetch.
 * @param {function(string[])} [callback] - called with the titles, in `urls` order.
 * @returns {Promise<string[]>} resolves with the same titles; rejects if any request fails.
 */
function parseSites(urls, callback) {
  var promiseList = urls.map(getPage);
  return Promise.all(promiseList).then(function (bodies) {
    var titles = bodies.map(parse);
    if (typeof callback === 'function') {
      callback(titles);
    }
    return titles;
  });
}
/**
 * Issues a GET for `url` through the `request` object in scope.
 *
 * @param {string} url - address to fetch.
 * @returns {*} whatever `request.get` returns.
 */
function getPage(url) {
  var pending = request.get(url);
  return pending;
}
/**
 * Loads `body` with cheerio and returns the text of its <title> selection.
 *
 * @param {string} body - markup to load.
 * @returns {string} result of `.text()` on the `title` selection.
 */
function parse(body) {
  console.log("parsing body");
  var $page = cheerio.load(body);
  var titleNode = $page('title');
  return titleNode.text();
}
// Demo run: fetch two sites and print whatever parseSites hands to the callback.
parseSites(['https://www.google.com', 'https://www.facebook.com'], function printTitles(data) {
  console.log(data);
});
First you need to understand the difference between asynchronous and synchronous code. Let's see an example:
/**
 * Logs the integers 0 through 4, one per console.log call.
 */
function testFor() {
  let counter = 0;
  while (counter < 5) {
    console.log(counter);
    counter += 1;
  }
}
-
// Synchronous case: testFor() is called directly between the two log statements.
console.log('start:');
testFor();
console.log('end:');
// Here you get the expected output because this code is synchronous.
//output:
start:
0
1
2
3
4
end:
-
// Asynchronous case: testFor is handed to setTimeout with a 1000 ms delay instead of being called directly.
console.log('start:');
setTimeout(testFor,1000);
console.log('end:');
// Here you don't get your expected output because setTimeout is asynchronous.
//output:
start:
end:
0
1
2
3
4
First the console.log('start:'); is called.
Then setTimeout(testFor,1000); (but it is async and the call
will execute in 1 second).
Immediately after the console.log('end:');
is called.
Finally 1 second after, the testFor() is executed and it
prints 0 1 2 3 4
The next point is that there is an error in your code!
// The question's function again, with the stray brace commented out.
// It still returns `parsedSites` right after the forEach, before any request callback has pushed a title.
function parseSites(urls) {
var parsedSites = [];
urls.forEach(function(site) {
// Titles are pushed from this callback, which request() invokes with (err, res, body).
request(site, function(err, res, body) {
if(err) {
console.log(err);
} else {
var $ = cheerio.load(body);
parsedSites.push($('title').text());
}
//} ! THIS bracket should be removed
});
});
return parsedSites;
}
So your problem is that the 'request' in the forEach loop is an async function that will call the callback 'function(err, res, body)' once there is a response from the web page.
My solutions for this:
'use strict'
const cheerio = require('cheerio');
const request = require('request');
const async = require('async');
const urls = ['http://stackoverflow.com/','http://hackaday.com/','https://www.raspberrypi.org/','https://cheerio.js.org/'];
//SOLUTION 1: do what you need to do when all calls are done using recursion
// Index of the next URL to request; incremented inside parseSites.
let i=0;
// Accumulates the page titles across the recursive calls.
let parsedSites = [];
// Kick off the chain with the first URL.
parseSites(urls[i],parsedSites);
// Called once with the collected titles after the last URL has been handled.
function finalCall(sites) {
console.log(sites);
}
/**
 * Requests `site`, records its <title>, then moves on to the next entry of the
 * outer `urls` array (tracked by the outer counter `i`) from inside the
 * response callback. Calls finalCall(parsedSites) after the last URL.
 *
 * @param {string} site - URL to request now.
 * @param {string[]} parsedSites - accumulator the titles are pushed into.
 */
function parseSites(site, parsedSites) {
  i += 1;
  request(site, function onResponse(err, res, body) {
    if (!err) {
      const $page = cheerio.load(body);
      const title = $page('title').text();
      console.log(title);
      parsedSites.push(title);
    } else {
      // A failed request is only logged; the chain continues with the next URL.
      console.log(err);
    }
    if (!(i < urls.length)) {
      finalCall(parsedSites);// when all sites are done.
      return;
    }
    parseSites(urls[i], parsedSites);// recursive call;
  });
  //return parsedSites;// cant return! we are in async calls!
}
//SOLUTION 2: do what you need to do when all calls are done using 'async'
// Start processing the whole list; parseSites is a function declaration, so it is hoisted.
parseSites(urls);
// Receives the collected titles once every URL has been processed without error.
function finalCall(sites) {
console.log(sites);
}
/**
 * Requests every URL in parallel and passes the page titles to finalCall().
 *
 * Uses async.map instead of async.each plus a shared array: with `each` the
 * titles were pushed in whatever order the responses arrived, so they did not
 * line up with `urls`. `map` returns the results in input order.
 *
 * @param {string[]} urls - pages to request.
 */
function parseSites(urls) {
  async.map(urls, function parseSite(site, callback) {
    request(site, function (err, res, body) {
      if (err) {
        return callback(err);
      }
      var $ = cheerio.load(body);
      callback(null, $('title').text());
    });
  }, function (err, parsedSites) {
    // The first request error aborts the run and is logged instead of calling finalCall.
    if (err) console.log(err);
    else finalCall(parsedSites);
  });
}
Async github page
Async example
Question: Why won't a var things return a value from outside the walk() function? And how do I fix it?
Hypothesis: this is async and the console.log is happening too early. That would lead me to ask how I can make this a Promise (I'm using Node 4.1.1).
/**
 * Recursively lists the files below `dir`, visiting entries one at a time.
 *
 * @param {string} dir - directory to scan.
 * @param {function(Error, string[]=)} done - called once with (err) or
 *   (null, paths), where each path is prefixed with `dir + '/'`.
 */
var walk = function (dir, done) {
  var results = [];
  fs.readdir(dir, function (err, list) {
    if (err) return done(err);
    var i = 0;
    // Self-invoking step function: handles list[i], then calls itself for the next entry.
    (function next() {
      var file = list[i++];
      if (!file) return done(null, results);
      file = dir + '/' + file;
      fs.stat(file, function (err, stat) {
        if (stat && stat.isDirectory()) {
          walk(file, function (err, res) {
            // Propagate failures from sub-directories; previously the error was
            // dropped and `undefined` was concatenated into the results.
            if (err) return done(err);
            results = results.concat(res);
            next();
          });
        } else {
          // Entries that are not directories (or whose stat gave no result) are recorded as files.
          results.push(file);
          next();
        }
      });
    })();
  });
};
// Calls walk() on 'src/' and logs the outcome in two places.
function traverseDirectories() {
// walk() as defined above has no return statement of its own, so `things` is undefined.
var things = walk('src/', function(err, results){
if(err) throw err;
console.log(results) // ['dir/thing.png', 'dir/thing2.png']
// This value goes back to whoever invoked the callback, not to traverseDirectories.
return results;
});
console.log(things) // undefined
};
traverseDirectories();
Q. Why won't a var things return a value from outside the walk() function?
A. Because walk doesn't return anything (take a look and you'll see that it's a void function).
Even if you make it a Promise, you won't be able to use it like:
var things = walk(...);
console.log(things);
Because Promises are thenable, and are still async, so it will be:
walk(...).then(function(things) {
// do something with things here
});
To do what you want, you would need something that doesn't exist in current Javascript yet.
There is an ES7 proposal of native async/await that will be a callback heaven, but atm, you can use:
Async/Await library (It's an amazing library, but very far from native, and performance isn't cool)
ES7 transpiler - you can write the ES7 code today, and it will transpile for you to ES5 (e.g Babel)
But, if you're already using the newest version of NodeJS (4.0.0 as the time of writing) - and if you're not, you really should - the best way of achieving what you want is to use generators.
Combined with a small library named co, it will help you to achieve almost what the ES7 async/await proposes, and it will mostly use native code, so both readability and performance are really good:
var co = require('co');
// co() starts the generator immediately and returns a promise, so
// `traverseDirectories` is that promise. The rejection handler makes failures
// from walk() visible instead of leaving the promise without one.
var traverseDirectories = co(function *traverseDirectories() {
  var things = yield walk('src/');
  console.log(things) // there we go!
}).catch(function (err) {
  console.error(err && err.stack ? err.stack : err);
});
/**
 * Promise-returning recursive directory listing.
 *
 * Fixes over the previous version:
 *  - `results` defaults to a fresh array (it was undefined when called as walk(dir));
 *  - `reject` / `resolve` are followed by `return`, so processing stops there
 *    (falling through after resolve kept calling next() past the end of the list);
 *  - a failure in a sub-directory rejects the outer promise.
 *
 * @param {string} dir - directory to scan.
 * @param {string[]} [results] - optional seed list the found paths are added to.
 * @returns {Promise<string[]>} resolves with the collected file paths.
 */
function walk(dir, results) {
  results = results || [];
  return new Promise(function (resolve, reject) {
    fs.readdir(dir, function (err, list) {
      if (err) return reject(err);
      var i = 0;
      // Self-invoking step function: handles list[i], then calls itself for the next entry.
      (function next() {
        var file = list[i++];
        if (!file) return resolve(results);
        file = dir + '/' + file;
        fs.stat(file, function (err, stat) {
          if (stat && stat.isDirectory()) {
            walk(file).then(function (res) {
              results = results.concat(res);
              next();
            }, reject);
          } else {
            results.push(file);
            next();
          }
        });
      })();
    });
  });
}
You can read more about this subject in this awesome Thomas Hunter's blog post.
I have an array of urls like this
// List of addresses to process; the loop further down iterates up to urls.length.
var urls = ["www.google.com", "www.yahoo.com"];
And I want to loop through the urls and perform an async task inside the loop, without moving on to the next item until the async task has finished. I know you can do this with promises, but I am having some trouble with it. Here is what I have:
var xmlReader = require('cloud/xmlreader.js');
/**
 * Wraps xmlReader.read in a Parse.Promise.
 *
 * @param {string} xlmString - text handed to xmlReader.read.
 * @returns {Parse.Promise} resolved with the reader's result, or rejected with its error.
 */
function readResponse_async(xlmString) {
  var deferred = new Parse.Promise();

  // Node-style callback that settles the promise exactly once per read.
  function settle(err, res) {
    if (!err) {
      deferred.resolve(res);
      return;
    }
    deferred.reject(err);
  }

  xmlReader.read(xlmString, settle);
  return deferred;
}
// Question's loop: issues one httpRequest per index and chains readResponse_async onto each response.
// NOTE(review): as posted, the `.then(` call and the `for` body are never closed.
// NOTE(review): `i` is assigned without a declaration in this snippet.
for (i = 0; i < urls.length; i++) {
Parse.Cloud.httpRequest({
// NOTE(review): the loop bound uses `urls`, but the URL is read from `unionUrls` — confirm which array is intended.
url: unionUrls[i],
}).then(function(httpResponse) {
try {
// console.log(httpResponse.text)
return readResponse_async(httpResponse.text)
} catch (e) {console.log(e)}
}
But right now it doesn't wait for the readResponse_async to finish, how can I have it wait for that?
Thanks
EDIT
After reading the response I make a save to my database and I have another array like this
// Second array mentioned in the edit; presumably paired by position with the URLs — TODO confirm.
var location = ['USA', 'England'];
And I make the save like this
// Creates one TestItem per index of `data` and chains the saves so that each
// testItem.save() is scheduled in a .then() of the previous one.
// NOTE(review): the snippet ends before this function's closing brace, and
// `promise` is not returned anywhere in the visible part.
function saveLoc_async(data, location) {
var i3, i4, i5, m,
TestItem = Parse.Object.extend("TestItem"),//can be reused within the loops?
promise = Parse.Promise.as();//resolved promise to start a long .then() chain
for (i3 = 0; i3 < data.count(); i3++) {
// The IIFE gives every iteration its own `testItem` binding for the deferred save below.
(function(testItem) {
// NOTE(review): `i` is not among the variables declared above (i3, i4, i5, m); the loop counter is `i3`.
testItem.set("item", data.at(i));
testItem.set("location", location);
//build the .then() chain
promise = promise.then(function() {
return testItem.save();
});
})(new TestItem());
//************************
//CALL retry(); here?
//**************************
}
Because with your answer I have
// Processes one URL per call: pops a URL and a location, requests the URL,
// parses the response text and, on success, saves and calls itself again.
function retry() {
if (urlsUnion.length > 0) {
var nextUrl = urlsUnion.pop();
//********** ADDED LINE
// Popped in step with the URL above, so both arrays are consumed from the end.
var nextLoc = location.pop();
Parse.Cloud.httpRequest({
url: nextUrl,
}).then(function(httpResponse) {
xmlReader.read(httpResponse.text, function (err, res) {
if(err) {
// show an error
} else {
//********** ADDED LINE
// The return value of saveLoc_async is not used; retry() is called right after it.
saveLoc_async(res, nextLoc);
retry();
}
});
});
}
}
SO where should retry(); go because right now with the save sometimes it puts the second location with one of the first items url? why would that happen?
I did something similar to this for an animation.
// Animation steps, chained in this order.
var actions = [drawXXX, fadeOutYYY, drawXYZ];
/**
 * Chains every action onto the previous one with .then(), starting from $.when().
 */
this.startAnimation = function () {
  var chain = $.when();
  actions.forEach(function (step) {
    chain = chain.then(step);
  });
};
Your code fires both urls immediately, and does not wait in-between.
What you would have to do is to remove the first url from the array and fire it. In the 'then' branch check if you still have url's in the array and repeat.
Like this (untested, edited to make the code clean again):
var xmlReader = require('cloud/xmlreader.js');
/**
 * Parses one response body and, on success, starts the next request.
 *
 * @param {string} xlmString - response text handed to xmlReader.read.
 */
function readResponse_async(xlmString) {
  xmlReader.read(xlmString, function (err, res) {
    if (err) {
      // Report the failure instead of silently dropping it; the chain stops here.
      console.error('failed to read XML response', err);
      return;
    }
    readFirstUrl();
  });
}
/**
 * Removes the first URL from `urlsUnion` and requests it. The next URL is
 * requested only after readResponse_async has handled this response.
 * Does nothing once the list is empty.
 */
function readFirstUrl() {
  if (urlsUnion.length === 0) {
    return;
  }
  // shift() takes the FIRST element, matching the function name; pop() would
  // walk the list backwards.
  var url = urlsUnion.shift();
  Parse.Cloud.httpRequest({
    url: url,
  }).then(function (httpResponse) {
    readResponse_async(httpResponse.text);
  }, function (error) {
    // Surface failed requests instead of leaving them unobserved.
    console.error('request failed for ' + url, error);
  });
}
// Start the chain; each later request is triggered from readResponse_async.
readFirstUrl();
Not sure I understand your use of unionUrls array, but if you have your URL's in a urls array, I think this is pretty clean:
/**
 * Requests `url` and parses the response text.
 *
 * Parse.Cloud.httpRequest expects an options object, so a plain string is
 * wrapped as `{ url: ... }`; an options object is passed through unchanged.
 *
 * @param {string|object} url - address, or a ready-made httpRequest options object.
 * @returns {Parse.Promise} settles with the result of readResponse_async.
 */
function getUrl(url) {
  var options = (typeof url === 'string') ? { url: url } : url;
  return Parse.Cloud.httpRequest(options)
    .then(function (httpResponse) {
      return readResponse_async(httpResponse.text);
    });
}
// Process the URLs strictly one after another: each getUrl() starts only when
// the previous one has settled. The inner callback must RETURN the promise;
// without the return, .then() resolves immediately and the chain does not wait.
urls.reduce(function (prev, url) {
  return prev
    ? prev.then(function () { return getUrl(url); })
    : getUrl(url);
}, null);
I am trying to push some values to array by fetching data from Jenkins APIs, like below.
// Question's attempt: request info for five builds inside a waterfall step and pass the collected array on.
// NOTE(review): `buildNum` is assigned without a declaration, and `finalArray` is not declared in this snippet.
buildNum = 14;
async.waterfall([
function(callback){
for ( var i = buildNum; i > (buildNum-5); i--) {
(function(){
// NOTE(review): the IIFE takes no argument, so `i` here is still the shared loop variable.
jenkins.build_info('BuildDefinitionRequest', i, function(err, data) {
if (err){ return console.log(err); }
var tmpObj = {};
tmpObj.jobID = data.fullDisplayName;
tmpObj.result = data.result;
tmpObj.dateTime = data.id;
console.log(tmpObj);
finalArray.push(tmpObj);
});
})();
}
// Called right after the loop; nothing here waits for the build_info callbacks above.
callback(null, finalArray, 1);
},
function(finalArray, value, callback){
console.log(finalArray, value);
callback(null, 'done');
}
],function(err, result){
});
But "callback(null, finalArray, 1);" is getting called before the for loop finish its execution.
When I am printing the value of "finalArray" inside the for loop I am able to see all the values.
Technically the for loop has finished executing, but the jenkins.build_info calls haven't. You cannot make async calls inside of a for loop like that and expect the for loop to only finish after all the calls are complete. You're already using async, so this is an easy fix. I would do something like this:
// Fetch the info of the last five builds and log the collected list once all
// requests have finished.
var buildNum = 14;
var builds = [];
// just builds a collection for async to operate on
for (var i = buildNum; i > (buildNum - 5); i--) {
  builds.push(i);
}
var finalArray = [];
async.each(builds, function (build, next) {
  jenkins.build_info('BuildDefinitionRequest', build, function (err, data) {
    if (err) {
      // Return here: otherwise `data` is dereferenced on the error path and
      // next() is called a second time further down.
      return next(err);
    }
    var job = {
      jobID: data.fullDisplayName,
      result: data.result,
      dateTime: data.id
    };
    finalArray.push(job);
    next();
  });
}, function (err) {
  // this won't be called until all the jenkins.build_info calls have completed, or there is an error.
  if (err) {
    return console.log(err);
  }
  console.log(finalArray);
});
I want to call a function after an asynchronous for loop iterating through values of an Javascript object finishes executing. I have the following code
// Question's loop: one request per key of `courses`; there is no hook for "all requests finished".
// NOTE(review): `course` is used without a declaration in this snippet.
for (course in courses) {
var url = '...' + courses[course];
// The IIFE binds the current key, so each response handler sees its own `course`.
request(url, (function (course) {
return function (err, resp, body) {
// NOTE(review): `$` is assigned without a declaration, and `err` is not checked before `body` is loaded.
$ = cheerio.load(body);
//Some code for which I use object values
};
})(course));
}
This can be done in vanilla JS, but I recommend the async module, which is the most popular library for handling async code in Node.js. For example, with async.each:
var async = require('async');
// Iterate over the keys so async.each receives an array.
var courseIds = Object.keys(courses);
// Function for handling each course.
// Template: `callback` must be called when the work for this course is finished.
function perCourse(courseId, callback) {
var course = courses[courseId];
// do something with each course.
callback();
}
async.each(courseIds, perCourse, function (err) {
// Executed after each course has been processed.
});
If you want to use a result from each iteration, then async.map is similar, but passes an array of results to the second argument of the callback.
If you prefer vanilla JS, then this will work in place of async.each:
/**
 * Minimal sequential stand-in for async.each: hands the elements of `list` to
 * `func` one at a time, each only after the previous one called its callback.
 *
 * @param {Array} list - items to process; a copy is taken, the original is untouched.
 * @param {function(*, function(*=))} func - worker, called as func(item, next).
 * @param {function(*=)} callback - called with the first truthy error passed to
 *   `next`, or with no arguments after the last item.
 */
function each(list, func, callback) {
  // Work on a snapshot taken up front.
  var pending = list.slice(0);
  var cursor = 0;

  // Continuation handed to `func`; also used to start the run.
  function step(err) {
    if (err) {
      return callback(err);
    }
    if (cursor >= pending.length) {
      return callback();
    }
    func(pending[cursor++], step);
  }

  step();
}
(taken from a gist I wrote a while back)
I strongly suggest that you use the async library however. Async is fiddly to write, and functions like async.auto are brilliant.
A possible simple JS solution would be to do something like this.
// Demo data: each value is appended to the request URL in the loop below.
var courses = {
lorum: 'fee',
ipsum: 'fy',
selum: 'foe'
};
var keys = Object.keys(courses);
// Number of courses still outstanding; counted down in callOnCourseComplete.
var waiting = keys.length;
// Final hook, passed to callOnCourseComplete as the function to run at the end.
function completedAll() {
console.log('completed all');
}
/**
 * Marks one course as finished and runs `func` once the outer `waiting`
 * counter has dropped to a falsy value.
 *
 * @param {string} course - name that is logged.
 * @param {function()} func - invoked when no courses are left outstanding.
 */
function callOnCourseComplete(course, func) {
  console.log('completed', course);
  if (--waiting) {
    return;
  }
  func();
}
// Simulated requests: every course gets a timer with half the delay of the
// previous one, and each timer reports completion through callOnCourseComplete.
var delay = 10000;
keys.forEach(function (course) {
  var url = '...' + courses[course];
  console.log('request', url);
  delay /= 2;
  // forEach already gives each iteration its own `course` binding.
  setTimeout(function ( /* err, resp, body */ ) {
    // Some code for which I use object values
    callOnCourseComplete(course, completedAll);
  }, delay);
});
Update: Probably a better Javascript solution would be to use Promises
// Demo data: each value is interpolated into the request URL below.
const courses = {
lorum: 'fee',
ipsum: 'fy',
selum: 'foe',
};
// Runs once, after Promise.all over every course promise has fulfilled.
function completedAll() {
console.log('completed all');
}
// Per-course hook, attached with .then() to each course promise.
function callOnCourseComplete(courseName) {
console.log('completed', courseName);
}
// One promise per course; setTimeout stands in for the real request, with each
// timer using half the delay of the previous one.
let delay = 10000;
const arrayOfPromises = Object.keys(courses).map(courseName => (
  new Promise((resolve, reject) => {
    const url = `...${courses[courseName]}`;
    console.log('request', url);
    setTimeout((err, resp, body) => {
      if (err) {
        // Stop here so the success-path code below never runs for a failed request.
        reject(err);
        return;
      }
      // Some code for which I use object values
      resolve(courseName);
    }, (delay /= 2));
  }))
  .then(callOnCourseComplete));
// completedAll runs after every course promise fulfilled; the first rejection goes to console.error.
Promise.all(arrayOfPromises)
  .then(completedAll)
  .catch(console.error);