I have a JavaScript (Node) function to grab the content of a web page and handle it with a callback:
// Strict-mode directive: it must be spelled exactly 'use strict'. Any other
// string literal in this position is a no-op expression statement.
'use strict';
var http = require('http');
/**
 * Downloads `url` over HTTP and hands the complete body to `callback`.
 *
 * Nothing is returned: the body only exists once the response's 'end' event
 * fires, which happens after download() itself has already returned.
 *
 * @param {string} url - Address passed straight to http.get().
 * @param {function(?string): void} callback - Called with the body as a
 *   string, or with null when the request emits 'error'.
 */
function download(url, callback) {
  http.get(url, function (res) {
    var content = '';
    // Decode inside the stream so a multi-byte character that is split
    // across two chunks is not corrupted by the concatenation below.
    res.setEncoding('utf8');
    res.on('data', function (chunk) {
      content += chunk;
    });
    res.on('end', function () {
      callback(content);
    });
  }).on('error', function () {
    callback(null);
  });
}
What I don't understand is why I can't simply return the result on 'end'. Clearly, when the 'end' event is emitted, the 'content' variable contains a string with the content of a web page, otherwise it couldn't be submitted to the callback function, so why can't I just return it like this:
/**
 * Non-working variant from the question: it tries to hand the page body back
 * with `return` from inside the event handlers.
 *
 * download2 itself contains no return statement, so it always yields
 * undefined. The values returned by the handlers go to whoever invoked the
 * handler (the event emitter), not to download2's caller.
 *
 * @param {string} url - Address passed straight to http.get().
 */
function download2(url) {
  function onResponse(res) {
    var body = '';
    function append(piece) {
      body += piece;
    }
    function finish() {
      // Returned to the emitter that fired 'end'; download2 finished long ago.
      return body;
    }
    res.on('data', append);
    res.on('end', finish);
  }
  function onError() {
    return null;
  }
  var request = http.get(url, onResponse);
  request.on('error', onError);
}
download2 always returns undefined. Why?
These are asynchronous functions. They have already long since completed before the callback functions are called. Thus the desired return result is not known when either of your download functions returns. For data passed to asynchronous callbacks, the ONLY place you can do something with that data is from the callback itself. You can put your code to handle that data in the callback or you can call some other function from within the callback and pass the data to it.
This is asynchronous programming and you really have to get used to it in node because it's used there a lot. It is significantly different than synchronous programming in that you can't call a function that starts an asynchronous operation and expect the parent function to get the result and return it. The result of the asynchronous operation won't be known until sometime later, long after the parent function has already returned.
Thus, the way you've structured it in your first download() function is the usual way of handling this.
Related
This question already has answers here:
How do I return the response from an asynchronous call?
(41 answers)
What is the difference between synchronous and asynchronous programming (in node.js)
(10 answers)
Closed 6 years ago.
I've recently picked up Node and see that things don't always run sequentially. I'm pretty confused as I'm used to
1) Assignment
2) Print data
Currently I'm running the function below and calling var x = searchForProfessor("prof_name_here");
I then call console.log(x); only to get undefined.
I've been reading about callbacks all over the web and I can't wrap my head around the idea and apply it to this code. Can someone give me some intuition into making the above possible with callbacks?
My Function
// Runs a google() search for `teacher_name` and, for every result link that
// passes the title filter, fetches the link with request() and inspects the
// text of the '.breakdown-header .grade' element(s) on the fetched page.
//
// The function has no return statement: everything it learns arrives inside
// the google()/request() callbacks, so `var x = searchForProfessor(...)`
// always yields undefined.
var searchForProfessor = function searchForProfessor(teacher_name) {
google.resultsPerPage = 10
// NOTE(review): nextCounter is never read inside this function.
var nextCounter = 0
// NOTE(review): `err` from google() is not checked before `res.links` is read.
google(teacher_name, function (err, res){
for (var i = 0; i < res.links.length; ++i) {
var link = res.links[i];
// NOTE(review): the three negated checks are joined with ||, so a link is
// skipped only when its title contains all of 'Add', 'RATINGS' and 'Hint'.
// && may have been intended -- confirm.
if (!link.title.includes('Add') || !link.title.includes('RATINGS') || !link.title.includes("Hint")) {
// NOTE(review): the whole `link` object is handed to request(); verify that
// it has the shape request() expects.
request(link, function(err, resp, body){
if (!err && resp.statusCode == 200) { // Continue only when the request succeeded with HTTP 200
var $ = cheerio.load(body); // Load the fetched page body (presumably HTML) into cheerio
var overall_rating = $('.breakdown-header .grade').text(); // Text of the element(s) matching '.breakdown-header .grade'
// Truthy only when the extracted text is non-empty.
if (overall_rating.substr(0,3)) {
// NOTE(review): neither teacher_results nor prof_name is declared in this
// function; presumably both are defined elsewhere -- verify.
teacher_results.push(prof_name);
} // End if (rating text present)
} // End if (request succeeded)
}); // End request
} // End if (title filter)
} // End for
}); // End google callback
} // End searchForProfessor
Because both your google and request functions are asynchronous, the data returned from those functions will not be immediately available, which is why x
var x = searchForProfessor("prof_name_here");
will always give you undefined when you try to log it.
To mitigate this problem you can pass in functions as parameters to other functions to return that data when it's available.
Here's a small mockup of what's happening with your code complete with how callbacks are used.
async1 mimics a database connection - it returns an object using a key after 1 second by calling the function that's passed into it with the data as its first argument.
/**
 * Mock asynchronous lookup: after one second it calls `callback` with the
 * record stored under `name` (undefined when there is no such key).
 *
 * @param {string} name - Key to look up, e.g. 'tea001'.
 * @param {function(Object=): void} callback - Receives the record.
 */
function async1(name, callback) {
  var records = { tea001: { first: 'Dave', last: 'Batman' } };
  var deliver = function () {
    callback(records[name]);
  };
  setTimeout(deliver, 1000);
}
Here we have your searchForProfessor function which accepts a name and a function. It calls async1 with the name, passing in the callback as the second parameter. Note the data which is returned is the returned object data from async1. The callback that was passed into searchForProfessor is used to return that data to...
/**
 * Looks up `teacher_name` through async1 and relays the record it produces
 * to `callback`. Nothing is returned; the record only travels via callback.
 *
 * @param {string} teacher_name - Key forwarded to async1.
 * @param {function(*): void} callback - Receives the record from async1.
 */
function searchForProfessor(teacher_name, callback) {
  var relay = function (record) {
    callback(record);
  };
  async1(teacher_name, relay);
}
...this function call to searchForProfessor at which point you can log the data.
// Start the lookup; the record is only available inside this callback.
searchForProfessor('tea001', function logRecord(record) {
  console.log(record);
});
You can see in the demo that the returned data takes around a second to be logged to the console once the code is run.
DEMO
I am trying to understand control flow in Node.js applications. Specifically, does control return to the original function once the callback method completes (like a callback stack in recursive calls)? I wrote a simple program that makes a GET call and returns the data. Here is the program:
Code:
var async = require('async');
var http = require('http');
/**
 * Fetches `url` with http.get and reports the outcome through a Node-style
 * callback.
 *
 * The function returns (and logs "Return from get google data") before any
 * response has arrived. Likewise "I am here but why!" is logged as soon as
 * the response headers arrive, because the `if` block only registers
 * handlers for events that fire later.
 *
 * @param {string} url - Address passed to http.get().
 * @param {function(?Error, string=): void} callback - Called with
 *   (null, body) on HTTP 200, or with an Error for any other status or for
 *   a request-level failure.
 */
function getGoogleData(url, callback) {
  http.get(url, function (response) {
    if (response.statusCode === 200) {
      var googleInfo = '';
      // The encoding must be set before the first 'data' event; setting it
      // in the 'end' handler cannot affect chunks already received.
      response.setEncoding('utf8');
      response.on('data', function (chunk) {
        console.log("receiving data... ");
        googleInfo += chunk;
      });
      response.on('end', function () {
        console.log("End of data receive... ");
        return callback(null, googleInfo);
      });
    } else {
      // Discard the unread body, and report the failure so the caller is
      // not left waiting forever.
      response.resume();
      callback(new Error("GET called failed status_code=" + response.statusCode));
    }
    // Runs right after the handlers above are registered, not after they fire.
    console.log("I am here but why!");
  }).on('error', function (err) {
    // Connection-level failures (DNS, refused connection, ...) land here.
    callback(err);
  });
  console.log("Return from get google data");
}
// Run the two steps in series; each step passes its result to the next one
// through `callback`, whose first argument is reserved for an error.
async.waterfall([
  function (callback) {
    console.log("In func 1");
    getGoogleData("http://www.google.com", callback);
  },
  function (data, callback) {
    console.log("In func 2");
    // Pass null in the error slot: callback(data) would report the page
    // body itself as an error to the final handler.
    callback(null, data);
  }],
  function (err, res) {
    // Final callback: runs once, after the last step completes or as soon
    // as any step reports an error.
    console.log("In err fn");
  });
Here is output of the program:
Output:
In func 1
Return from get google data
I am here but why!
receiving data...
receiving data...
End of data receive...
In func 2
In err fn
Can someone help me understand why 'I am here but why!' line gets printed as the second output line in console log even after returning from 'data' event emitter? What is the overall control flow here?
The reason you're seeing that message logged first is that all that the code inside the if block is doing is adding event handlers. Those events are emitted some time in the future, after your console.log has already executed.
It's a similar reason why "Return from get google data" gets printed before the request finishes, because the http request is asynchronous.
This question already has answers here:
How do I return the response from an asynchronous call?
(41 answers)
Closed 5 years ago.
I am facing small trouble in returning a value from callback function in Node.js, I will try to explain my situation as easy as possible. Consider I have a snippet, which takes URL and hits that url and gives the output:
// Fire the request. urllib reports back through the callback, so `finalData`
// is only assigned once the response has arrived.
urllib.request(urlToCall, { wd: 'nodejs' }, function onResponse(err, data, response) {
  finalData = getResponseJson(response.statusCode, data.toString());
});
I tried to wrap it inside a function and return a value like this:
/**
 * Non-working attempt from the question: wraps the urllib request and tries
 * to return the processed response.
 *
 * doCall has no return statement of its own, so callers always receive
 * undefined; the `return` below belongs to the inner callback.
 *
 * @param {string} urlToCall - Address to request.
 */
function doCall(urlToCall) {
  urllib.request(urlToCall, { wd: 'nodejs' }, function onResponse(err, data, response) {
    finalData = getResponseJson(response.statusCode, data.toString());
    // Handed back to urllib (the code that invoked this callback), not to
    // whoever called doCall.
    return finalData;
  });
}
Because in my Node.js code, I have a lot of if-else statement where value of urlToCall will be decided, like this:
if(//somecondition) {
urlToCall = //Url1;
} else if(//someother condition) {
urlToCall = //Url2;
} else {
urlToCall = //Url3;
}
The thing is all of the statements inside a urllib.request will remain same, except value of urlToCall. So definitely I need to put those common code inside a function. I tried the same but in doCall will always return me undefined. I tried like this:
// doCall has no return statement of its own (its `return` sits inside the
// urllib callback), so this assignment always receives undefined.
response = doCall(urlToCall);
console.log(response) //Prints undefined
But if I print value inside doCall() it prints perfectly, but it will always return undefined. As per my research I came to know that we cannot return values from callback functions! (is it true)? If yes, can anyone advice me how to handle this situation, as I want to prevent duplicate code in every if-else blocks.
It's undefined because console.log(response) runs before doCall(urlToCall); has finished. You have to pass in a callback function as well, one that runs when your request is done.
First, your function. Pass it a callback:
/**
 * Requests `urlToCall` and delivers the processed response to `callback`.
 *
 * @param {string} urlToCall - Address to request.
 * @param {function(*, Error=): void} callback - Called with the value
 *   produced by getResponseJson() on success. If the request itself fails
 *   it is called with (null, err) instead, so the failure is not lost.
 */
function doCall(urlToCall, callback) {
  urllib.request(urlToCall, { wd: 'nodejs' }, function (err, data, response) {
    if (err) {
      // `data`/`response` cannot be relied on after a failed request;
      // touching them here would throw inside the callback.
      return callback(null, err);
    }
    var statusCode = response.statusCode;
    // Kept local: assigning an undeclared name creates a global in sloppy
    // mode, and throws in strict mode.
    var finalData = getResponseJson(statusCode, data.toString());
    return callback(finalData);
  });
}
Now:
// Usage: the result is consumed inside the callback instead of being read
// from a return value.
var urlToCall = "http://myUrlToCall";
doCall(urlToCall, function(response){
// `response` is whatever doCall passed to its callback; use it here.
console.log(response);
})
@Rodrigo posted a good resource in the comments. Read about callbacks in node and how they work. Remember, it is asynchronous code.
I am facing small trouble in returning a value from callback function in Node.js
This is not a "small trouble", it is actually impossible to "return" a value in the traditional sense from an asynchronous function.
Since you cannot "return the value", you must call the function that will need the value once you have it. @display_name already answered your question, but I just wanted to point out that the return in doCall is not returning the value in the traditional way. You could write doCall as follows:
/**
 * Requests `urlToCall` and passes the processed response to `callback`.
 *
 * @param {string} urlToCall - Address to request.
 * @param {function(*, Error=): void} callback - Called with the value
 *   produced by getResponseJson() on success. If the request itself fails
 *   it is called with (null, err) instead.
 */
function doCall(urlToCall, callback) {
  urllib.request(urlToCall, { wd: 'nodejs' }, function (err, data, response) {
    if (err) {
      // `data`/`response` cannot be relied on after a failed request.
      callback(null, err);
      return;
    }
    var statusCode = response.statusCode;
    // Local on purpose: an undeclared assignment would leak a global.
    var finalData = getResponseJson(statusCode, data.toString());
    // call the function that needs the value
    callback(finalData);
    // we are done (this ends the callback; it returns nothing to doCall's caller)
    return;
  });
}
Line callback(finalData); is what calls the function that needs the value that you got from the async function. But be aware that the return statement is used to indicate that the function ends here, but it does not mean that the value is returned to the caller (the caller already moved on.)
Example code for node.js - async function to sync function:
var deasync = require('deasync');
/**
 * Blocking wrapper around asyncFunc: repeatedly calls deasync.runLoopOnce()
 * until asyncFunc's callback has fired, then returns the outcome.
 *
 * @returns {*} The callback's `err` when it is truthy, otherwise its `result`.
 */
function syncFunc() {
  var outcome = null;
  asyncFunc(function (err, result) {
    outcome = { err: err, result: result };
  });
  // Keep turning the loop until the callback above has stored an outcome.
  for (; outcome == null;) {
    deasync.runLoopOnce();
  }
  return outcome.err ? outcome.err : outcome.result;
}
If what you want is to get your code working without modifying too much. You can try this solution which gets rid of callbacks and keeps the same code workflow:
Given that you are using Node.js, you can use co and co-request to achieve the same goal without callback concerns.
Basically, you can do something like this:
/**
 * Generator-based variant: requests `urlToCall` inside a co() coroutine and
 * resolves with the processed response.
 *
 * @param {string} urlToCall - Address to request.
 * @returns {*} Whatever co() returns for the generator (a promise/thunk that
 *   yields the value produced by getResponseJson()).
 */
function doCall(urlToCall) {
  return co(function *(){
    var response = yield urllib.request(urlToCall, { wd: 'nodejs' }); // This is co-request.
    var statusCode = response.statusCode;
    // The payload has to come from the yielded response; the previous code
    // read a `data` variable that is never defined in this function.
    // NOTE(review): assumes the response exposes the payload as `body`
    // (co-request convention) -- confirm for the client actually in use.
    var finalData = getResponseJson(statusCode, response.body.toString());
    return finalData;
  });
}
Then,
// NOTE: `yield` is only valid inside a generator function (for example the
// one passed to co()), so these two lines must live in such a function.
var response = yield doCall(urlToCall); // "yield" guarantees the callback finished.
console.log(response) // The response will not be undefined anymore.
By doing this, we wait until the callback function finishes, then get the value from it. Somehow, it solves your problem.
So I'm trying to perform an HTTPS GET with node.js and I have the following code
/**
 * Issues an HTTPS request for `url`, collects the response body and logs it
 * parsed as JSON. Request errors are logged by message.
 *
 * Nothing is returned; the parsed data is only available inside the 'end'
 * handler.
 *
 * @param {string} url - Address passed to https.request().
 */
function get(url) {
  var req = https.request(url, function handleResponse(res) {
    var buffered = "";
    res.on('data', function (piece) {
      buffered += piece;
    });
    res.on('end', function () {
      console.log(JSON.parse(buffered));
    });
  });
  req.on('error', function (e) {
    console.log(e.message);
  });
  // A request created with https.request() is only sent once end() is called.
  req.end();
}
This code works fine and dandy except I need this function to return the data its logging
I know the recommended way to do this is to use callbacks, passing a callback function into get and then calling that function in the 'end' listener. But the problem is that this process needs to be synchronized and NOT pipelined because it causes data hazards and uses too much memory. On top of that, it is recursively called and is just one big headache to try and manage.
Basically, I'm trying to return JSON.parse(data) from the get function when the end listener is called. Is that possible?
You can't synchronously return data using an asynchronous function to retrieve the data. Your get() function will return long before the https.request() has completed so you just can't do what you asked to do.
The usual design pattern for solving this involves passing in a callback function to your get() function that will be called when the data is available. This will involve restructuring the caller of your function to handle an asynchronous response via a callback function.
There are some different choices in how you structure the callback, but here's the general idea:
/**
 * Issues an HTTPS request for `url` and reports the outcome via `callback`.
 *
 * @param {string} url - Address passed to https.request().
 * @param {function(string, *): void} callback - Called with
 *   ("success", parsedJson) when the body was received and parsed, or with
 *   ("error", err) when the request fails or the body is not valid JSON.
 */
function get(url, callback) {
  https.request(url, function (res) {
    var data = "";
    res.on('data', function (chunk) {
      data += chunk;
    })
    .on('end', function () {
      var parsed;
      try {
        parsed = JSON.parse(data);
      } catch (e) {
        // A malformed body would otherwise throw inside this event handler,
        // where the caller can neither catch it nor learn about it.
        callback("error", e);
        return;
      }
      // Outside the try block so that an exception thrown by the caller's
      // own callback is not misreported as a parse error.
      callback("success", parsed);
    });
  }).on('error', function (e) {
    callback("error", e);
  }).end();
}
Usage:
// get() is built on https.request(), which rejects non-https: URLs, so the
// address has to use the https scheme.
get("https://www.example.com/myurl", function(status, data) {
  if (status === "success") {
    console.log(data);
  } else {
    // On "error", `data` carries the error object instead of the payload.
    console.error(data);
  }
});
May I recommend Q. It is specifically designed to help you fight the famous pyramid of callbacks in JavaScript. I understand that callbacks can lead to less-readable code but you should not try to make synchronous get requests. It kind of defeats the advantages of node.js.
You can convert
// Callback "pyramid": each step starts the next one from inside its own
// callback, so every additional step adds a level of nesting.
step1(function (value1) {
step2(value1, function(value2) {
step3(value2, function(value3) {
step4(value3, function(value4) {
// Do something with value4
});
});
});
});
to this -->
// The same four steps written as a flat chain: each .then() presumably
// receives the previous step's result, and the single .catch() covers all of
// the steps above it -- see the Q documentation for exact semantics.
Q.fcall(promisedStep1)
.then(promisedStep2)
.then(promisedStep3)
.then(promisedStep4)
.then(function (value4) {
// Do something with value4
})
.catch(function (error) {
// Handle any error from all above steps
})
.done();
So in my server code, variable invites is undefined outside of the success function.
/**
 * Question's example: reads the invites for plan `id` and logs them both
 * inside the success handler and right after starting the read.
 *
 * When read() completes asynchronously, the second log runs first and
 * prints undefined.
 *
 * @param {*} id - Value matched against the "PlanID" column.
 */
function getInvites(id) {
  var query = tables.getTable("Invites").where({ "PlanID": id }).select("UserID", "Attending");
  var invites;
  function onSuccess(rows) {
    invites = rows;
    console.log(invites); //works here
  }
  query.read({ success: onSuccess });
  console.log(invites); //undefined here
}
From similar questions, I realize it's because the call is asynchronous. So the success function call is run after the console.log(invites); //undefined here call.
My question is how do I stop that in Windows Azure?
Added code
/**
 * Question's read script: executes the request, starts an invites lookup for
 * every result row, then responds.
 *
 * request.respond() is called right after the lookups are started, not
 * after their callbacks have run.
 *
 * @param {*} query - Not used in this function.
 * @param {*} user - Not used in this function.
 * @param {{execute: Function, respond: Function}} request
 */
function read(query, user, request) {
  function attachInvites(row) {
    getInvites(row.id, function (invites) {
      row.invites = invites;
    });
  }
  function onSuccess(results) {
    results.forEach(attachInvites);
    request.respond();
  }
  request.execute({ success: onSuccess });
}
/**
 * Reads the invites for plan `id` and passes the rows to `cb`.
 *
 * @param {*} id - Value matched against the "PlanID" column.
 * @param {function(*): void} [cb] - Receives the rows handed to the success
 *   handler; when omitted the rows are dropped.
 */
function getInvites(id, cb) {
  var filter = { "PlanID": id };
  var query = tables.getTable("Invites").where(filter).select("UserID", "Attending");
  query.read({
    success: function (rows) {
      if (!cb) {
        return;
      }
      cb(rows);
    }
  });
}
You don't "stop that," you design your application around the async nature of whatever environment you're using.
I assume you're trying to do something like this:
/**
 * Illustration of the mistaken approach: starts the read and immediately
 * returns `invites`.
 *
 * When read() completes asynchronously, the success handler has not run yet
 * at the time of the return, so the caller receives undefined.
 *
 * @param {*} id - Value matched against the "PlanID" column.
 * @returns {*} Whatever `invites` holds at return time.
 */
function getInvites(id) {
  var invites;
  var remember = function (rows) {
    invites = rows;
  };
  tables.getTable("Invites").where({ "PlanID": id }).select("UserID", "Attending").read({ success: remember });
  return invites;
}
// later... (caller written as if getInvites returned the rows directly)
var invites = getInvites(someId);
// do something with `invites` -- the code that would consume the returned value
This obviously won't work, since you return the value of invites before the async call completes.
Instead, you write your app in async style:
/**
 * Async-style version: reads the invites for plan `id` and delivers the rows
 * to `cb` once the read succeeds. No error handling is included.
 *
 * @param {*} id - Value matched against the "PlanID" column.
 * @param {function(*): void} [cb] - Receives the rows; optional.
 */
function getInvites(id, cb) {
  var query = tables.getTable("Invites").where({ "PlanID": id }).select("UserID", "Attending");
  var onSuccess = function (rows) {
    if (!cb) {
      return;
    }
    cb(rows);
  };
  query.read({ success: onSuccess });
}
// later... (the consumer goes inside the callback instead of after the call)
getInvites(someId, function(invites) {
// do something with `invites` -- this is where the rows are available
});
This leaves out error handling code for the sake of simplicity, so you'd have to add that as well.
After seeing your full code, it looks like you have a simple problem of managing many parallel asynchronous operations. Consider what happens: your loop runs, iterating over an array of n objects. For each, you call getInvites, which begins a database request and returns.
This means your loop runs very quickly, but now you have n outstanding database requests that you must wait on before you can call request.respond().
An extremely basic solution would be to do something like count the number of times your getInvites callback is called, and then finally complete the request when that number reaches n.
However, it is time-consuming and mistake-prone to manage this bookkeeping manually every time you make async requests. This is a situation where flow control libraries are extremely useful. I will use jQuery's Deferred in this example, since it may already be familiar to you (even if you don't know you've actually used it before — if you've ever used jQuery's XHR API, you've used Deferreds).
Given that you're in a server environment, you obviously don't have jQuery; however, there are people who have extracted only the code necessary for Deferred for you.
Once we have Deferreds for every pending request, we can use when to register a callback that gets called only after all pending Deferreds complete.
/**
 * Executes the request, starts one invites lookup per result row, and
 * responds only after Deferred.when reports all lookups as done.
 *
 * @param {*} query - Not used in this function.
 * @param {*} user - Not used in this function.
 * @param {{execute: Function, respond: Function}} request
 */
function read(query, user, request) {
  function onSuccess(results) {
    // One pending Deferred/promise per row, in row order.
    var pending = [];
    var idx;
    for (idx = 0; idx < results.length; idx++) {
      pending.push(getInvites(results[idx].id));
    }

    function onAllDone() {
      // The n-th argument is the value the n-th lookup resolved with.
      for (var k = 0; k < results.length; k++) {
        results[k].invites = arguments[k];
      }
      request.respond(); // Every row now carries its invites.
    }

    function onAnyFailed() {
      // Handle errors here
    }

    // apply() spreads the variable-length array into when()'s argument list.
    Deferred.when.apply(Deferred, pending).done(onAllDone).fail(onAnyFailed);
  }

  request.execute({ success: onSuccess });
}
/**
 * Starts the invites read for plan `id` and synchronously returns a promise
 * for its outcome.
 *
 * @param {*} id - Value matched against the "PlanID" column.
 * @returns {*} dfd.promise(): resolved with the rows from the success
 *   handler, or rejected with the value passed to the error handler.
 */
function getInvites(id) {
  var dfd = new Deferred();
  var handlers = {
    // Data arrived: mark the Deferred as done and pass the rows along.
    success: function (rows) {
      dfd.resolve(rows);
    },
    // TODO: Not sure if this is how the API you're using handles errors
    error: function (err) {
      dfd.reject(err); // Marks the Deferred as failed.
    }
  };
  tables.getTable("Invites").where({ "PlanID": id }).select("UserID", "Attending").read(handlers);
  // Callers attach their handlers to this promise; they fire when the
  // Deferred above is eventually resolved or rejected.
  return dfd.promise();
}
Notes:
jQuery.when (or in this server-side case, Deferred.when) normally expects you to pass a fixed number of Deferreds as arguments:
$.when(dfd1, dfd2).done(function(result1, result2) { ... });
However, we have a variable number of Deferreds, so we must apply an array of Deferreds to when and then in the done handler, access each result via the implicit arguments object.
Array.forEach(...) is slow. In most cases, it is better to use a regular for loop.
I've stumbled on same need for synchronous DB access, so I wrote small module called query-synchronizer.
Idea was to count how many times query was started and ended. If all started count was equal to ended count, other part of code would be executed. Your code would look like this:
var synchronizer = require('query-synchronizer');
/**
 * Executes the request, reads the invites for every result row through the
 * synchronizer, and responds from the synchronizer's done() callback.
 *
 * @param {*} query - Not used in this function.
 * @param {*} user - Not used in this function.
 * @param {{execute: Function, respond: Function}} request
 */
function read(query, user, request) {
  request.execute({
    success: function (results) {
      results.forEach(function (r) {
        var InvitesTable = tables.getTable("Invites").where({"PlanID": r.id}).select("UserID","Attending");
        // The parameter is named `invites` so the assignment below stores
        // the rows that were read. It was previously named `results`, which
        // left `invites` undefined and shadowed the outer `results`.
        synchronizer.read(InvitesTable, function (invites) {
          r.invites = invites;
        });
      });
      // NOTE(review): presumably done() fires once every read started above
      // has finished -- confirm against the query-synchronizer module.
      synchronizer.done(function () {
        request.respond();
      });
    }
  });
}