NodeJS creating a callback inside a request function - javascript

I'm currently scraping data from a webpage and then pushing it to an array, the code currently looks like this
// Scrape the page at `url` and collect one entry per matched element into `data`.
url = //url
let data = [];
// request() only starts the HTTP call; the callback below runs later, once the response has arrived.
request(url, function (err, response, html) {
if (!err) {
var $ = cheerio.load(html);
$('#id').each(function (i, element) {
data_element = //(this).find...
data.push(data_element);
});
}
console.log(data); //console logs the data inside the request
})
// This line runs before the callback above has been invoked, so nothing has been pushed yet.
console.log(data); //logs empty array outside of request
The data is logged when I call console log inside the request, but if I call it outside of the request function then it returns an empty array. I know I need to use a callback function but I was wondering what the best way to go about this is, as I will be making multiple requests inside my function.

You should use promises instead of callback functions.
A good promise library is Bluebird
You can find an example of using Promise instead of callback here
Each function should return a promise. You need to "wait" for all promises to finish by using Promise.all (link to documentation).
Then, you can write it all to your log
Here's an example:
const rp = require('request-promise'); // A package for Request with Bluebird promises
const Promise = require('bluebird');
const url = //url
let data = [];
const options = {
uri: url
};
const p1 = rp(options).then((response) => {
var $ = cheerio.load(response.body);
$('#id').each(function (i, element) {
data_element = //(this).find...
data.push(data_element);
});
console.log(data); //console logs the data inside the request
return data; // this data will be available on results parameter on Promise.all
});
const p2 = // another request
Promise.all([p1, p2]).then((results) => {
console.log(results) // print whatever you want
})

I think it's because the "let" variable declaration prevents it. Change it to var and it should work.

Related

How to wait for the iteration to finish when pushing result of a callback function into an array

What is the correct way to implement array.push so that "array_of_results" is returned after the forEach iteration has finished?
const postgres = require("./postgres");
// Intended to return one query result per item in `value`. As written it
// returns before any of the query callbacks has run (see the inline comments).
function get_array(value) {
var array_of_results = []
value.forEach( item => {
// Each call only registers a callback; the push happens when that callback is invoked.
postgres.query(item["id"],function(res){
console.log(res) //gives proper res after empty array
array_of_results.push(res);
})
});
console.log(array_of_results)// prints empty array
return array_of_results;
}
Edit:
and postgres.js looks like :
const { Pool } = require("pg");
const pool = new Pool();
var query_string = "select...."
// Runs `query_string` with `id` as its single parameter and passes the first
// returned row to `call`. On error the stack is logged and `call` is not invoked.
function query(id, call) {
pool.query(query_string, [id], (err, res) => {
if (err) {
console.log(err.stack)
} else {
call(res.rows[0])
}
})
}
module.exports = {
query
}
There are a few ways to do this, but first you need to understand what is actually happening.
In postgres.query(item["id"],function(res){ you are calling postgres.query with (1) an item ID and (2) a callback function. That call happens and then immediately continues in your calling code. So now you've just sent a bunch of requests to your database, and then immediately return an empty array. Those callbacks (2) have not been called yet.
To get the data back to your calling function, you'll need to either pass a callback instead of using return, or change to async/await.
Using async/await in every iteration of your loop is not as efficient, as you're waiting for each call to return sequentially. For the most efficient method, you will need to fire the requests and wait for them all to complete. You can do this by using promises.
You can modify your code to push a promise into an array for each iteration of the loop, then call (and await) Promise.all on the array of promises.
Here's a basic rewrite for you:
postgres.js:
// Promise wrapper around pool.query: fulfils with the first row returned for
// `id`, or logs the error stack and rejects with the error.
function query(id) {
  const executor = (resolve, reject) => {
    const onResult = (err, res) => {
      if (!err) {
        resolve(res.rows[0]);
        return;
      }
      console.log(err.stack);
      reject(err);
    };
    pool.query(query_string, [id], onResult);
  };
  return new Promise(executor);
}
module.exports = {
query
}
get_array implementation :
// Starts one postgres.query per item without waiting in between, then waits
// for all of them and returns the results in the same order as the items.
async function get_array(value) {
  const pending = [];
  value.forEach((item) => {
    pending.push(postgres.query(item.id));
  });
  const resolved = await Promise.all(pending);
  console.log(resolved); // prints populated array
  return resolved;
}
Note that when you call get_array you'll have to use await before the call, e.g. change var array = get_array(items) to var array = await get_array(items) and using await in a function requires it to be declared as an async function.
If you can't declare it as an async function, you may change the calling code to consume the promise:
var arrayPromise = get_array(items);
arrayPromise.then((results) => {
// do something with results
// but remember you cannot _return_ from within a callback, as discussed above
});

How to return from a looped asynchronous function with callback in Node

I am trying to write a function that:
Takes an array of URLs
Gets files from URLs in parallel (order's irrelevant)
Processes each file
Returns an object with the processed files
Furthermore, I don't need for errors in #2 or #3 to affect the rest of the execution in my application in any way - the app could continue even if all the requests or processing failed.
I know how to fire all the requests in a loop, then once I have all the data, fire the callback to process the files, by using this insertCollection pattern.
However, this is not efficient, as I shouldn't need to wait for ALL files to download before attempting to process them - I would like to process them as each download finishes.
So far I have this code:
const request = require('request');
const urlArray = [urlA, urlB, urlC];
const results = {};
let count = 0;
let processedResult;
// Issues one request per URL and hands each outcome to `callback`,
// either as (error) or as (null, body).
// The `return` statements below sit inside the request callback, so they do not
// become makeRequests' own return value; makeRequests itself has no return statement.
const makeRequests = function (urls, callback) {
for (let url of urls) {
request(url, function(error, response, body) {
if (error) {
callback(error);
return;
}
processedResult = callback(null, body)
if (processedResult) {
console.log(processedResult); // prints correctly!
return processedResult;
}
})
}
};
// Callback for makeRequests: records an error or appends a processed copy of
// `file` to `results`, and returns `results` once `count` reaches urlArray.length.
// NOTE(review): `results` is initialised as {} in this snippet, so `results.errors`
// and `results.processedFiles` are undefined unless created elsewhere — confirm.
const processResult = function(error, file) {
if (error) {
console.log(error);
results.errors.push(error);
}
// NOTE(review): there is no return after the error branch, so the lines below also run when `error` is set.
const processedFile = file + `<!-- Hello, Dolly! ${count}-->`;
results.processedFiles.push(processedFile);
if (++count === urlArray.length) {
return results;
}
};
const finalResult = makeRequests(urlArray, processResult);
console.log(finalResult); // undefined;
In the last call to processResult I manage to send a return, and makeRequests captures it, but I'm failing to "rein it in" in finalResult after that.
My questions are:
Why is this not working? I can print a well-formed processedResult
on the last iteration of makeRequests, but somehow I cannot return
it back to the caller (finalResult)
How can this be solved, ideally "by hand", without promises or the
help of libraries like async?
The makeRequests function returns undefined to finalResult because that is a synchronous function. Nothing stops the code executing, so it gets to the end of the function and, because there is no defined return statement, it returns undefined as default.

A promise within a function

My program downloads a large amount of data, processes it, and makes it available through a returned function. The program gets ahead of the download, so I am adding promises to make it wait for the data to arrive.
// Returns a lookup function that closes over `_data`, which starts out as null.
function dataSource(...) {
var _data = null;
// download: a promise that returns data for the _data object.
let download = function() { ... }
return function(...) {
if (!_data) {
// _data is assigned inside .then(), i.e. only after the download promise has resolved ...
download(...).then(data => _data = data).done();
}
// ... so on the first call this line is reached while _data is still null.
var datum = _data[key];
var outbound = doSomethingWithData(datum);
return outbound;
}
}
My code is structured like this because the function that Engine returns makes my code very neat.
var generate = dataSource(param1,param2);
var fullName = generate("malename")+" "+generate("malename")+" "+generate("surname");
The specific requirements are:
Download the data only once.
Query the data by key any number of times without downloading the data again.
Do not change the existing interface.
I could have dataSource return a promise rather than a function. I know what the pattern for using promises looks like. But that will force me to rewrite the code that consumes this function. This pattern is used extensively throughout the code, and changing it isn't an acceptable solution.
How can I structure this to ensure that my function doesn't return until it has the data, without returning the promise?
This should fix it
function dataSource(){
return function(){
return download(...).then(data=>doSomethingWithData(data[key]));
};
}
var generate = dataSource();
Promise
.all(["malename","malename","surname"].map((name)=>return generate(name)))
.spread((name1,name2,name3)=>{
return [name1,name2,name2].join(" ");
});
the spread is not needed, but it helps for illustration purposes
Have dataSource return a promise rather than the data function. The revised dataSource looks like this:
function dataSource(...) {
var _data = null;
// download: a promise that returns data for the _data object.
let download = function() { ... }
function _generate(...) {...}
return download(group,subgroup,options).then(data => _data = data).then(() => _generate);
}
Then, where the code is consumed, get the generate function from the returned promise:
// Placeholder until the data has arrived; replaced by the real lookup function below.
let generate = function() {};
// dataSource is a function, so it has to be called to obtain the promise.
dataSource(param1, param2).then(fn => generate = fn).done();

variable not defined node.js

I'm trying to use Node.js to get a response from an API, I want to clean the API response and use the result.
So to access the first API I have the following code.
To store and use the result I believe I need to store the JSON output globally.
However, I can't work out how to do this.
Example -
var request = require('request');
// Fetch the vehicle list as JSON; the callback runs once the response has arrived.
request({url: 'https://www.car2go.com/api/v2.1/vehicles?loc=wien&oauth_consumer_key=car2gowebsite&format=json', json: true}, function(err, res, json) {
if (err) {
throw err;
}
// `car2go` and `i` are assigned without var, so in non-strict code they are created as globals.
car2go = json.placemarks;
// Strip the unwanted fields from every placemark and tag it with fixed vendor/city/make/model values.
for (i = 0; i < car2go.length; i++) {
delete car2go[i].address;
delete car2go[i].charging;
delete car2go[i].exterior;
delete car2go[i].interior;
delete car2go[i].smartPhoneRequired;
delete car2go[i].vin
car2go[i].vendor = 'car2go';
car2go[i].city = 'wien';
car2go[i].carmake = 'Smart';
car2go[i].carmodel = 'Fortwo';
}
// Logged inside the callback, i.e. after the response has been processed.
console.log(car2go);
});
This prints the desired result however I know that this is because my variable is defined within the function.
I want to access the variable outside of the function.
To test if I could do this I changed the code to -
var request = require('request');
// Same request as before, but the result is logged outside the callback.
request({url: 'https://www.car2go.com/api/v2.1/vehicles?loc=wien&oauth_consumer_key=car2gowebsite&format=json', json: true}, function(err, res, json) {
if (err) {
throw err;
}
// `car2go` only comes into existence when this assignment runs, i.e. when the callback is invoked.
car2go = json.placemarks;
for (i = 0; i < car2go.length; i++) {
delete car2go[i].address;
delete car2go[i].charging;
delete car2go[i].exterior;
delete car2go[i].interior;
delete car2go[i].smartPhoneRequired;
delete car2go[i].vin
car2go[i].vendor = 'car2go';
car2go[i].city = 'wien';
car2go[i].carmake = 'Smart';
car2go[i].carmodel = 'Fortwo';
}
});
// Runs right after request() has been called, before the callback has assigned car2go.
console.log(car2go);
But if I do this I get
ReferenceError: car2go is not defined
I am running Node v0.12.2 on Mac OS Yosemite (10.10.3).
Admittedly I am very new to node and I am more familiar with R, Python and PL SQL.
There is no way to get reference to it outside of the callback function because the console.log line runs before the callback function is invoked. The reason you have to pass a callback function into the request API is because the request library needs to invoke that function when it's done making the request. Meanwhile, your app moves on and does other things (such as running that console.log line) while it waits for the callback function to fire.
That said, there are a number of ways to deal with asynchronous code. My favorite way is with promises. I use a library called bluebird for handling promises.
var request = require('request');
var Promise = require('bluebird');
var requestP = Promise.promisify(request);
The call to Promise.promisify(request) returns a new function that doesn't take a callback function, but instead returns a promise.
// Fetch the vehicle list, clean every placemark, then log the cleaned list.
requestP({ url: 'https://www.car2go.com/api/v2.1/vehicles?loc=wien&oauth_consumer_key=car2gowebsite&format=json', json: true })
  .spread(function (res, json) {
    var car2go = json.placemarks;
    var i; // declared locally so the loop counter does not leak into the global scope
    for (i = 0; i < car2go.length; i++) {
      delete car2go[i].address;
      delete car2go[i].charging;
      delete car2go[i].exterior;
      delete car2go[i].interior;
      delete car2go[i].smartPhoneRequired;
      delete car2go[i].vin;
      car2go[i].vendor = 'car2go';
      car2go[i].city = 'wien';
      car2go[i].carmake = 'Smart';
      car2go[i].carmodel = 'Fortwo';
    }
    // Return the cleaned list so the next .then() receives it;
    // without a return value the next handler would be passed undefined.
    return car2go;
  })
  .then(function (car2go) {
    console.log(car2go);
  })
  .catch(function (err) {
    console.error(err);
  });
Note: .spread is the same as .then except if the resolved value is an array (which it will be because the callback passed to the request library accepts 2 arguments, which bluebird will translate into an array that the promise resolves to) .spread will split up the array back into multiple arguments passed into the function you give to .spread.
Promise.resolve(['hi', 'there']).then(function (result) {
console.log(result); // "['hi', 'there']"
});
Promise.resolve(['hi', 'there']).spread(function (str1, str2) {
console.log(str1); // 'hi'
console.log(str2); // 'there'
});
You're not going to be able to return that value all the way back out to the same context from which you began the asynchronous call, but you can at least write code that looks somewhat synchronous when using promises.
Without promises you'll be forced to call functions from within functions from within functions from within functions ;)
The response is asynchronous. That means the callback function gets called sometime LATER in the future so your console.log(car2go) is executing BEFORE the callback has even been called.
The only place you can reliably use the response is inside the callback or in a function called from the callback. You cannot use it the way you are trying to. Using asynchronous responses in Javascript requires programming in an asynchronous fashion which means processing results and using results IN the asynchronous callbacks only.
Here's where the console.log() should be:
var request = require('request');
// Fetch the vehicle list as JSON and process it entirely inside the callback,
// which is the only place where the response is guaranteed to be available.
request({url: 'https://www.car2go.com/api/v2.1/vehicles?loc=wien&oauth_consumer_key=car2gowebsite&format=json', json: true}, function (err, res, json) {
  if (err) {
    throw err;
  }
  // Declared with var so neither name leaks into the global scope.
  var car2go = json.placemarks;
  var i;
  for (i = 0; i < car2go.length; i++) {
    delete car2go[i].address;
    delete car2go[i].charging;
    delete car2go[i].exterior;
    delete car2go[i].interior;
    delete car2go[i].smartPhoneRequired;
    delete car2go[i].vin;
    car2go[i].vendor = 'car2go';
    car2go[i].city = 'wien';
    car2go[i].carmake = 'Smart';
    car2go[i].carmodel = 'Fortwo';
  }
  // here is where the result is available
  console.log(car2go);
});

Structuring promises within angularjs

I have done a lot of reading around this, but ultimately the tutorials and guides I have found differ too much for me to get a decent grasp on this concept.
This is what I want to achieve:
1) Simple http request from our server [Any API for demonstration]
2) Run a function with data from (1). [Remove a property from the object]
3) Use result and length of (2) to run a loop of $http requests to our server. [Or any server]
4) This will result in 6 different objects. Run a function on these 6 objects. [Add a property]
5) Once ALL of this is done, run a separate function [Log "finished"]
How can this be achieved using promises? How do I pass data from (1) via a promise to (2)? Is this the right way to achieve what I need to do?
If anyone can show me how this should be structured it would be immensely helpful; I have kept the functions as simple as possible for this question.
Yes, promises are very nice to structure solutions for this kind of problems.
Simplified solution (more or less pseudo-code):
$http(...)
.then(function(response) {
// do something with response, for example:
var list = reponse.data.list;
// return it so that you can use it in the next 'then'.
return list;
})
.then(function(list) {
var promises = [];
angular.forEach(list, function(item) {
// perform a request for each item
var promise = $http(...).then(function(itemResponse) {
itemResponse.extraProperty = true;
return itemResponse;
});
// we make an array of promises
promises.push(promise);
});
// combine all promises into one and return it for the next then()
return $q.all(promises);
})
.then(function(itemsList) {
// itemsList is now an array of all parsed item responses.
console.log(itemsList);
});
(Hopefully this is right; I have not tested it.)
As you can see, you can return values in a callback to pass it to the next then(), or you can pass a promise, and this will result in calling the next callback when it resolves. $q.all() is used to combine multiple promises into one and resolve if all are resolved.
Edit: I realised that you can optionally leave out these three lines:
return list;
})
.then(function(list) {
But it is nice syntax though, because the separation of tasks is more visible.
Check the code below. It could contain syntax errors; the important thing is the structure. Step3 contains multiple (6) $http requests; it waits until the last request has responded and then returns a single response object (an array) containing the response for each $http request.
//Step 1
// Fetches the initial object, then runs steps 2-5 in order.
var Step1 = function () {
  $http.get('api/controller').success(function (resp) {
    var object1 = resp;
    Step2(object1);
    Step3(object1).then(function (responses) {
      //responses is an array containing the response object of each $http request, in request order
      Step4(responses);
      Step5();
    });
  });
}
//Step2
var Step2 = function(obj){
  //do whatever with the object
}
//Step3
// Starts all follow-up requests and returns one promise for all of them.
var Step3 = function (object1) {
  var get1 = $http.get(object1[0].url);
  var get2 = $http.get(object1[1].url2);
  //...
  var get6 = $http.get(object1[5].url6);
  // $q.all already returns a promise, so no extra deferred is needed;
  // returning it directly also lets a failed request reject the result.
  return $q.all([get1, get2, /* ... */ get6]);
}
//Step4
// Processes the data of every response produced by Step3.
var Step4 = function (responses) {
  for (var i = 0; i < responses.length; i++) {
    DoWhatEver(responses[i].data);
  }
}
//Step5
var Step5 = function () {
  alert("Finished");
}
Step1(); //Call Step1 function
Don't know why you have difficulty implementing this, but maybe $q.all() is what you're missing:
var config1={method:'GET',url:'/api/...'};
$http(config1).success(function(resultsFrom1){
functionForResultsOf1(resultsFrom1);
})
var functionForResultsOf1 = function(resultsOf1){
//remove something from the result, assuming this is a synchronous operation
resultsOf1.splice()...;
var promises=makePromises(*pass whatever you want*);
$q.all(promises).then(function(aggregateOfAllCallsToServer){
angular.forEach(aggregateOfAllCallsToServer,function(data){
//do something to data from each call to the server
})
console.log("finished");
})
}
var makePromises = function(serverUrls){
var promises = [];
angular.forEach(serverUrls, function(url) {
var promise=$http({
url : '/api/'+url,
method: 'GET',
})
promises.push(promise);
});
return $q.all(promises);
}

Categories

Resources