Getting records from DynamoDB recursively using Q.Promises - javascript

I am having trouble implementing Q promises with a recursive DynamoDB call. I am new to Node.js and Q. Because DynamoDB limits how much a single query can return, we need to run the query recursively (following LastEvaluatedKey) to get all the required results.
Normally we use the query with a Q implementation something like this:
function getDBResults() {
    var q = Q.defer();
    var params = {
        TableName: mytable,
        IndexName: 'mytable-index',
        KeyConditionExpression: 'id = :id',
        FilterExpression: 'deliveryTime between :startTime and :endTime',
        ExpressionAttributeValues: {
            ':startTime': {
                N: startTime.toString()
            },
            ':endTime': {
                N: endTime.toString()
            },
            ':id': {
                S: id.toString()
            }
        },
        Select: 'ALL_ATTRIBUTES',
        ScanIndexForward: false
    };
    dynamodb.query(params, function(err, data) {
        if (err) {
            console.log('Dynamo fail ' + err);
            q.reject(err);
        } else {
            console.log('DATA ' + data);
            var results = data.Items;
            q.resolve(results);
        }
    });
    return q.promise;
}
getDBResults().then(
    function(data) {
        // handle data
    },
    function(err) {
        // handle error
    }
);
Using a recursive query I can get the results, but I need those results in another function. Because of Node.js's async nature, the next function call happens before the recursive query has finished. I want the recursive query function to collect all the results and hand them back as a promise to a new function, so I can finally handle all the data there.
The recursive query for DynamoDB looks like this:
// params and allResults are shared between the two functions below
var params;
var allResults = [];

function getDBResults() {
    //var q = Q.defer();
    params = {
        TableName: mytable,
        IndexName: 'mytable-index',
        KeyConditionExpression: 'id = :id',
        FilterExpression: 'deliveryTime between :startTime and :endTime',
        ExpressionAttributeValues: {
            ':startTime': {
                N: startTime.toString()
            },
            ':endTime': {
                N: endTime.toString()
            },
            ':id': {
                S: id.toString()
            }
        },
        Select: 'ALL_ATTRIBUTES',
        ScanIndexForward: false
    };
    dynamodb.query(params, onQueryCallBack);
}

function onQueryCallBack(err, data) {
    if (err) {
        console.log('Dynamo fail ' + err);
        console.error('Could not query db' + err);
    } else {
        if (typeof data.LastEvaluatedKey != "undefined") {
            console.log("query for more...");
            params.ExclusiveStartKey = data.LastEvaluatedKey;
            dynamodb.query(params, onQueryCallBack);
        }
        data.Items.forEach(function(item) {
            allResults.push(item);
        });
        //console.log('NO:OF Results:' + allResults.length);
        //q.resolve(tickets);
    }
}
Now I want to get the final results as a promise, so I can handle them in the next function like this:
getDBResults().then(
    function(data) {
        // handle data
    },
    function(err) {
        // handle error
    }
);
Please help me with this. Sorry if it's a stupid question, but recursive calls with promises have been a hurdle for me.
Thanks

First of all, keep the promisified function you already have. Use it as a building block for the recursive solution, instead of trying to alter it!
It might need two small adjustments though:
function getDBResults(startKey) {
    //                 ^^^^^^^^
    var q = Q.defer();
    var params = {
        ExclusiveStartKey: startKey,
        // ^^^^^^^^^^^^^^^^^^^^^^^^
        … // rest as before
    };
    dynamodb.query(params, function(err, data) {
        if (err) {
            q.reject(err);
        } else {
            q.resolve(data);
            // ^^^^ Not `data.Items`
        }
    });
    return q.promise;
}
Now we can use that to trivially implement the recursive solution:
function getRecursiveDBResults(key) {
    return getDBResults(key).then(function(data) {
        if (typeof data.LastEvaluatedKey != "undefined") {
            return getRecursiveDBResults(data.LastEvaluatedKey).then(function(items) {
                return data.Items.concat(items);
            });
        } else {
            return data.Items;
        }
    });
}
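For completeness, a minimal usage sketch (not part of the original answer): the recursive wrapper returns a single promise for the combined item array, so the consuming code looks exactly like the non-recursive version.
getRecursiveDBResults().then(
    function(items) {
        // all pages have been concatenated into one array here
        console.log('Total items: ' + items.length);
    },
    function(err) {
        // handle error
        console.error('Query failed: ' + err);
    }
);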

Here is how I solved the problem. Thanks Bergi for your solution as well.
function getDBResults() {
    var q = Q.defer();
    var dynamodb = core.getDynamoDB();
    var params = {
        TableName: mytable,
        IndexName: 'mytable-index',
        KeyConditionExpression: 'id = :id',
        FilterExpression: 'deliveryTime between :startTime and :endTime',
        ExpressionAttributeValues: {
            ':startTime': {
                N: startTime.toString()
            },
            ':endTime': {
                N: endTime.toString()
            },
            ':id': {
                S: id.toString()
            }
        },
        Select: 'ALL_ATTRIBUTES',
        ScanIndexForward: false
    };
    var results = [];
    var callback = function(err, data) {
        if (err) {
            console.log('Dynamo fail ' + err);
            return q.reject(err);
        }
        // Collect this page's items before deciding whether another page is needed.
        data.Items.forEach(function(item) {
            results.push(item);
        });
        if (data.LastEvaluatedKey) {
            params.ExclusiveStartKey = data.LastEvaluatedKey;
            dynamodb.query(params, callback);
        } else {
            q.resolve(results);
        }
    };
    dynamodb.query(params, callback);
    return q.promise;
}
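As an aside (not from either answer), Q can also wrap the node-style callback for you via Q.ninvoke, which shortens the same pagination pattern. This is only a sketch and assumes the same dynamodb and params objects as above:
// Sketch: follow LastEvaluatedKey recursively, accumulating all pages into one array.
function queryAllPages(params, collected) {
    collected = collected || [];
    // Q.ninvoke wraps dynamodb.query's (err, data) callback in a promise.
    return Q.ninvoke(dynamodb, 'query', params).then(function (data) {
        collected = collected.concat(data.Items);
        if (typeof data.LastEvaluatedKey !== 'undefined') {
            params.ExclusiveStartKey = data.LastEvaluatedKey;
            return queryAllPages(params, collected); // fetch the next page
        }
        return collected; // last page reached
    });
}
// Usage: queryAllPages(params).then(function (items) { /* handle all items */ });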

Related

NodeJS - forEach function does not run over all of the elements and stops

I have stored 2775 URLs in my mLab database, and for each URL I fetch more information. I store all of the URLs in an array and pass it into a function to process. However, the code only runs for about 1700 URLs, processes them, and then stops. Here is my code (sorry about the code, this is my first time using Stack Overflow):
Product.find({}, (err, foundProducts) => {
    if (err) {
        console.log("err " + err);
    } else {
        foundProducts.forEach(function(foundProduct) {
            var updateProduct = service.updateTikiProduct(foundProduct.url);
        });
    }
});
updateTikiProduct: function(url) {
    const options = {
        url: url,
        json: true
    };
    request(options, function(err, res, body) {
        // SOME code to crawl data
        Product.findOneAndUpdate({
            url: options.url
        }, {
            $set: {
                name: name,
                brand: brand,
                store: store,
                location: location,
                base_category: categoryType,
                top_description: topDescription,
                feature_description: featureDescription
            }
        }, {
            upsert: true,
            new: true
        }, (err, createdProduct) => {
            if (err) {
                reject(err);
            } else {
                var currentDate = new Date();
                if (!createdProduct.hasOwnProperty("price")) {
                    createdProduct.price.push({
                        current: currentPrice,
                        origin: originPrice
                    });
                    createdProduct.save();
                } else if (createdProduct.hasOwnProperty("price") &&
                    createdProduct.price[0].date.getDate() != currentDate.getDate()) {
                    createdProduct.price.push({
                        current: currentPrice,
                        origin: originPrice
                    });
                    createdProduct.save();
                    console.log("Update price");
                }
                counter++;
                console.log("url : " + options.url);
                console.log("Created product " + counter + " success!");
            }
        });
    });
}
I guess Mongo has limits on how many items it returns from the db; you should try findAll or https://stackoverflow.com/a/3705615/4187058
I think your code is not processing all the elements because you are processing them all in parallel, which will stop at some point once memory fills up.
foundProducts.forEach(function(foundProduct) {
    var updateProduct = service.updateTikiProduct(foundProduct.url);
});
What you should do is process them in series. You can use async/await for that; make the following changes and it will work:
for (let foundProduct of foundProducts) {
    var updateProduct = await service.updateTikiProduct(foundProduct.url);
}
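One caveat: await only pauses the loop if updateTikiProduct returns a promise, and the version shown in the question does not return anything. Below is a minimal sketch, not from the answer, of how it could be adapted; the request call and field handling are placeholders standing in for the original crawling code, and the loop has to live inside an async function:
// Hypothetical promise-returning variant of updateTikiProduct, so that `await` actually waits.
updateTikiProduct: function(url) {
    return new Promise(function(resolve, reject) {
        const options = { url: url, json: true };
        request(options, function(err, res, body) {
            if (err) return reject(err);
            // ... crawl the body and update the Product document as in the original code ...
            resolve(body);
        });
    });
}

// The caller must itself be an async function for `await` to be legal:
async function updateAllProducts(foundProducts) {
    for (const foundProduct of foundProducts) {
        await service.updateTikiProduct(foundProduct.url);
    }
}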

Using async.js for deep populating sails.js

I have a big issue with my function in sails.js (v12). I'm trying to get the full userDetail using async (v2.3) to deep-populate my user info:
UserController.js:
userDetail: function (req, res) {
    var currentUserID = authToken.getUserIDFromToken(req);
    async.auto({
        // Find the user
        user: function (cb) {
            User
                .findOne({ id: req.params.id })
                .populate('userFollowing')
                .populate('userFollower')
                .populate('trips', { sort: 'createdAt DESC' })
                .exec(function (err, foundedUser) {
                    if (err) {
                        return res.negotiate(err);
                    }
                    if (!foundedUser) {
                        return res.badRequest();
                    }
                    // console.log('foundedUser :', foundedUser);
                    cb(null, foundedUser);
                });
        },
        // Find me
        me: function (cb) {
            User
                .findOne({ id: currentUserID })
                .populate('myLikedTrips')
                .populate('userFollowing')
                .exec(function (err, user) {
                    var likedTripIDs = _.pluck(user.myLikedTrips, 'id');
                    var followingUserIDs = _.pluck(user.userFollowing, 'id');
                    cb(null, { likedTripIDs, followingUserIDs });
                });
        },
        populatedTrip: ['user', function (results, cb) {
            Trip.find({ id: _.pluck(results.user.trips, 'id') })
                .populate('comments')
                .populate('likes')
                .exec(function (err, tripsResults) {
                    if (err) {
                        return res.negotiate(err);
                    }
                    if (!tripsResults) {
                        return res.badRequest();
                    }
                    cb(null, _.indexBy(tripsResults, 'id'));
                });
        }],
        isLiked: ['populatedTrip', 'me', 'user', function (results, cb) {
            var me = results.me;
            async.map(results.user.trips, function (trip, callback) {
                trip = results.populatedTrip[trip.id];
                if (_.contains(me.likedTripIDs, trip.id)) {
                    trip.hasLiked = true;
                } else {
                    trip.hasLiked = false;
                }
                callback(null, trip);
            }, function (err, isLikedTrip) {
                if (err) {
                    return res.negotiate(err);
                }
                cb(null, isLikedTrip);
            });
        }]
    },
    function finish(err, data) {
        if (err) {
            console.log('err = ', err);
            return res.serverError(err);
        }
        var userFinal = data.user;
        //userFinal.trips = data.isLiked;
        userFinal.trips = "test";
        return res.json(userFinal);
    });
},
I have tried almost everything to fix this, but nothing is working...
I am able to get my array of trips (data.isLiked), but I can't get it onto my userFinal trips.
I even tried setting a plain string value on userFinal.trips:
JSON response:
{
    "trips": [], // <-- my problem is here !!
    "userFollower": [
        {
            "user": "5777fce1eeef472a1d69bafb",
            "follower": "57e44a8997974abc646b29ca",
            "id": "57efa5cf605b94666aca0f11"
        }
    ],
    "userFollowing": [
        {
            "user": "57e44a8997974abc646b29ca",
            "follower": "5777fce1eeef472a1d69bafb",
            "id": "5882099b9c0c9543706d74f6"
        }
    ],
    "email": "test2#test.com",
    "userName": "dany",
    "isPrivate": false,
    "bio": "Hello",
    "id": "5777fce1eeef472a1d69bafb"
}
Question
How do I get my array of trips (isLiked) into my user's trips array?
Why is my result not what I expect?
Thank you for your answers.
Use .toJSON() before overwriting any association on the model.
Otherwise the default toJSON implementation overrides any changes made to the model's associated data.
var userFinal = data.user.toJSON(); // Use of toJSON
userFinal.trips = data.isLiked;
return res.json(userFinal);
On another note, use the native Array .map or _.map in place of async.map, since there is no asynchronous operation inside the iterator function. Otherwise you may run into a RangeError: Maximum call stack size exceeded.
Also, it might be better to send the response only from the final callback (remove res.negotiate and res.badRequest from async.auto's task functions), since response methods are terminal.
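As a rough sketch of that last mapping suggestion (reusing the results object from the question, everything else unchanged), the isLiked task could drop async.map entirely:
// Sketch: synchronous mapping with _.map, since nothing inside the iterator is asynchronous.
isLiked: ['populatedTrip', 'me', 'user', function (results, cb) {
    var me = results.me;
    var isLikedTrips = _.map(results.user.trips, function (trip) {
        var populated = results.populatedTrip[trip.id];
        populated.hasLiked = _.contains(me.likedTripIDs, populated.id);
        return populated;
    });
    return cb(null, isLikedTrips);
}]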

Async double callback in NodeJS loop

I'm moving on to the next step of my web scraper today!
I'm already looping over a URL array with async, and I would like to loop again inside that callback and wait for the inner loop to finish before continuing.
I can't figure out how to use the two callbacks.
This is my code:
var getWebData = function(url) {
    var data = [];
    async.eachSeries(url, function(urlSingle, cb) {
        request(urlSingle, function(err, resp, body) {
            if (!err) {
                var $ = cheerio.load(body);
                var categoriesURL = [];
                $('.ombre_menu li').each(function(i, element) {
                    $(this).find('.nav_sous-menu_bloc li a').each(function(i, element) {
                        categoriesURL.push('https://blabla' + $(this).attr('href'));
                    });
                    // I WANT TO LOOP on the categoriesURL array HERE
                    var jsObject = { name: "", description: "", price: "", categorie: "", liter: "", kilo: "", pricePer: "", quantity: "", capacity: "", promotion: "", scrapingDate: "", url: "" };
                    data.push(jsObject);
                });
            }
            cb();
        });
    }, function() {
        // this will run when the loop is done
        var json = JSON.stringify(data);
        fs.writeFile('output.json', JSON.stringify(json, null, 4), function(err) {
            console.log('File successfully written!');
        });
    });
};
getWebData(url);
app.listen('8080');
Does anyone know how I can do this?
Thanks
I made a couple of changes to your code:
Used .mapSeries in place of .eachSeries. This way you get the data from the iterator function in the same order as the input array: a square function gets [4, 9] for input [2, 3], never [9, 4].
Broke the code into functions so that each function does one specific task.
Moved the categoriesURL processing out of loop 1.
Returned early, which improves code readability: if (err) return callback(err);
function getWebData(url) {
    // Using .mapSeries in place of .eachSeries as you seem to want to get data from the iterator function
    async.mapSeries(url, processUrl, function(err, results) {
        // this will run when the loop is done
        var json = JSON.stringify(results);
        fs.writeFile('output.json', JSON.stringify(json, null, 4), function(err) {
            if (err) return console.error('Error', err);
            console.log('File successfully written!');
        });
    });
}

function processUrl(url, callback) {
    request(url, function(err, resp, body) {
        if (err) // Return simple cases early; improves code readability
            return callback(err); // or return callback(); -- if you don't want to send the error upwards
        var $ = cheerio.load(body);
        var categoriesURL = [];
        $('.ombre_menu li').each(function(i, element) { // loop 1
            $(this).find('.nav_sous-menu_bloc li a').each(function(i, element) { // loop 2
                categoriesURL.push('https://blablablac' + $(this).attr('href'));
            }); // loop 2 end
        }); // loop 1 end
        // I WANT TO LOOP ON THE categoriesURL ARRAY HERE
        // Using .mapSeries in place of .eachSeries for the same reason as above
        async.mapSeries(categoriesURL, processCategoryUrl, function(err, results) {
            if (err)
                return callback(err);
            // This function is called after the categoriesURL array has been processed
            // Do what you want here, then call the callback provided to this method
            return callback(null, results);
        });
    });
}

function processCategoryUrl(categoryUrl, callback) {
    // Just process categoryUrl here and call callback with an error or results
    return callback();
}

getWebData(url);
app.listen('8080');
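The processCategoryUrl stub above is left for you to fill in. A purely illustrative sketch using the same request/cheerio setup (the selector and fields here are hypothetical placeholders, not taken from the question):
// Hypothetical sketch: fetch one category page and hand back a data object for it.
function processCategoryUrl(categoryUrl, callback) {
    request(categoryUrl, function(err, resp, body) {
        if (err) return callback(err);
        var $ = cheerio.load(body);
        var item = {
            name: $('h1').first().text().trim(), // placeholder selector
            url: categoryUrl,
            scrapingDate: new Date().toISOString()
        };
        return callback(null, item);
    });
}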
You can use a nested eachSeries, like this:
var getWebData = function(url) {
    var data = [];
    async.eachSeries(url, function(urlSingle, cb) {
        request(urlSingle, function(err, resp, body) {
            if (!err) {
                var $ = cheerio.load(body);
                var categoriesURL = [];
                $('.ombre_menu li').each(function(i, element) {
                    $(this).find('.nav_sous-menu_bloc li a').each(function(i, element) {
                        categoriesURL.push('https://blablablac' + $(this).attr('href'));
                    });
                    async.eachSeries(categoriesURL, function(categoryURL, cb2) {
                        // Do whatever you want to do here
                        cb2();
                    }, function() {
                        // You can apply if/else on err here and set your callback response accordingly
                        cb();
                    });
                });
            }
        });
    }, function() {
        // this will run when the loop is done
        var json = JSON.stringify(data);
        fs.writeFile('output.json', JSON.stringify(json, null, 4), function(err) {
            console.log('File successfully written!');
        });
    });
};
getWebData(url);
app.listen('8080');

sails.js find queries with async.js each, parallel calls - each returning early

sails v0.11.0 (http://sailsjs.org/)
I have tried unsuccessfully to use the .exec callback, promises (http://sailsjs.org/documentation/reference/waterline-orm/queries), and now async.js (https://github.com/caolan/async) to control the asynchronous flow around looping over a find query. The async.each log output does not contain the results of the parallel work (although async.parallel does populate them).
So if your solution works using .exec callbacks, promises or async.js - I will gladly take it!
I found this link, which gives some helpful examples of async.js: http://www.sebastianseilund.com/nodejs-async-in-practice
Thank you for your time and assistance.
Below is my code using async:
/**
 * @module :: Service
 * @type {{findProfile: Function}}
 */
require('async');
module.exports = {
    getProfile: function (userId, callback) {
        var json = {};
        json.notFound = false;
        json.locations = {};
        json.sports = {};
        User.findOne({id: userId}).exec(function (err, user) {
            if (err) {
                json.notFound = true;
                json.err = err;
            }
            if (!err) {
                json.user = user;
                UserSport.find({user_id: user.id}).exec(function (err, userSports) {
                    if (err) {
                        sails.log.info("userSports error: " + userSports);
                    }
                    async.each(userSports, function (userSport, callback) {
                        LocationSport.findOne({id: userSport.locationsport_id}).exec(function (err, locationSport) {
                            if (locationSport instanceof Error) {
                                sails.log.info(locationSport);
                            }
                            async.parallel(
                                [
                                    function (callback) {
                                        Location.findOne({id: locationSport.location_id}).exec(function (err, location) {
                                            if (location instanceof Error) {
                                                sails.log.info(location);
                                            }
                                            callback(null, location);
                                        });
                                    },
                                    function (callback) {
                                        Sport.findOne({id: locationSport.sport_id}).exec(function (err, sport) {
                                            if (sport instanceof Error) {
                                                sails.log.info(sport);
                                            }
                                            callback(null, sport);
                                        });
                                    }
                                ],
                                function (err, results) {
                                    if (!(results[0].id in json.locations)) {
                                        json.locations[results[0].id] = results[0];
                                    }
                                    if (!(results[1].id in json.sports)) {
                                        json.sports[results[1].id] = results[1];
                                    }
                                }
                            ); // async.parallel
                        }); // locationSport
                        callback();
                    }, function (err) {
                        sails.log.info('each');
                        sails.log.info(json);
                    }); // async.each
                }); // UserSport
            }
        }); // User
    }
};
The structure of your code is the following:
async.each(userSports, function (userSport, callback) {
    // Whatever happens here, it runs asynchronously
    callback();
}, function (err) {
    sails.log.info('each');
    sails.log.info(json);
}); // async.each
You are calling the callback method, but the processing of your data is not yet done (it is still running asynchronously). As a result, sails.log.info is called immediately.
You should modify your code so the callback is called once the processing is done, i.e. in the result callback of your async.parallel:
async.each(userSports, function (userSport, outer_callback) {
    LocationSport.findOne({id: userSport.locationsport_id}).exec(function (err, locationSport) {
        //...
        async.parallel(
            [
                function (callback) {
                    // ...
                },
                function (callback) {
                    // ...
                }
            ],
            function (err, results) {
                // ...
                outer_callback();
            }
        ); // async.parallel
    }); // locationSport
}, function (err) {
    sails.log.info('each');
    sails.log.info(json);
}); // async.each

How can I convert this node-async code to using Q? Do I need to return a promise?

In "view" method within my controller was previously using node-async but I wanted to try out using q.
I'm currently trying to convert this
exports.view = function (req, res) {
    var category = req.params.category,
        id = req.params.id,
        ip = req.connection.remoteAddress,
        slug = req.params.slug,
        submission,
        userId = typeof req.session.user !== 'undefined' && req.session.user.id ? req.session.user.id : null,
        views;

    var getSubmission = function (submissionId, callback) {
        Submission.getSubmission({
            id: submissionId
        }, function (err, submission) {
            if (err) {
                callback(err);
            } else if (submission) {
                callback(null, submission);
            } else {
                callback(err);
            }
        });
    };

    async.waterfall([
        function (callback) {
            getSubmission(id, callback);
        },
        function (submission, callback) {
            res.render('submission', {
                title: submission.title + ' -',
                submission: submission
            });
        }]);
};
To use Q... I started doing something like:
var getSubmission = function(id) {
    return Submission.getSubmission({
        id: submissionId
    }).then(function(submission) {
        return submission;
    });
};

q.fcall(getSubmission).then(function(submission) {
    console.log(submission);
});
But it's not quite working as I intended. Am I doing something wrong here? How can I do this?
Is Submission.getSubmission a callback-based call to a database? Then you can't "chain" promises onto it directly. You'll have to use the deferred method:
var getSubmission = function(id) {
    var deferred = Q.defer();
    Submission.getSubmission({
        id: id
    }, function(err, data) {
        if (err) {
            deferred.reject(err);
        } else {
            deferred.resolve(data);
        }
    });
    return deferred.promise;
};

getSubmission(some_id).then(successCallback, failureCallback);
You can also use Q#denodeify to convert a function using Node.js-style callbacks (function(err, data)) into a promise-based function. Thus, the above can also be achieved with the following:
getSubmissionPromise = Q.denodeify(Submission.getSubmission);
getSubmissionPromise({id: some_id}).then(successCallback, failureCallback);
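For context, here is a minimal sketch (assuming the Express-style req/res and the Submission model from the question) of how the promise-based getSubmission could replace the async.waterfall in the original view method:
// Sketch: using the promise-returning getSubmission inside the view handler.
exports.view = function (req, res) {
    var id = req.params.id;
    getSubmission(id).then(function (submission) {
        res.render('submission', {
            title: submission.title + ' -',
            submission: submission
        });
    }, function (err) {
        // handle the error however fits the app, e.g. render an error page
        res.status(500).send(err.message || 'Error loading submission');
    });
};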
