Recursive Fetch All Items In DynamoDB Query using Node JS - javascript

This is probably more of a JS/async question than a DynamoDB-specific question -
I want to fetch all the items in a table with a hash key in Amazon's DynamoDB. The table also has Range key in it.
I am using a NodeJS library which is a wrapper around AWS DynamoDB REST API. -
Node-DynamoDB
DynamoDB only returns 1 MB worth of results with each query. To fetch the remainder of the results, it includes lastEvaluatedKey. We can include this in another query to fetch another 1 MB worth of results, and so on...
I am facing difficulty in writing a recursive async function which should hit the service sequentially till i can get all the results back. (table will never have more than 10 MB for my use case, no chance of a runaway query)
Some pseudo code for illustration:
// Pseudo-code for a single query call; per the question, one call returns
// at most one page of results.
ddb.query('products', primarykey, {}, function(err,result){
//check err
if(result && result.lastEvaluatedKey){
// A lastEvaluatedKey is present, so more results remain to be fetched.
//run the query again
var tempSet = result.items;
//temporarily store result.items so we can continue and fetch remaining items.
}
else{
// No lastEvaluatedKey: this page is the last one.
var finalSet = result.items;
//figure out how to merge with items that were fetched before.
}
});

/**
 * Fetches every item stored under a hash key in the 'products' table by
 * re-issuing the query with the returned lastEvaluatedKey until none is left.
 *
 * @param {*} primarykey - Hash key handed to ddb.query unchanged.
 * @param {function} cb - Called exactly once: cb(err) for the first failing
 *   query, otherwise cb(null, items) with the items of all pages in order.
 */
var getAll = function(primarykey, cb) {
  const finalSet = [];

  const nextBatch = function(lek) {
    // Only send exclusiveStartKey when continuing from a previous page.
    const options = lek ? { exclusiveStartKey: lek } : {};

    ddb.query('products', primarykey, options, function(err, result) {
      if (err) return cb(err);

      // Append element by element: push.apply(finalSet, items) passes every
      // item as a call argument and throws a RangeError on very large pages.
      const items = (result && result.items) || [];
      for (let i = 0; i < items.length; i++) {
        finalSet.push(items[i]);
      }

      if (result && result.lastEvaluatedKey) {
        nextBatch(result.lastEvaluatedKey);
      } else {
        cb(null, finalSet);
      }
    });
  };

  nextBatch();
};
// Example call: print the error (if any) and the complete list of items.
getAll(primarykey, (err, all) => {
  console.log(err, all);
});

After a few cups of coffee, I wrote this recursive function.
Hope this helps others. If you see a bug, please edit it or leave a comment.
/**
 * Collects the items stored under one hash key, re-querying with the
 * returned lastEvaluatedKey until no further key is reported or the
 * item limit is reached. Construct with `new`, then call getItems().
 *
 * @param {string} table - Table name passed to ddb.query.
 * @param {*} hash - Hash key passed to ddb.query.
 * @param {number} maxItems - Stop paging once at least this many items are
 *   collected; a falsy value means no limit. Whole pages are kept, so the
 *   result may hold more than maxItems entries.
 * @param {function} callback - callback(err, null) on a query error,
 *   callback(null, null) when a query yields no result object, otherwise
 *   callback(null, items).
 */
var DynamoDbItemFetcher = function(table,hash,maxItems,callback){
  var self = this;
  self.table = table;
  self.startKey = null;
  self.hash = hash;
  self.maxItems = maxItems;
  self.items = [];
  self.callback = callback;

  self.getItems = function(){
    var params = {};
    if (self.startKey) {
      params.exclusiveStartKey = self.startKey;
    }

    ddb.query(self.table, self.hash, params, function(err1, result){
      if (err1) return self.callback(err1, null);
      if (!result) return self.callback(null, null);

      self.items = self.items.concat(result.items);

      var hasMore = result.lastEvaluatedKey && result.lastEvaluatedKey.hash;
      // ">=": stop as soon as the limit is reached. The previous ">" issued
      // one more query when exactly maxItems had been collected.
      var limitReached = self.maxItems && self.items.length >= self.maxItems;
      if (!hasMore || limitReached) {
        return self.callback(null, self.items);
      }

      self.startKey = result.lastEvaluatedKey; // continue after this key
      self.getItems();
    });
  };
};

Here's a variation using promises. I needed to get a list of table names, not scan items from a table, but similar concepts apply.
/**
 * Lists all table names, following LastEvaluatedTableName from page to page.
 *
 * @param {string} [key] - Table name to continue after; omit on the first call.
 * @param {string[]} [prevTableNames] - Names gathered by earlier pages.
 * @returns {Promise<string[]>} All names in page order; rejects with the
 *   error of the first failing listTables call.
 */
function getTableNames(key, prevTableNames) {
  return new Promise(function(resolve, reject) {
    dynamodb.listTables({
      ExclusiveStartTableName: key
    }, function(err, response) {
      if (err) {
        return reject(err);
      }
      const tableNames = (prevTableNames || []).concat(response.TableNames);
      if (!response.LastEvaluatedTableName) {
        return resolve(tableNames);
      }
      // Resolving with the next page's promise adopts its outcome, so no
      // explicit .then(resolve).catch(reject) relay is needed.
      resolve(getTableNames(response.LastEvaluatedTableName, tableNames));
    });
  });
}

Related

Node.js and SQL: How can I get the value outside of the for loop?

Using Node.js and a MySQL database I'm working on a small project.
I'm trying to loop through an array to get some values out of a MySQL database. I'm searching for the corresponding "medicine ID" to an "medicine name" that the user entered. My Code is working correctly and this is how it looks.
// Question code: gathers the MedikamentId for every name in medicineMontag.
var medizinArray = [];
function appendArray(input) {
medizinArray.push(input);
}
var sqlMedNameToId = "SELECT MedikamentId FROM Medikament WHERE Bezeichnung = ?"
// Starts one query per name; each outcome is delivered to its callback.
for (var i=0;i<medicineMontag.length;i++){
var montagsMedizin = medicineMontag[i];
mySqlConnection.query(sqlMedNameToId, montagsMedizin, function(err, rows, fields){
if(!err) {
// NOTE(review): `result` is assigned without a declaration here.
result = rows[0].MedikamentId;
appendArray(result);
} else {
console.log(err);
}
})
}
// Per the question this prints an empty array: the query callbacks have not
// run yet when this line executes.
console.log(medizinArray);
The code is working but I can't get the medizinArray out of the for loop. In my console I get an empty array. When I put the console.log(medizinArray) inside the for loop I get the array that I want.
I'm currently not familiar with Promises. I read about it and saw some other questions but I can't figure out how to implement Promises in my code.
SQL operations are asynchronous, so to obtain the result outside of the callback you need to wrap them in a Promise and call the resolve() function when the operation is successful. Use any of the techniques below:
Async/await technique:
// Looks up the MedikamentId for every name in medicineMontag and logs the
// ids once ALL queries have answered.
(async function(){
  let medizinArray = [];
  function appendArray(input) {
    medizinArray.push(input);
  }
  let sqlMedNameToId = "SELECT MedikamentId FROM Medikament WHERE Bezeichnung = ?";

  // One promise per query. Promise.all waits for every one of them instead
  // of resolving when the callback of the last index happens to fire, and it
  // resolves immediately for an empty medicineMontag.
  const ids = await Promise.all(medicineMontag.map(function(montagsMedizin){
    return new Promise(function(resolve, reject){
      mySqlConnection.query(sqlMedNameToId, montagsMedizin, function(err, rows, fields){
        if (err) return reject(err);
        // An unknown name yields no row; reject instead of throwing inside
        // the callback, where the error could not be caught.
        if (!rows || rows.length === 0) {
          return reject(new Error("No Medikament found for: " + montagsMedizin));
        }
        resolve(rows[0].MedikamentId);
      });
    });
  }));

  // ids are in the same order as medicineMontag.
  ids.forEach(function(id){ appendArray(id); });
  console.log(medizinArray);//now medizinArray shows up here
})().catch(function(err){console.log(err)});
Promise/then technique:
// Looks up the MedikamentId for every name in medicineMontag and logs the
// ids once ALL queries have answered.
let medizinArray = [];
function appendArray(input) {
  medizinArray.push(input);
}
let sqlMedNameToId = "SELECT MedikamentId FROM Medikament WHERE Bezeichnung = ?";

// Starting inside .then() turns a synchronous failure into a rejection that
// reaches the .catch() below.
Promise.resolve().then(function(){
  // One promise per query. Promise.all waits for every one of them instead
  // of resolving when the callback of the last index happens to fire, and it
  // resolves immediately for an empty medicineMontag.
  return Promise.all(medicineMontag.map(function(montagsMedizin){
    return new Promise(function(resolve, reject){
      mySqlConnection.query(sqlMedNameToId, montagsMedizin, function(err, rows, fields){
        if (err) return reject(err);
        // An unknown name yields no row; reject instead of throwing inside
        // the callback, where the error could not be caught.
        if (!rows || rows.length === 0) {
          return reject(new Error("No Medikament found for: " + montagsMedizin));
        }
        resolve(rows[0].MedikamentId);
      });
    });
  }));
}).then(function(ids){
  // ids are in the same order as medicineMontag.
  ids.forEach(function(id){ appendArray(id); });
  console.log(medizinArray);//now medizinArray shows up here
}).catch(function(err){
  // The first failure is logged once, here.
  console.log(err);
});

Terminate mysql connection after multiple queries have executed

I have some node.js code which fetches data from an API in a loop and runs multiple mysql queries to update some rows.
The issue I have is that the script keeps running until I terminate the mysql connection with connection.end(). I am a newbie in asynchronous code. Where do I call the termination function so that it executes when all the queries have finished executing? What's the right design pattern for this? Would waterfall be any good?
This is a snippet from the code I have at the moment (error handling removed for simplicity):
// Question code: ten API requests, each followed by one UPDATE per key of the
// parsed response body. The connection options are a placeholder as posted.
var connection = mysql.createConnection({ host, user, etc... });
for (var i = 0; i < 10; i++) {
var url = "http://api.com?i="+i;
request(url, function(error, response, body) {
var data = JSON.parse(body);
// NOTE(review): `el` is used without a declaration.
for (el in data) {
// No callback is passed, so nothing here learns when the query finished.
connection.query(
"UPDATE table SET col = ? WHERE symbol = ?",
[
data[el].col,
el
]
);
}
});
}
// this will run before all queries have executed
// resulting in an error
connection.end();
So, the problem here is that you are cycling in a synchronized way through the data here:
// Excerpt of the loop above: every iteration only starts a query; no callback
// is passed, so the loop does not wait for any result.
var data = JSON.parse(body);
for (el in data) {
connection.query(
"UPDATE table SET col = ? WHERE symbol = ?",
[
data[el].col,
el
]
);
}
while the mysql module handles the query in a callback style:
// Callback form of connection.query: the outcome arrives as
// (error, rows, fields) and is forwarded to `callback` as (error) or (null, rows).
connection.query(query, function(error, rows, fields) {
if (error) {
return callback(error);
} else {
return callback(null,rows);
}
});
where callback has the signature callback(error, rows). Assuming you want a reusable function, you can handle the results like this:
/**
 * Runs `query` on `this.connection` and reports the outcome node-style:
 * callback(error) on failure, callback(null, rows) on success.
 */
var executeQuery = function(query,callback) {
  const forward = (error, rows) => (error ? callback(error) : callback(null, rows));
  this.connection.query(query, forward);
}
and you can call in your code like
// Example call: the callback receives (error, rows).
executeQuery(statement, (error, rows) => {
  //...
});
That said, you must consider that you are running multiple queries against your database, and it is not recommended to do this in a for loop. You should consider a better solution, which could be a waterfall, as you say, or Promise.all using the Promise paradigm.
Suppose that to have this nice function:
/**
 * Runs `block` once per item and waits for all of them.
 *
 * @param {Array} items - Values to process.
 * @param {function} block - Called as block(item, index, resolve, reject);
 *   it must settle its promise through resolve or reject.
 * @returns {Promise<Array>} Resolves with the results in item order, or
 *   rejects with the first rejection (or an error thrown by block).
 */
var promiseAllP = function(items, block) {
  // One promise per item. The previous wrapper IIFE only re-bound `item` and
  // an unused `i`, which the callback parameters already provide.
  const promises = items.map(function(item, index) {
    return new Promise(function(resolve, reject) {
      block(item, index, resolve, reject);
    });
  });
  return Promise.all(promises);
}
that takes as input an array of items and an execution function of the form function(item,index,resolve,reject), which receives the resolve and reject functions of a Promise. So let's turn your executeQuery function into a Promise as well:
/**
 * Promise version of executeQuery: runs `query` on `this.connection`.
 * Must be invoked with `this` bound to an object that has a `connection`.
 *
 * @param {string} query - Statement passed to connection.query.
 * @returns {Promise} Resolves with the rows, rejects with the query error.
 */
var executeQueryP = function(query) {
  var self=this;
  return new Promise(function(resolve, reject) {
    self.connection.query(query, function(error, rows, fields) {
      if (error) {
        return reject(error);
      }
      // resolve() takes a single value: resolve(null, rows) resolved with
      // null and dropped the rows.
      return resolve(rows);
    });
  });
};
Now you can process your data in a totally async way promisyfied:
// Runs one UPDATE per entry of `data` and logs all results, or the first error.
// NOTE(review): the statement text is built by string substitution, with no
// escaping visible here — confirm the values are trusted.
promiseAllP(data, (item, index, resolve, reject) => {
  // example: prepare the query from item in the data
  const template = "UPDATE table SET col = %s WHERE symbol = %s";
  const query = replaceInString(template, item.col, item);
  // Hand the query's outcome straight to this item's resolve/reject.
  executeQueryP(query).then(resolve, reject);
})
  .then((results) => {
    // all execution completed
    console.log(results);
  })
  .catch((error) => {
    // some error occurred while executing
    console.error(error);
  });
where the replaceInString will help you to prepare the statement
/**
 * Fills the placeholders %s, %d, %f and %# in `template`, left to right,
 * with the remaining arguments. Every placeholder is treated alike; null,
 * undefined and missing values become an empty string.
 * The values are inserted verbatim (no quoting or escaping).
 *
 * @param {string} template - Text containing placeholders.
 * @param {...*} values - Replacement values, in order.
 * @returns {string} The filled-in text.
 */
var replaceInString = function(template, ...values) {
  let next = 0;
  return template.replace(/%s|%d|%f|%#/g, function() {
    const value = values[next++];
    return value == null ? '' : String(value);
  });
};//replace
This is what we have done here:
Used native Promise only
Turned your mysql query in a promise
Called the statements against your data in a completely asynchronous way
Used a Promise and Promise all paradigma, that let you collect the results of the Promise and return to the caller when all the functions are completed.
Caught errors in all the statement executions
Added a simple way to fill statements with parameters
Also notice the arrow function syntax (param1, param2) => that simplify the way to write a function, that can help a lot with the Promise paradigma.
For anyone interested, I ended up solving it by a mixture of promises and counting the queries, something along the lines of this (not sure if this code actually works but the idea is there):
/**
 * Requests the ten API URLs and gathers their parsed JSON bodies.
 *
 * @returns {Promise<Array>} Resolves with the parsed bodies, index-aligned
 *   with the request order (i = 0..9); rejects with the first request or
 *   JSON parse error.
 */
function fetchFromAPI() {
  return new Promise((resolve, reject)=>{
    var urls = [];
    for (var i = 0; i < 10; i++) {
      urls.push("http://api.com?i="+i);
    }
    var data = new Array(urls.length);
    var requestedUrls=0;
    urls.forEach((url, index)=>{
      request(url, (err, response, body) => {
        // Stop here on failure: there is no body to parse.
        if (err) return reject(err);
        try {
          // Store by index so the result order does not depend on which
          // response arrives first.
          data[index] = JSON.parse(body);
        } catch (parseError) {
          return reject(parseError);
        }
        requestedUrls++;
        if (requestedUrls === urls.length) resolve(data);
      });
    });
  });
}
// Writes every fetched element to the database and closes the connection
// after the last UPDATE has answered.
fetchFromAPI().then(data=>{
  // Keep the connection object: queries and end() are issued on it, not on
  // the mysql module. Fill in the real settings (user, hostname, etc...).
  var connection = mysql.createConnection({ /* user, hostname, etc... */ });

  // Nothing to update: close right away, since no query callback will do it.
  if (data.length === 0) {
    connection.end();
    return;
  }

  var processedKeys=0;
  data.forEach(el=> {
    connection.query("UPDATE table SET name = ? WHERE id = ?", [el.name, el.id], (err, rows, fields) => {
      // Report a failed update, but keep counting so the connection is
      // still closed after the last answer.
      if (err) console.error(err);
      processedKeys++;
      if (processedKeys === data.length) {
        connection.end();
      }
    });
  });
}).catch(err=>{
  console.error(err);
});

Iterate save in Node.JS across an array

I couldn't use a simple for loop because request.save is a function. So I tried forEach. It works perfectly! Until I add in the request.save part and I get the following error message that breaks my app.
Error: Can't set headers after they are sent.
// Question code: finds the people matching the submitted phone number and
// saves one Requests document per match.
// NOTE(review): the brackets do not balance as posted (the exec callback is
// closed before saveRequest is defined) — presumably saveRequest was meant
// to live inside that callback, where `people` is in scope.
exports.submit = function (req, res) {
Person.find({
cellPhone: req.body.phone
}).exec(function (err, people) {
people.forEach(saveRequest);
}
// Builds a Requests document from the request body and people[index], then saves it.
function saveRequest(item, index) {
var request = new Requests();
request.start = req.body.start.value;
request.finish = req.body.finish.value;
request.phone = req.body.phone;
request.offDay = req.body.date;
request.user = people[index]._id;
request.name = people[index].name;
request.group = people[index].group;
request.save(function (err) {
if (err) {
console.log('request.save');
return res.status(400);
} else {
// Remove sensitive data before login
//user.password = undefined;
//user.salt = undefined;
console.log(request);
// Runs once per saved document, so the response is written once per match —
// presumably the source of "Can't set headers after they are sent".
res.json(request);
}
});
}
});
The problem is when you perform the .save() you pass an anonymous function that complete the response in case of error.
So you finish on the first save event error.
You should complete the response outside the save callback.
Maybe use events to sync your code, or better the generators.
Before your forEach loop:
// Accumulators filled by the save callbacks: successfully saved documents
// and the errors of failed saves.
let savedResponses = [];
let savedErrors = [];
...
Then your savedRequest:
/**
 * forEach callback: builds a Requests document from the request body and
 * people[index], saves it, and records the outcome in savedErrors or
 * savedResponses instead of answering the HTTP request.
 */
function saveRequest(item, index) {
  const request = new Requests();
  const form = req.body;

  // Fields taken from the submitted form.
  request.start = form.start.value;
  request.finish = form.finish.value;
  request.phone = form.phone;
  request.offDay = form.date;

  // Fields taken from the matching person.
  const person = people[index];
  request.user = person._id;
  request.name = person.name;
  request.group = person.group;

  request.save((err) => {
    if (err) {
      console.log('request.save error');
      savedErrors.push(err);
      return;
    }
    console.log(request);
    savedResponses.push(request);
  });
}
Then, after the forEach loop, you should wait for the asynchronous work in the .save() callbacks to finish.
You could use the events package, generators, or the promise pattern.
It depends on the version of your Node.
When you have the code synched you could just complete your response checking for errors first:
// Report failure when any save failed. res.status is a method (it is called
// as res.status(400) in the handler above); assigning to it would replace
// the method and leave the response status unchanged.
if (savedErrors.length > 0) {
  res.status(400);
  // ... report errors
}
Or just complete the response with the savedResponses.

Waiting for data from async nested Functions within JQuery $.each in Javascript

this is a follow up question to Asynchron Errorhandling inside $.each. As mentioned in the comments there, i want to handle data after the last async job from a $.each loop.
So for instance:
// Question code: counts started, successful and failed inserts to detect
// when the last query callback has run. `...` stands for the collection
// being iterated.
var errors = 0;
var started = 0;
var successful = 0;
$.each(..., function(){
started++;
connection.query('INSERT INTO tableName SET ?', post, function(err, result)
{
if (err) {
// Duplicate entries are counted; any other error is rethrown.
if (err.code === 'ER_DUP_ENTRY')
{ errors++; }
else
{ throw err; }
} else { successful++;}
// Every started query has answered once the two counters add up to `started`.
if (started == successful + errors) {
// all done
console.log(errors + " errors occurred");
}
});
});
In this case everything logs out properly when the // all done comment is reached. But what if i want to use this data later on instead of just logging it out.
Is there a way to wait for this data outside of the $.each scope? Or do i always have to handle everything in the nested function?
You can use promises instead
var promises = [];
$.each(..., function() {
var promise = new Promise(function(resolve, reject) {;
connection.query('INSERT INTO tableName SET ?', post, function(err, result) {
if (err) {
resolve(err.code);
} else {
resolve(result);
}
});
});
promises.push(promise);
});
var result = Promise.all(promises);
And then when you want to use the data, you do
result.then((data) => {
  // use data array looking like ["result data", "result data", "ER_DUP_ENTRY" .. etc]
});

Async for Array problems

I've been pointed towards using the async module, but I'm not quite sure how to use waterfall to solve my problem.
My original code had problems with asynchronicity.
// Question code: loads the ten newest images, then looks up the author of
// the first eight.
// NOTE(review): the exec callback is closed with `}` only (no `);`) as posted.
var Image = require('./models/image');
var User = require('./models/user');
var query = Image.find({});
query.limit(10);
query.sort('-date')
query.exec(function (err, collected) {
if (err) return console.error(err);
var i = 0;
var authors = [];
while (i < 8) {
var search = User.find({'twitter.id' : collected[i].author});
// The username is pushed only when this callback runs.
search.exec(function (err, user){
if (err) return console.error(err);
var result = (user[0].twitter.username);
authors.push(result);
});
i = i + 1;
}
}
// Per the question this prints '[]'.
console.log(authors);
I want the authors array to hold all the found usernames. However, the last console.log() call prints '[]'.
So, you want to wait for all of the searches to complete first. You should put all your async calls into an array, and then use an async library to chain them together (waterfall) or execute simultaneously (parallel). Parallel tends to execute "faster":
// Builds one task per author lookup and runs them with async.parallel.
// `i` and `collected` come from the surrounding query callback.
var searches = [];
while (i < 8) {
  // `const` gives every iteration its own binding. With `var`, all tasks
  // shared one `search` variable and each ran the query of the LAST iteration.
  const search = User.find({'twitter.id' : collected[i].author});
  searches.push(function(cb) {
    search.exec(function (err, user){
      if (err) return cb(err, null);
      cb(null, user[0].twitter.username);
    });
  });
  i++;
}
async.parallel(searches, function( err, authors ) {
  if ( err ) return console.error( err );
  console.log(authors);
  // Authors only defined here.
  // TODO: More code
});
// Authors not defined here.

Categories

Resources