Async double callback in NodeJS loop - javascript

I'm going to the next step of my webscraper today !
I'm already looping over a URL array with async, and I would like to loop again inside this callback and wait for that inner loop to finish before moving on to the next URL.
I cannot figure out how to use two callbacks.
This is my code :
/**
 * Scrapes every page in `url` one after another and writes the collected
 * records to output.json.
 *
 * @param {string[]} url - Page URLs to fetch, processed in series.
 */
var getWebData = function(url) {
  var data = [];
  async.eachSeries(url, function(urlSingle, cb) {
    request(urlSingle, function(err, resp, body) {
      if (err) {
        // Best effort: report the failure and carry on with the next URL.
        console.error('Request failed for ' + urlSingle, err);
        return cb();
      }
      var $ = cheerio.load(body);
      var categoriesURL = [];
      $('.ombre_menu li').each(function() {
        $(this).find('.nav_sous-menu_bloc li a').each(function() {
          categoriesURL.push('https://blabla' + $(this).attr('href'));
        });
        // I WANT TO LOOP on the categoriesURL array HERE
        var jsObject = { name : "", description : "", price: "", categorie: "", liter: "", kilo: "", pricePer: "", quantity: "", capacity: "", promotion: "", scrapingDate : "", url: "" };
        data.push(jsObject);
      });
      cb();
    });
  }, function() {
    // This runs when the loop is done.
    // Serialize exactly once: stringifying an already-stringified value
    // writes a quoted, escaped string instead of pretty-printed JSON.
    fs.writeFile('output.json', JSON.stringify(data, null, 4), function(err) {
      if (err) {
        console.error('Could not write output.json', err);
        return;
      }
      console.log('File successfully written!');
    });
  });
};
// Start the scrape; `url` and `app` are defined outside this excerpt.
getWebData(url);
// NOTE(review): `app` is presumably an Express application — confirm.
app.listen('8080');
Does anyone know how I can do this?
Thanks

I made a couple of changes to your code:
Used .mapSeries in place of .eachSeries. This way you get the results from the iterator function in the same order as the input array. That means you'll get [4,9] for input [2,3] passed to a square function, never [9,4].
Broke the code into functions so that each function does one specific task.
Moved the categoriesURL processing out of loop 1.
Returned early, which improves code readability: if (err) return callback(err);
/**
 * Processes every URL in series with processUrl and writes the collected
 * results to output.json as pretty-printed JSON.
 *
 * @param {string[]} url - Page URLs to process, in order.
 */
function getWebData(url) {
  // .mapSeries (rather than .eachSeries) hands the per-URL results to the
  // final callback in the same order as the input array.
  async.mapSeries(url, processUrl, function(err, results) {
    // This runs when the loop is done (or stopped at the first error).
    if (err) {
      console.error('Error', err);
      return;
    }
    // Serialize exactly once; stringifying twice writes an escaped string.
    fs.writeFile('output.json', JSON.stringify(results, null, 4), function(writeErr) {
      if (writeErr) {
        console.error('Error', writeErr);
        return;
      }
      console.log('File successfully written!');
    });
  });
}
/**
 * Fetches one page, collects its category links and processes each of them
 * in series with processCategoryUrl.
 *
 * @param {string} url - Page to fetch.
 * @param {function} callback - Called with (err) on failure, or with
 *   (null, results) where results holds one entry per category URL.
 */
function processUrl(url, callback) {
  request(url, function(requestError, response, html) {
    // Bail out early on a failed request.
    if (requestError) {
      return callback(requestError);
    }

    var $ = cheerio.load(html);
    var categoriesURL = [];

    // Appends the absolute URL of the link bound to `this`.
    var collectLink = function() {
      categoriesURL.push('https://blablablac' + $(this).attr('href'));
    };

    // Outer loop: menu entries; inner loop: the links of each sub-menu.
    $('.ombre_menu li').each(function() {
      $(this).find('.nav_sous-menu_bloc li a').each(collectLink);
    });

    // All links are gathered; walk them one at a time and report upwards.
    async.mapSeries(categoriesURL, processCategoryUrl, function(mapError, results) {
      return mapError ? callback(mapError) : callback(null, results);
    });
  });
}
/**
 * Placeholder for the per-category work: it currently does nothing with
 * categoryUrl and signals completion immediately.
 *
 * @param {string} categoryUrl - Category page URL (unused for now).
 * @param {function} callback - Invoked with no arguments.
 * @returns {*} Whatever the callback returns.
 */
function processCategoryUrl(categoryUrl, callback) {
  // Process categoryUrl here, then call callback with an error or results.
  var outcome = callback();
  return outcome;
}
// Start the scrape; `url` and `app` are defined outside this excerpt.
getWebData(url);
// NOTE(review): `app` is presumably an Express application — confirm.
app.listen('8080');

You can use nested eachSeries. Like this:
/**
 * Scrapes every page in `url` in series; for each page, walks its category
 * links in a nested series loop before moving on to the next page. Writes
 * the collected `data` to output.json when everything is done.
 *
 * @param {string[]} url - Page URLs to fetch, processed in order.
 */
var getWebData = function(url) {
  var data = [];
  async.eachSeries(url, function(urlSingle, cb) {
    request(urlSingle, function(err, resp, body) {
      if (err) {
        // cb must be called on every path, otherwise the outer loop stalls.
        console.error('Request failed for ' + urlSingle, err);
        return cb();
      }
      var $ = cheerio.load(body);
      var categoriesURL = [];
      $('.ombre_menu li').each(function() {
        $(this).find('.nav_sous-menu_bloc li a').each(function() {
          categoriesURL.push('https://blablablac' + $(this).attr('href'));
        });
      });
      // The nested loop starts only after every link is collected, so cb
      // fires exactly once per page.
      async.eachSeries(categoriesURL, function(categoryUrl, cb2) {
        // Do whatever you want to do here, then signal completion.
        cb2();
      }, function(categoryErr) {
        // Pass any inner error up; a falsy value lets the outer loop go on.
        cb(categoryErr);
      });
    });
  }, function(err) {
    // This runs when the outer loop is done.
    if (err) {
      console.error('Scraping stopped early', err);
    }
    // Serialize exactly once; stringifying twice writes an escaped string.
    fs.writeFile('output.json', JSON.stringify(data, null, 4), function(writeErr) {
      if (writeErr) {
        console.error('Could not write output.json', writeErr);
        return;
      }
      console.log('File successfully written!');
    });
  });
};
// Start the scrape; `url` and `app` are defined outside this excerpt.
getWebData(url);
// NOTE(review): `app` is presumably an Express application — confirm.
app.listen('8080');

Related

Getting records from DynamoDB recursively using Q.Promises

I am having trouble implementing Q promises with a recursive DynamoDB call. I am new to Node.js and Q. Because DynamoDB limits how many results a single query returns, we need to run the query recursively to get all the required results.
Normally we use the query with a Q implementation, something like this:
/**
 * Runs a single DynamoDB query against `mytable` and exposes the outcome
 * as a Q promise.
 *
 * Reads `mytable`, `startTime`, `endTime`, `id` and `dynamodb` from the
 * enclosing scope.
 *
 * @returns {Promise} Resolves with the `Items` of the response, rejects
 *   with the query error.
 */
function getDBResults(){
  var deferred = Q.defer();

  var tableName = mytable;
  var attributeValues = {
    ':startTime': { N: startTime.toString() },
    ":endTime": { N: endTime.toString() },
    ":id": { S: id.toString() }
  };
  var params = {
    TableName: tableName,
    IndexName: 'mytable-index',
    KeyConditionExpression: 'id = :id',
    FilterExpression: 'deliveryTime between :startTime and :endTime',
    ExpressionAttributeValues: attributeValues,
    Select: 'ALL_ATTRIBUTES',
    ScanIndexForward: false,
  };

  dynamodb.query(params, function(err, data) {
    if (!err) {
      console.log('DATA'+ data);
      deferred.resolve(data.Items);
      return;
    }
    console.log('Dynamo fail ' + err);
    deferred.reject(err);
  });

  return deferred.promise;
}
// getDBResults must be called: the promise is its return value, and the
// function object itself has no `.then`.
getDBResults().then(
  function(data) {
    // handle data
  },
  function(err) {
    // handle error
  }
);
Using a recursive query I can get the results, but I need those results in another function. Because of the asynchronous nature of Node.js, the next function call happens before the recursive query function has finished its job. I want to collect all the results from the recursive query function, hand them as a promise to a new function, and finally handle all the data there.
The recursive query for DynamoDB looks like this:
// Starts the first DynamoDB query and hands every response to
// onQueryCallBack. Nothing is returned, so callers cannot tell when the
// paging has finished.
function getDBResults(){
//var q = Q.defer();
// No var/let here: `params` is assigned in an outer scope, and
// onQueryCallBack later sets params.ExclusiveStartKey on that same object
// to request the next page.
params = {
TableName: mytable,
IndexName: 'mytable-index',
KeyConditionExpression: 'id = :id',
FilterExpression: 'deliveryTime between :startTime and :endTime',
ExpressionAttributeValues: {
':startTime': {
N: startTime.toString()
},
":endTime": {
N: endTime.toString()
},
":id": {
S: id.toString()
}
},
Select: 'ALL_ATTRIBUTES',
ScanIndexForward: false,
};
dynamodb.query(params, onQueryCallBack);
}
/**
 * Handles one page of a DynamoDB query: requests the next page while the
 * response carries a LastEvaluatedKey, and appends this page's items to
 * the shared `allResults` array.
 *
 * Relies on `params`, `allResults` and `dynamodb` from the enclosing scope.
 *
 * @param {Error|null} err - Query error, if any.
 * @param {Object} data - Query response with `Items` and, when more pages
 *   exist, `LastEvaluatedKey`.
 */
function onQueryCallBack(err, data) {
  if (err) {
    console.log('Dynamo fail ' + err);
    console.error("Could not query db" + err);
    return;
  }
  if (typeof data.LastEvaluatedKey !== "undefined") {
    console.log("query for more...");
    // Continue from where this page stopped.
    params.ExclusiveStartKey = data.LastEvaluatedKey;
    dynamodb.query(params, onQueryCallBack);
  }
  data.Items.forEach(function(item) {
    allResults.push(item);
  });
}
Now I want that I can get the results as promise finally so I can handle them in the next function like this.
// getDBResults must be called: the promise is its return value, and the
// function object itself has no `.then`.
getDBResults().then(
  function(data) {
    // handle data
  },
  function(err) {
    // handle error
  }
);
Please help me on this, sorry if its a stupid question but recursive calls with promises have made a hurdle for me.
Thanks
First of all, keep the promisified function you already have. Use it as a building block for the recursive solution, instead of trying to alter it!
It might need two small adjustments though:
/**
 * Runs ONE DynamoDB query page and exposes the raw response as a Q promise.
 *
 * Two adjustments compared with the single-shot version: it accepts the
 * key to resume from, and it resolves with the whole response so the
 * caller can read LastEvaluatedKey as well as Items.
 *
 * Reads `mytable`, `startTime`, `endTime`, `id` and `dynamodb` from the
 * enclosing scope.
 *
 * @param {Object} [startKey] - LastEvaluatedKey of the previous page; omit
 *   it for the first page.
 * @returns {Promise} Resolves with the full query response, rejects with
 *   the query error.
 */
function getDBResults(startKey){
  var q = Q.defer();
  var params = {
    ExclusiveStartKey: startKey, // new: where this page starts
    TableName: mytable,
    IndexName: 'mytable-index',
    KeyConditionExpression: 'id = :id',
    FilterExpression: 'deliveryTime between :startTime and :endTime',
    ExpressionAttributeValues: {
      ':startTime': {
        N: startTime.toString()
      },
      ":endTime": {
        N: endTime.toString()
      },
      ":id": {
        S: id.toString()
      }
    },
    Select: 'ALL_ATTRIBUTES',
    ScanIndexForward: false,
  };
  dynamodb.query(params, function(err, data) {
    if (err) {
      q.reject(err);
    } else {
      q.resolve(data); // the whole response, not `data.Items`
    }
  });
  return q.promise;
}
Now we can use that to trivially implement the recursive solution:
/**
 * Collects the items of every page of the query, following
 * LastEvaluatedKey from page to page.
 *
 * @param {Object} [key] - Key to start from; omit it for the first page.
 * @returns {Promise<Array>} Resolves with the items of this page followed
 *   by the items of all later pages.
 */
function getRecursiveDBResults(key) {
  return getDBResults(key).then(function(data) {
    if (typeof data.LastEvaluatedKey !== "undefined") {
      // More pages exist: fetch the rest, then put this page in front.
      return getRecursiveDBResults(data.LastEvaluatedKey).then(function(items) {
        return data.Items.concat(items);
      });
    } else {
      return data.Items;
    }
  });
}
Here is how I solved the problem. Thanks, Bergi, for your solution as well.
/**
 * Runs the DynamoDB query page after page and resolves with the items of
 * all pages once no LastEvaluatedKey is returned.
 *
 * Reads `mytable`, `startTime`, `endTime`, `id` and `core` from the
 * enclosing scope.
 *
 * @returns {Promise<Array>} Resolves with every item, rejects with the
 *   first query error.
 */
function getDBResults() {
  var q = Q.defer();
  var dynamodb = core.getDynamoDB();
  // Local on purpose: each call pages through its own params object.
  var params = {
    TableName: mytable,
    IndexName: 'mytable-index',
    KeyConditionExpression: 'id = :id',
    FilterExpression: 'deliveryTime between :startTime and :endTime',
    ExpressionAttributeValues: {
      ':startTime': {
        N: startTime.toString()
      },
      ":endTime": {
        N: endTime.toString()
      },
      ":id": {
        S: id.toString()
      }
    },
    Select: 'ALL_ATTRIBUTES',
    ScanIndexForward: false,
  };
  var results = [];
  var callback = function(err, data) {
    if (err) {
      console.log('Dynamo fail ' + err);
      q.reject(err);
      // Stop here: there is no usable `data` to read items from.
      return;
    }
    // Record this page first so the promise never settles on a partial list.
    data.Items.forEach(function(item) {
      results.push(item);
    });
    if (data.LastEvaluatedKey) {
      params.ExclusiveStartKey = data.LastEvaluatedKey;
      dynamodb.query(params, callback);
    } else {
      q.resolve(results);
    }
  };
  dynamodb.query(params, callback);
  return q.promise;
}

How to get response from callback every time api called?

I have a search function that receives a string from the client; once the loop over the files is done, it sends the matching results back to the client using a callback. With the code below I see the following behaviour:
1- the first time I search for a string, it sends back the results
2- when I then search for a different string, it sends back an empty array of results
3- if I search for the first string again, I get the response
Any idea what is implemented wrong in the code below?
app.js
// GET /serverSearch?searchTxt=... : lists the log files, searches them for
// the given text and answers with the matching lines as JSON.
app.get('/serverSearch', function (req, res) {
  var searchTxt = req.query.searchTxt;
  dirDirectory.readDirectory(function (logFiles) {
    if (!logFiles) {
      // Always answer: without this the request would never get a response
      // when no file list is available.
      return res.json([]);
    }
    searchFileService.readFile(searchTxt, logFiles, function (lines, err) {
      console.log('Logs', lines);
      if (err) {
        return res.send();
      }
      res.json(lines);
    });
  });
  console.log('Search text', searchTxt);
});
searchService.js
/**
 * Searches every log file, one after another, for lines containing `str`.
 *
 * The matches are kept in a per-call array, so overlapping searches cannot
 * see or clear each other's results.
 *
 * @param {string} str - Text to look for.
 * @param {Array<{filename: string}>} logFiles - Files under logs/dit/.
 * @param {function} callback - Called once with (lines, err): the matching
 *   lines found so far, and the read error if one stopped the search.
 */
function readFile(str, logFiles, callback) {
  var results = [];
  // Loop through each file.
  async.eachSeries(logFiles, function (logfile, done) {
    fs.readFile('logs/dit/' + logfile.filename, 'utf8', function (err, data) {
      if (err) {
        return done(err);
      }
      data.split('\n').forEach(function (line) {
        if (line.indexOf(str) !== -1) {
          results.push(line);
        }
      });
      // Done with this file; move on to the next one.
      done();
    });
  }, function (err) {
    if (err) {
      console.log('error', err);
    }
    console.log('all done: ', results);
    // Send back the results, plus the error so the caller can react to it.
    callback(results, err);
  });
}

Javascript/NodeJS callbacks function and loop

I've made a web scraper with cheerio and request, and I'm now trying to implement a loop over an array of URLs.
Unfortunately I'm doing something wrong with my calls and callbacks, but I cannot figure out what.
This is my code :
// Question code, kept as posted: starts one request per URL inside a plain
// for loop and writes output.json right after the loop.
// NOTE(review): the write below runs as soon as the loop has *started* the
// requests; the answers in this thread point out that request is
// asynchronous, so `data` is not yet populated at that point.
var getWebData = function(url) {
var i = 1;
var data = [];
// `c` is never declared, so it is not local to this function.
for (c = 0; c < url.length; c++) {
// Stores whatever request() returns at data[i] (i starts at 1); the scraped
// objects are pushed onto the same array later, inside the callback.
data[i] = request(url[c], function(err, resp, body) {
console.log('ok');
if (!err) {
console.log('there');
var $ = cheerio.load(body);
$('.text').each(function(i, element) {
var jsObject = { name : "", description : "", price: "", categorie: "", pricePerKg: "", capacity: "", weight: "", scrapingDate : "", url: ""};
var name = 'TESTOK';
jsObject.name = name;
data.push(jsObject);
})
// Returned to request's internals, not to the caller of getWebData.
return data;
}
console.log('but');
});
i++;
}
// `json` is already a string, and it is stringified a second time below.
var json = JSON.stringify(data);
fs.writeFile('output.json', JSON.stringify(json, null, 4), function(err) {
console.log('File successfully written!');
})
}
// `url` and `app` are defined outside this excerpt.
getWebData(url);
app.listen('8080');
Note that none of my debug prints are printed.
Does anyone know what's wrong in my code and how I can make it work?
request is async.
// Excerpt quoted from the question: serializes `data` and writes it to
// output.json.
var json = JSON.stringify(data);
fs.writeFile('output.json', JSON.stringify(json, null, 4), function(err) {
console.log('File successfully written!');
})
The code above runs before the for loop's requests have completed and populated the data object.
Try executing this piece of code once the loop has finished.
First run this command: npm install async --save
var async = require('async');
/**
 * Requests every URL in series and writes the collected `data` to
 * output.json once the last request has been handled.
 *
 * @param {string[]} url - Page URLs to fetch, processed in order.
 */
var getWebData = function(url){
  var data = [];
  async.eachSeries(url, function(urlSingle, cb){
    request(urlSingle, function(err, resp, body) {
      if (err) {
        // Best effort: report the failure and carry on with the next URL.
        console.error('Request failed for ' + urlSingle, err);
        return cb();
      }
      // Write your logic here and push the scraped objects into `data`.
      cb();
    });
  }, function(){
    // This runs when the loop is done.
    // Serialize exactly once; stringifying twice writes an escaped string.
    fs.writeFile('output.json', JSON.stringify(data, null, 4), function(err) {
      if (err) {
        console.error('Could not write output.json', err);
        return;
      }
      console.log('File successfully written!');
    });
  });
};
I have been reading Asif's answer and the comments. That implementation is correct, but you don't have to increment the c variable; also, if you initialise c = 0 beforehand, all the requests will go to url[0].
Note that async.eachSeries passes each element of the url array to the iterator as the "urlSingle" argument, so you should use
request(urlSingle, ...
or consider using async.eachOf, which also gives you the index of each element in the array.
Check the async documentation if in doubt: http://caolan.github.io/async/
for (c = 0; c < url.length; c++) {
……
}
You should change it like this:
var async = require('async');

// Fetch every URL and collect the scraped objects per page. async.map
// passes the per-URL results to the final callback in the same order as
// `url`.
async.map(url,
  function(item, callback) {
    request(item, function(err, resp, body) {
      if (err) {
        // Always call back, otherwise async.map never completes.
        return callback(err);
      }
      var $ = cheerio.load(body);
      var items = [];
      $('.text').each(function() {
        var jsObject = {
          name: "",
          description: "",
          price: "",
          categorie: "",
          pricePerKg: "",
          capacity: "",
          weight: "",
          scrapingDate: "",
          url: ""
        };
        var name = 'TESTOK';
        jsObject.name = name;
        items.push(jsObject);
      });
      callback(null, items);
    });
  },
  function(err, results) {
    if (err) {
      console.log(err);
      return;
    }
    // `results` holds one array of scraped objects per URL; write it to
    // disk (or process it further) here, now that every request is done.
  });
The work inside the loop is a time-consuming operation, so you should run it asynchronously.

About Nodejs global var

// Two-step async.series: step `one` edits the article's weibos for every
// entry of `set`, step `two` saves the last edited article. The collected
// results are sent with res.send.
// `set`, `articleId`, `Article` and `res` are defined outside this excerpt.
var articleFinish;
async.series({
one: function(callback) {
// The iterator's `callback` parameter shadows the step's `callback`.
async.eachOf(set, function(tags, index, callback) {
// NOTE(review): every iteration calls findById again, so each one
// presumably edits its own freshly loaded document — confirm. That would
// explain why only one index appears changed in `articleFinish`.
Article.findById(articleId).exec(function(err, article) {
article.weibos[2].text = 'aa'; //success
article.weibos[2].tags = 'aaa'; //success
article.weibos[index].text = tags; //can't set value for next 'article'
article.weibos[index].tags = tags; //can't set value for next 'article'
// Overwritten by each iteration; the last one to finish wins.
articleFinish = article;
callback();
});
}, function(err) {
if (err) console.error(err.message);
callback(null, 3);
});
},
two: function(callback) {
Article.findById(articleId).exec(function(err, article) {
article = articleFinish;//can't get value when use the last two statement.
// NOTE(review): `callback` is never invoked in this step, so the final
// handler below presumably never runs — confirm against the async docs.
article.save(function(err) {});
});
}
}, function(err, results) {
res.send(results);
});
I want to set the last article's value by a global var.
What should I do?

synchronize and serialize function or tasks on node js

I have been stuck on this problem for a week; it's a synchronization problem in Node.js.
The process that I want to implement is:
1- check whether the table (collection) exists --> if not, insert the data
2- if the table already exists, find all the data in the table and compare it with the data that I want to insert
3- if the new data already exists in the database (table), the program does nothing; if not, the program inserts the new data into the database (table).
So we have 3 functions that should run one after another.
function 1
/**
 * Checks whether `collection` exists; when it does not, creates it by
 * inserting the new theme.
 *
 * The callback fires exactly once, and only after the insert (if any) has
 * completed.
 *
 * @param {string} collection - Collection name to look for.
 * @param {string} new_theme - Theme name stored when the collection is created.
 * @param {*} nbr_indicateur - Indicator count stored with the theme.
 * @param {function(boolean)} callback - true when the collection already
 *   existed, false when it had to be created.
 */
var getCollection = function(collection, new_theme, nbr_indicateur, callback) {
  dbObject.listCollections().toArray(function(err, collections) {
    if (err) throw err;
    // Decide once for the whole list instead of once per collection.
    var exists = collections.some(function(collect) {
      return collect.name == collection;
    });
    if (exists) {
      return callback(true);
    }
    dbObject.collection(collection).insertOne({
      "name_theme" : new_theme,
      "nbr_indicateur" : nbr_indicateur
    }, function(err, result) {
      if (err) throw err;
      console.log("Inserted a document into the " + collection + " collection.");
      callback(false);
    });
  });
};
function 2 :
/**
 * Decides whether `theme` still has to be inserted into `collection`.
 *
 * @param {boolean} value - Result of getCollection: true when the
 *   collection already existed, false when it was just created (and the
 *   theme inserted with it).
 * @param {string} collection - Collection to search.
 * @param {string} theme - Theme name to look for.
 * @param {function(boolean)} callback - Called once, after the lookup has
 *   finished: true when the theme is missing and must be inserted.
 */
var getData = function(value, collection, theme, callback) {
  if (!value) {
    // The collection was created together with this theme a moment ago;
    // asking for another insert would store it twice.
    return callback(false);
  }
  dbObject.collection(collection).find({}).toArray(function(err, docs) {
    if (err) throw err;
    var alreadyThere = docs.some(function(doc) {
      return doc.name_theme == theme;
    });
    if (alreadyThere) {
      console.log("ce theme existe déja");
    }
    // Answer only now that the documents have actually been compared.
    callback(!alreadyThere);
  });
};
function 3 :
/**
 * Inserts the new theme into `collection` when `value` is truthy.
 *
 * @param {boolean} value - Whether the theme has to be inserted.
 * @param {string} collection - Target collection.
 * @param {string} new_theme - Theme name to store.
 * @param {*} nbr_indicateur - Indicator count stored with the theme.
 * @param {function(string)} callback - Called once with "done", after the
 *   insert has completed (or immediately when nothing is inserted).
 */
var insertData = function(value, collection, new_theme, nbr_indicateur, callback) {
  if (!value) {
    return callback("done");
  }
  dbObject.collection(collection).insertOne({
    "name_theme" : new_theme,
    "nbr_indicateur" : nbr_indicateur
  }, function(err, result) {
    if (err) throw err;
    console.log("Inserted a document into the "+collection+" collection.");
    // Report completion only once the document is really stored.
    callback("done");
  });
};
Calling those functions (app.post using Express.js):
Here I tried the pyramid (nested callbacks) approach, but it doesn't work.
// POST /setting/add_theme: chains getCollection -> getData -> insertData
// for the submitted theme, logging the value each step reports.
app.post('/setting/add_theme', urlencodedParser, function(req, res) {
  var TABLE = 'Table_Theme';

  // Step 3 result: nothing left to do but log it.
  function afterInsert(value2) {
    console.log("2"+value2);
  }

  // Step 2 result: tells insertData whether the theme must be stored.
  function afterLookup(value1) {
    console.log("1"+value1);
    insertData(value1, TABLE, req.body.new_theme, req.body.nbr_indicateur, afterInsert);
  }

  // Step 1 result: tells getData whether the collection already existed.
  function afterCollectionCheck(value0) {
    console.log("0"+value0);
    getData(value0, TABLE, req.body.new_theme, afterLookup);
  }

  getCollection(TABLE, req.body.new_theme, req.body.nbr_indicateur, afterCollectionCheck);

  // The redirect is issued right away; it does not wait for the chain above.
  res.redirect('/setting');
});

Categories

Resources