I have updated the post with the actual code.
The problem is that the Node app hangs and does not exit unless I comment out the query in addArticle. I am wondering what I'm doing wrong here (with regard to the hanging problem).
/**
 * Insert an article into the `articles` table unless a row whose `link`
 * matches already exists.
 *
 * All values are handed to the driver through `?` placeholders and are NOT
 * pre-escaped: a value that has already been through connection.escape()
 * would be escaped a second time by the placeholder and stored with literal
 * quote characters around it.
 *
 * @param {string} title Article headline.
 * @param {string} text  Article body text.
 * @param {number} date  Stored as given.
 * @param {string} link  Article URL; used as the duplicate check.
 * @throws Rethrows, from inside the driver callback, any error reported by
 *         the SELECT or the INSERT.
 */
function addArticle(title, text, date, link) {
    connection.query("SELECT * FROM articles WHERE link LIKE ?", [link], function (error, rows, fields) {
        // Without this check a failed SELECT leaves `rows` undefined and the
        // length test below would crash with an unrelated TypeError.
        if (error) throw error;
        if (rows.length !== 0) {
            return;
        }
        console.log("article not in database");
        console.log(connection.escape(title));
        var values = [title, text, date, link, '{}'];
        // A nested array bound to a single `?` expands to one (v1, v2, ...) row.
        connection.query("INSERT INTO articles (title, text, date, link, topics) VALUES ?", [[values]], function (err) {
            if (err) throw err;
        });
    });
}
/**
 * Fetch the Reuters technology archive page, follow every site-relative
 * link whose href contains "article", and pass each article's title,
 * paragraph text, current timestamp and absolute URL to addArticle().
 *
 * A failed request (archive page or single article) is logged and skipped;
 * it does not abort the remaining articles.
 */
function scrapeReuters() {
    var url = 'http://www.reuters.com/news/archive/technologyNews?date=10092013';
    request(url, function (err, resp, body) {
        if (err) {
            console.log(err);
            return;
        }
        // Kept local: a shared global `$` would be replaced by every article
        // callback below.
        var $ = cheerio.load(body);
        $('a').each(function (i, link) {
            var addr = $(link).attr('href');
            // Only site-relative links that point at an article.
            if (addr == undefined || addr.indexOf('article') == -1 || addr.indexOf('http') != -1) {
                return;
            }
            var full_link = "http://www.reuters.com" + addr;
            var title = $(link).text();
            request(full_link, function (err, resp, body) {
                if (err) {
                    console.log(err);
                    return;
                }
                var article = cheerio.load(body);
                var para = article('p').text();
                addArticle(title, para, new Date().getTime(), full_link);
            });
        });
    });
}
You probably need to close the connection after all the queries have finished. You can try using the https://github.com/caolan/async library to run the queries in sequence and then in a master callback, close the connection.
It's a little tricky, but first you need to define an array of functions to execute. Then you run async.series(arrayOfFns, masterCallback). The master callback receives an error (if any function failed) and an array of results, one per function. In that master callback, terminate the MySQL connection and/or end the process.
To do this, I would rewrite the addArticle query to just return the query string. Then before your $(links).each loop, I would make an array called toInsert
In each loop I would say
toInsert.push(function(callback) {
connection.query(addArticle(...),function(err) {
if(err) callback(err);
else callback(null,true);
});
});
Then after the loop run
// Run the queued tasks one after another. The final callback fires once,
// either after the last task or as soon as one task reports an error.
async.series(toInsert, function (err, results) {
    if (err) {
        console.log(err);
    }
    // end() closes the connection after queries that are already queued have
    // run; with no open handle left the process can then exit without an
    // explicit process.exit().
    connection.end();
});
Related
I'm writing an application that lets you add visits (visit date, visit type, notes) to a case, via an Ajax POST call from a form. The visit creation functionality lets you add the same visit type and notes on several dates. So I end up with a visit object that has an array of dates in it, but the same notes and visit type. Because SQL isn't where I should be doing any looping, I want to do it in Node as I'll be able to handle any failures in the array or results returned from the individual SQL calls.
I tried setting up the procedure call so that it took an array of arrays in an array as a parameter as per here, but I couldn't get it to work, so am falling back to looping through.
The issue I'm having is with callbacks completing before I've got any results. Obviously it's because I don't understand callbacks enough and no amount of reading is making it any clearer, so I've ended up here to ask for help.
Below is the code that is executed. The visit object that is the parameter of the insertVisit function is the class as mentioned above with the array of dates.
/**
 * Insert one visit per date listed in `visit.visitDates` by calling the
 * aau.sp_InsertVisit stored procedure once per date, strictly one after
 * another, writing each call's result to the response as JSON.
 *
 * The response is ended once every date has been processed or the first
 * failure occurred; 'finished' is logged at that point rather than
 * immediately after the first call was started.
 *
 * @param req   Not used by this function.
 * @param res   Response object; receives res.write() per date, then res.end().
 * @param visit Visit whose `visitDates` is a comma-separated list of
 *              day-month-year dates joined by '-', and which carries caseId,
 *              visitTypeId, visitStatusId, adminNotes, operatorNotes, isDeleted.
 */
this.insertVisit = function (req, res, visit)
{
    var insertVisit = visit;
    //Split the visits into an array of individual dates
    var allVisits = insertVisit.visitDates.split(',');

    insertVisits(0, function (err)
    {
        if (err)
        {
            console.log('yeah, that insert didnt work: ' + err);
        }
        console.log('finished');
        res.end();
    });

    // Process allVisits[v] and, only once it has completed successfully,
    // move on to v + 1. `done` is called exactly once: with the first error,
    // or with no argument after the last date.
    function insertVisits(v, done)
    {
        if (v >= allVisits.length)
        {
            done();
            return;
        }
        singleDate(allVisits[v], function (err)
        {
            if (err)
            {
                done(err);
                return;
            }
            insertVisits(v + 1, done);
        });
    }

    // Run the stored procedure for one date and report the outcome through
    // `done`.
    function singleDate(singleVisitDate, done)
    {
        // The two OUT values live in MySQL user variables (@name). A '#'
        // would start a comment and cut the statement short.
        // NOTE(review): two statements in one query() presumably need the
        // connection to allow multiple statements - confirm in the pool setup.
        var query = 'CALL aau.sp_InsertVisit (?,?,?,?,?,?,?,@visitId,@success); SELECT @visitId, @success;';
        var parts = singleVisitDate.split('-');
        // parts are day, month, year; Date() takes a zero-based month.
        var formattedDate = new Date(parts[2], parts[1] - 1, parts[0]);
        // NOTE(review): init() runs once per date, as before - confirm that
        // repeated calls are harmless.
        connection.init();
        connection.acquire(function (err, con)
        {
            if (err)
            {
                done(err);
                return;
            }
            con.query(query,
                [
                    insertVisit.caseId,
                    formattedDate,
                    parseInt(insertVisit.visitTypeId, 10),
                    parseInt(insertVisit.visitStatusId, 10),
                    insertVisit.adminNotes,
                    insertVisit.operatorNotes,
                    insertVisit.isDeleted
                ]
                , function (err, result)
                {
                    // Release on both paths so a failed call does not leak
                    // the connection.
                    con.release();
                    if (err)
                    {
                        done(err);
                        return;
                    }
                    res.write(JSON.stringify(result));
                    done();
                });
        });
    }
};
So I'm trying to loop through each of the dates and call the stored procedure for each date and add the results to response using res.write.
This is a brand new project, so happy to rewrite it with promises or asynch/await. But any examples would be greatly appreciated of looping through multiple procedure calls
Ok,
So I looked at using async.eachSeries and managed to get it to work when I put the callback at the bottom of the 'tree'.
Hopefully this can be helpful to anyone else trying to run the same proc multiple times.
/**
 * Insert one visit per date listed in `visit.visitDates` by calling the
 * aau.sp_InsertVisit stored procedure once per date via async.eachSeries,
 * writing each call's result to the response as JSON.
 *
 * The series stops at the first failing date. The response is ended in
 * either case so the client is not left waiting.
 *
 * @param req   Not used by this function.
 * @param res   Response object; receives res.write() per date, then res.end().
 * @param visit Visit whose `visitDates` is a comma-separated list of
 *              day-month-year dates joined by '-', and which carries caseId,
 *              visitTypeId, visitStatusId, adminNotes, operatorNotes, isDeleted.
 */
this.insertVisit = function (req, res, visit)
{
    var insertVisit = visit;
    var allVisits = insertVisit.visitDates.split(',');
    async.eachSeries(allVisits, function (singleVisitDate, callback)
    {
        // The two OUT values live in MySQL user variables (@name). A '#'
        // would start a comment and cut the statement short.
        // NOTE(review): two statements in one query() presumably need the
        // connection to allow multiple statements - confirm in the pool setup.
        var query = 'CALL aau.sp_InsertVisit (?,?,?,?,?,?,?,@visitId,@success); SELECT @visitId, @success;';
        var parts = singleVisitDate.split('-');
        // parts are day, month, year; Date() takes a zero-based month.
        var formattedDate = new Date(parts[2], parts[1] - 1, parts[0]);
        connection.init();
        connection.acquire(function (err, con)
        {
            if (err)
            {
                callback(err);
                return;
            }
            con.query(query,
                [
                    insertVisit.caseId,
                    formattedDate,
                    parseInt(insertVisit.visitTypeId, 10),
                    parseInt(insertVisit.visitStatusId, 10),
                    insertVisit.adminNotes,
                    insertVisit.operatorNotes,
                    insertVisit.isDeleted
                ]
                , function (err, result)
                {
                    // Release on both paths, and always call back: if the
                    // iterator never calls back, eachSeries never reaches
                    // its final function.
                    con.release();
                    if (err)
                    {
                        callback(err);
                        return;
                    }
                    res.write(JSON.stringify(result));
                    callback();
                });
        });
    },
    function (err)
    {
        if (err)
        {
            console.log(err);
        }
        res.end();
    });
};
I'm iterating through a MySQL table with 20,000 rows. For each row I need to call a URL, fetch its content, and update the database. How do I wait for that whole process to finish before continuing the iteration? And how can I make it faster, e.g. by processing two or three rows at a time? Thanks.
// Select every product whose description is still the placeholder "0" and
// process the rows strictly one at a time: the next row is started only
// after doCall() has reported back for the current one.
var query = connection.query('SELECT * from product where product.product_description = "0" ', function(err, rows, fields) {
    if (err)
    {
        console.log('Error while performing Query.');
        return;
    }
    // Index of the next row to process. Local to this callback so no global
    // is created.
    var kontador = 0;
    processNext();

    function processNext()
    {
        // Also covers an empty result set.
        if (kontador >= rows.length) {
            return;
        }
        var row = rows[kontador];
        // NOTE(review): the first row used http://example.com and all later
        // rows http://www.example2.com - preserved as found; confirm which
        // host is intended.
        var base = (kontador === 0) ? 'http://example.com' : 'http://www.example2.com';
        var url = base + row.url.replace('../..','');
        kontador += 1;
        doCall(url, row.id, kontador, function(response){
            console.log(response,kontador);
            // Continue even when this row was reported as failed: its
            // description is still "0", so a later run selects it again.
            processNext();
        });
    }
});
/**
 * Fetch `urlToCall`, take the HTML of the first <p> found under #content
 * and store it as the product_description of product `id`.
 *
 * @param {string} urlToCall Page to fetch.
 * @param {number} id        Primary key of the product row to update.
 * @param {number} kontador  Not used here; kept for the existing call sites.
 * @param {function(boolean)} callback Called exactly once: `true` after a
 *        successful UPDATE, `false` if the request failed, no matching
 *        paragraph was found, or the UPDATE failed.
 */
function doCall(urlToCall,id,kontador, callback)
{
    request({'url':urlToCall}, function(error, response, html){
        if (error) {
            console.log(error);
            callback(false);
            return;
        }
        var $ = cheerio.load(html);
        var description = $('#content').find('p').html();
        if (description == null) {
            // Nothing to store; report failure instead of writing "null".
            callback(false);
            return;
        }
        // Placeholders instead of string concatenation: scraped HTML
        // routinely contains quotes that would break or hijack the statement.
        connection.query('UPDATE product SET product_description = ? WHERE id = ?', [description, id], function(err, rows, fields) {
            if (err) {
                console.log('error sql!');
                callback(false);
                return;
            }
            console.log('updated! ');
            callback(true);
        });
    });
}
In order to orchestrate the async behavior of your application (what can be done in parallel, should there be throttling, ..) you should use an existing library like :
async - https://www.npmjs.com/package/async - if you prefer node.js callback style
bluebird - http://bluebirdjs.com/docs/getting-started.html - if you prefer promises
highland - https://www.npmjs.com/package/highland - somewhat hybrid + stream like
There are many other libraries that can help you build complex async call graphs.
I got a file newuser.js (node.js environment featuring a mongodb database managed via mongoose) containing the following code:
//newuser.js
//basically creates new user documents in the database and takes a GET parameter and an externally generated random code (see randomcode.js)
[...]
var randomCode = require ('randomcode');
// The vericode field receives whatever randomveriCode(parameter) returns
// synchronously, at the moment this object literal is evaluated.
// NOTE(review): if randomveriCode only learns the code inside an asynchronous
// callback, that synchronous return value is undefined - verify against
// randomcode.js.
var newTempUser = new tempUser({name: req.body.name, vericode: randomCode.randomveriCode(parameter)
});
// Persist the new temp user; `err` carries any failure reported by save().
newTempUser.save(function (err){
//some output
});
//randomcode.js
//creates a random sequence of characters (=vericode), checks if code already exists in DB and restarts function if so or returns generated code
exports.randomveriCode = function randomveriCode(parameter){
[...]
var TempUser = conn.model('TempUser', TempUserSchema);
// Count the stored documents that already use the generated code. The result
// arrives later, in the callback.
TempUser.count({vericode: generatedcode}, function(err, counter){
// `err` is not inspected in this callback.
if (counter=='0'){
// This return leaves the count callback, not randomveriCode; in the code
// shown, randomveriCode itself has no return statement.
return generatedcode;
}else{
// Code already taken: start over. The result of this nested call is not
// passed on to anyone.
randomveriCode(parameter);
}
});
};
The problem is that newuser.js throws an error because the variable vericode is 'undefined' (so the mongoose model validation fails). The error does not occur if I skip the database query and return the generated code immediately (which does have a value, as verified by several console.log calls). It seems to me that the DB query takes too long and an empty or null value is returned before the query is complete. I thought about introducing promises, unless you have any other suggestions or hints about what may cause this behaviour.
Kind regards
Igor
Since querying the database is a non-blocking operation, you cannot expect the function call to return the value from the database immediately. Try passing in a callback instead:
// newuser.js
// Ask randomcode.js for a verification code and build the temp user only
// after the code has been delivered to the callback.
var randomCode = require('randomcode');
randomCode.randomveriCode(parameter, function (codeErr, vericode) {
    if (codeErr) {
        throw codeErr; // TODO: handle better
    }
    var userFields = {name: req.body.name, vericode: vericode};
    var pendingUser = new tempUser(userFields);
    pendingUser.save(function (saveErr) {
        //some output
    });
});
// randomcode.js
exports.randomveriCode = function randomveriCode(parameter, cb) {
var TempUser = conn.model('TempUser', TempUserSchema);
TempUser.count({vericode: generatedcode}, function(err, counter) {
if (err) return cb(err);
if (counter == '0') {
cb(null, generatedcode);
} else {
randomveriCode(parameter, cb);
}
});
};
your randomveriCode function contains calls to an asynchronous function and therefore, your function really needs to provide a callback argument like this:
// Deliver the generated code through a node-style callback:
// callback(err) if the count fails, callback(null, generatedcode) once the
// code is confirmed unused. On a collision the function calls itself again
// with the same parameter and callback.
exports.randomveriCode = function randomveriCode(parameter, callback){
[...]
var TempUser = conn.model('TempUser', TempUserSchema);
// Count stored documents that already carry this code.
TempUser.count({vericode: generatedcode}, function(err, counter){
if(err) return callback(err);
if (counter=='0'){
return callback(null, generatedcode);
}else{
// Code already in use: retry; the same callback receives the final result.
randomveriCode(parameter, callback);
}
});
};
You'd then call it like so:
var randomCode = require ('randomcode');
// randomcode.js attaches the generator to `exports.randomveriCode` and
// declares it as (parameter, callback). The module object itself is not
// callable, so call the member and pass the parameter first.
randomCode.randomveriCode(parameter, function(err, vericode){
    if(err) throw err;
    // The user is built only here, once the code is actually known.
    var newTempUser = new tempUser({name: req.body.name, vericode: vericode});
    newTempUser.save(function(err,newUser){
        //do something here
    });
});
Btw - you could also use a synchronous function to create a GUID. See https://www.npmjs.org/package/node-uuid.
So i have a csv file containing my information, i need to do a mass add/update
exports.add_questions_from_file = function (file_path, surveyid, callback)
{
var U = [{}];
fs.readFile(file_path, 'utf8', function(err, data){
if (err){
console.log(err);
callback(err,null);
}else{
console.log(data);
d = data.split(/\r\n|\n/);
for (x=0;x <d.length;x++)
{
line = d[x].split(',');
if (line[0] == "") {return};
RQuestion.add_by_line (line,function (err, question)
{
U.push({id:question.id});
console.log(U);
});
}
}
});
Survey.update({_id:surveyid},{$push:{"SurveyQuestions":U}},function (err,numAffected, rawResponse) {
console.log(rawResponse);
RET = {"module":"survey","operation": "add", "status":"OK"};
callback(RET);
});
};
But even though I'm using callback functions, the update always seems to happen with the same object; even the console.log here
U.push({id:question.id});
console.log(U);
returns the same object (even though all the others were created).
Im doing something wrong?
I see a few issues.
First for:
if (line[0] == "") {return};
Don't you mean to use a break or continue instead? Otherwise the entire function will quit if there is a blank line anywhere in the file. This is very important because Survey.update won't get called either.
Second: I assumed that RQuestion.add_by_line and Survey.update are doing something async like updating a database. Your code needs to be restructured to wait for those async items to complete before moving on to the next step. I'd recommend an npm package named async for that.
// Read the CSV, create one question per non-blank line with async.map, and
// update the survey only after every question has been created.
fs.readFile(file_path, 'utf8', function(err, data){
    if (err){
        console.log(err);
        callback(err,null);
        return;
    }
    // Drop lines whose first field is empty, so a blank line neither aborts
    // the import nor reaches add_by_line.
    var d = data.split(/\r\n|\n/).filter(function(text) {
        return text.split(',')[0] !== "";
    });
    async.map(d, function(text, done) {
        //this function is called for each line
        RQuestion.add_by_line (text.split(','),function (err, question)
        {
            // `question` cannot be relied on when err is set, so report the
            // error without touching it.
            if (err) { return done(err); }
            done(null,{id:question.id});
        });
    }, function(err, results) {
        //this function is called when all of the items are done
        if (err){
            console.log(err);
            callback(err,null);
            return;
        }
        console.log("done with async");
        console.dir(results);
        Survey.update({_id:surveyid},{$push:{"SurveyQuestions":results}},function (err,numAffected, rawResponse) {
            if (err){
                console.log(err);
                callback(err,null);
                return;
            }
            console.log(rawResponse);
            var RET = {"module":"survey","operation": "add", "status":"OK"};
            callback(RET);
        });
    });
});
I am just starting out with mongodb, but I am running into a problem when trying to use .find() on a collection.
I've created a DataAccessObject which opens a specific database and then lets you perform operations on it. Here is the code:
The constructor:
/**
 * Data access object bound to one database.
 *
 * Opening the database is asynchronous: no method of the object may be used
 * before the open callback has fired, so pass `callback` to learn when that
 * is.
 *
 * @param {string} db_name  Name of the database to open.
 * @param {string} host     Server host.
 * @param {number} port     Server port.
 * @param {function} [callback] Optional; handed to db.open() and invoked by
 *        it once opening has finished. Omitting it keeps the previous
 *        fire-and-forget behaviour.
 */
var DataAccessObject = function(db_name, host, port, callback){
    this.db = new Db(db_name, new Server(host, port, {auto_reconnect: true}, {}));
    this.db.open(typeof callback === 'function' ? callback : function(){});
}
A getCollection function:
// Look up a collection by name on this object's database and hand it to
// `callback` node-style: callback(error) on failure, callback(null, collection)
// on success.
DataAccessObject.prototype.getCollection = function(collection_name, callback) {
    this.db.collection(collection_name, function(lookupError, handle) {
        if (!lookupError) {
            callback(null, handle);
            return;
        }
        callback(lookupError);
    });
};
A save function:
/**
 * Insert one document or an array of documents into a collection.
 *
 * @param {string} collection_name Target collection.
 * @param {Object|Object[]} data   A single document or an array of documents.
 * @param {function} callback      callback(error) if the collection lookup
 *        or the insert fails; otherwise callback(null, docs), where docs is
 *        always an array (a single document is wrapped).
 */
DataAccessObject.prototype.save = function(collection_name, data, callback){
    this.getCollection(collection_name, function(error, collection){
        if(error) {
            callback(error);
            return;
        }
        // in case it's just one article and not an array of articles.
        // Array.isArray rather than a `length` probe: a document may itself
        // have a field named "length".
        var docs = Array.isArray(data) ? data : [data];
        //insert to collection
        collection.insert(docs, function(insertError){
            // Report a failed insert instead of claiming success.
            if(insertError) callback(insertError);
            else callback(null, docs);
        });
    });
}
And what seems to be the problematic one - a findAll function:
// Fetch every document of a collection as an array and hand it to `callback`
// node-style: callback(error) if the collection lookup or the read fails,
// callback(null, results) otherwise.
DataAccessObject.prototype.findAll = function(collection_name, callback) {
    this.getCollection(collection_name, function(lookupError, handle) {
        if (lookupError) {
            callback(lookupError);
            return;
        }
        var cursor = handle.find();
        cursor.toArray(function(readError, docs) {
            if (readError) {
                callback(readError);
                return;
            }
            callback(null, docs);
        });
    });
};
Whenever I try to dao.findAll(error, callback), the callback never gets called.
I've narrowed the problem down to the following part of the code:
collection.find().toArray(function(error, result){
//... whatever is in here never gets executed
});
I've looked at how other people do it. In fact, I'm following this tutorial very closely. No one else seems to have this problem with collection.find().toArray(), and it doesn't come up in my searches.
Thanks,
Xaan.
You are not using the open callback, so if you make the findAll request right after creating the DAO, the database won't be ready yet.
If your code is like this, it will not work.
// Create the DAO and query it immediately, without waiting for the database
// to finish opening; whatever findAll passes back is printed as-is.
var dao = new DataAccessObject("my_dbase", "localhost", 27017);
dao.findAll("my_collection", function printArguments() {
    console.log(arguments);
});
I tested it and it doesn't find records, and it also gives no error. I think it should give an error.
But if you change it so that you give a callback to the constructor, then it should work.
// Constructor variant that lets the caller know when the database is open:
// `callback` is passed straight through to db.open().
var DataAccessObject = function(db_name, host, port, callback){
    var serverOptions = {auto_reconnect: true};
    var server = new Server(host, port, serverOptions, {});
    this.db = new Db(db_name, server);
    this.db.open(callback);
}
And make your code like this.
// Create the DAO and run the query from inside the constructor's callback,
// i.e. only after that callback has been invoked.
var dao = new DataAccessObject("my_dbase", "localhost", 27017, function onOpen() {
    dao.findAll("my_collection", function printArguments() {
        console.log(arguments);
    });
});