Can I make node.js FTP synchronous? - javascript

I have a little FTP script which basically transfer an entire directory tree (by walking it with fs.readdir) to an FTP server one file at a time (I have to do some analysis on each file as it's uploaded hence the one-at-a-time behaviour).
However, the bit that does a single file (there's another bit for directories which uses c.mkdir rather than c.put) looks like this:
console.log('Transferring [' + ival + ']');
var c = new Ftp();
c.on('ready', function() {
c.put(ival, ival, function(err) {
console.log(err);
});
c.end();
});
As you can see, it's using a very simple method of logging in that failures simply get sent to the console.
Unfortunately, since the FTPs are done asynchronously, errors are being delivered to the console in a sequence totally unrelated to the file name output.
Is there a way to force the FTP to be done synchronously so that errors would immediately follow the file name? Basically, I want the entire sequence from the initial console.log to the final }); to be done before moving on to the next file.

Even if there is, it's not recommended. You generally don't want to block the event loop with such a long synchronous operation.
What would probably be more useful is using recursion or Promises to ensure that things happen in a sequence.
Example:
let ivals = [/* lots of ivals here */];
function putItems(ivals) {
let ival = ivals[0];
console.log('Transferring [' + ival + ']');
var c = new Ftp();
c.on('ready', function() {
c.put(ival, ival, function(err) {
console.log(err);
c.end();
// Don't continue if we're out of items.
if (ivals.length === 1) { return; }
putItems(ivals.slice(1)); // Call again with the rest of the items.
});
});
}
putItems(ivals);
It can probably be done more intelligently by using a nested function and a single FTP context. But you get the point.

Without making things synchronous, you can solve your error logging problem by just logging the name with the error. You can just wrap this in a closure so you can keep track of ival that goes with a particular error:
(function(ival) {
console.log('Transferring [' + ival + ']');
var c = new Ftp();
c.on('ready', function() {
c.put(ival, ival, function(err) {
console.log('[' + ival + ']', err);
});
c.end();
});
})(ival);

Why dont you just push the errors to an array, and when all uploads are done, you will have that array
with all those errors in order ?
I will do something like this:
var errArray = [];
console.log('Transferring [' + ival + ']');
var c = new Ftp();
c.on('ready', function() {
c.put(ival, ival, function(err) {
errArray.push( err );
});
c.end();
});
c.on('end', function() {
errArray.forEach( function( err ){
console.log( err );
})
});

Related

write() does not write sequentialy?

I have found that write() method of stream.Writable class does not write data sequentially. When I an sending am attachment to the server in chunks, this code assembles data chunks in wrong order if no delay occurs. If I put a debug message like console.log() in the middle of the loop (like to dump the data to watch what is being written, actually), this bug disappears. So, what is the race condition in this code ? Looks like I am enforcing a sequential assembling of the file, so I do not understand what is wrong.
My code:
function join_chunks(company_id,attachment_id,num_chunks) {
var stream;
var file;
var output_filename=ATTACHMENTS_PATH + '/comp' + company_id + '/' + attachment_id + '.data';
var input_filename;
var chunk_data;
var chunk_count=0;
stream=fs.createWriteStream(output_filename,{flags:'w+',mode: 0666});
console.log('joining files:');
for(var i=0;i<num_chunks;i++) {
input_filename=ATTACHMENTS_PATH + '/comp' + company_id + '/' + attachment_id + '-' + (i+1) + '.chunk';
console.log(input_filename);
fs.readFile(input_filename , (err, chunk_data) => {
if (err) throw err;
stream.write(chunk_data,function() {
chunk_count++;
if (chunk_count==num_chunks) {
console.log('join finished. closing stream');
stream.end();
}
});
});
}
}
The console:
joining files:
/home/attachments/comp-2084830518/67-1.chunk
/home/attachments/comp-2084830518/67-2.chunk
/home/attachments/comp-2084830518/67-3.chunk
/home/attachments/comp-2084830518/67-4.chunk
join finished. closing stream
Node version: v6.9.2
stream.write is an asynchronous operation. This means that multiple calls to it may be serviced out of order.
If you want your writes to happen in order, use stream.writeSync, or use the callback argument to stream.write to sequence your writes.

This code doesn't seem to fire in order?

My problem is that the code does not seem to be running in order, as seen below.
This code is for my discord.js bot that I am creating.
var Discord = require("discord.js");
var bot = new Discord.Client();
var yt = require("C:/Users/username/Documents/Coding/Discord/youtubetest.js");
var youtubetest = new yt();
var fs = require('fs');
var youtubedl = require('youtube-dl');
var prefix = "!";
var vidid;
var commands = {
play: {
name: "!play ",
fnc: "Gets a Youtube video matching given tags.",
process: function(msg, query) {
youtubetest.respond(query, msg);
var vidid = youtubetest.vidid;
console.log(typeof(vidid) + " + " + vidid);
console.log("3");
}
}
};
bot.on('ready', () => {
console.log('I am ready!');
});
bot.on("message", msg => {
if(!msg.content.startsWith(prefix) || msg.author.bot || (msg.author.id === bot.user.id)) return;
var cmdraw = msg.content.split(" ")[0].substring(1).toLowerCase();
var query = msg.content.split("!")[1];
var cmd = commands[cmdraw];
if (cmd) {
var res = cmd.process(msg, query, bot);
if (res) {
msg.channel.sendMessage(res);
}
} else {
let msgs = [];
msgs.push(msg.content + " is not a valid command.");
msgs.push(" ");
msgs.push("Available commands:");
msgs.push(" ");
msg.channel.sendMessage(msgs);
msg.channel.sendMessage(commands.help.process(msg));
}
});
bot.on('error', e => { console.error(e); });
bot.login("mytoken");
The youtubetest.js file:
var youtube_node = require('youtube-node');
var ConfigFile = require("C:/Users/username/Documents/Coding/Discord/json_config.json");
var mybot = require("C:/Users/username/Documents/Coding/Discord/mybot.js");
function myyt () {
this.youtube = new youtube_node();
this.youtube.setKey(ConfigFile.youtube_api_key);
this.vidid = "";
}
myyt.prototype.respond = function(query, msg) {
this.youtube.search(query, 1, function(error, result) {
if (error) {
msg.channel.sendMessage("There was an error finding requested video.");
} else {
vidid = 'http://www.youtube.com/watch?v=' + result.items[0].id.videoId;
myyt.vidid = vidid;
console.log("1");
}
});
console.log("2");
};
module.exports = myyt;
As the code shows, i have an object for the commands that the bot will be able to process, and I have a function to run said commands when a message is received.
Throughout the code you can see that I have put three console.logs with 1, 2 and 3 showing in which order I expect the parts of the code to run. When the code is run and a query is found the output is this:
I am ready!
string +
2
3
1
This shows that the code is running in the wrong order that I expect it to.
All help is very highly appreciated :)
*Update! Thank you all very much to understand why it isn't working. I found a solution where in the main file at vidid = youtubetest.respond(query, msg) when it does that the variable is not assigned until the function is done so it goes onto the rest of my code without the variable. To fix I simply put an if statement checking if the variable if undefined and waiting until it is defined.*
Like is mentioned before, a lot of stuff in javascript runs in async, hence the callback handlers. The reason it runs in async, is to avoid the rest of your code being "blocked" by remote calls. To avoid ending up in callback hell, most of us Javascript developers are moving more and more over to Promises. So your code could then look more like this:
myyt.prototype.respond = function(query, msg) {
return new Promise(function(resolve, reject) {
this.youtube.search(query, 1, function(error, result) {
if (error) {
reject("There was an error finding requested video."); // passed down to the ".catch" statement below
} else {
vidid = 'http://www.youtube.com/watch?v=' + result.items[0].id.videoId;
myyt.vidid = vidid;
console.log("1");
resolve(2); // Resolve marks the promises as successfully completed, and passes along to the ".then" method
}
});
}).then(function(two) {
// video is now the same as myyt.vidid as above.
console.log(two);
}).catch(function(err) {
// err contains the error object from above
msg.channel.sendMessage(err);
})
};
This would naturally require a change in anything that uses this process, but creating your own prototypes seems.. odd.
This promise returns the vidid, so you'd then set vidid = youtubetest.response(query, msg);, and whenever that function gets called, you do:
vidid.then(function(id) {
// id is now the vidid.
});
Javascript runs async by design, and trying to hack your way around that leads you to dark places fast. As far as I can tell, you're also targetting nodeJS, which means that once you start running something synchronously, you'll kill off performance for other users, as everyone has to wait for that sync call to finish.
Some suggested reading:
http://callbackhell.com/
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
https://stackoverflow.com/a/11233849/3646975
I'd also suggest looking up ES6 syntax, as it shortens your code and makes life a hellofalot easier (native promises were only introduced in ES6, which NodeJS 4 and above supports (more or less))
In javascript, please remember that any callback function you pass to some other function is called asynchronously. I.e. the calls to callback function may not happen "in order". "In order" in this case means the order they appear on the source file.
The callback function is simply called on certain event:
When there is data to be processed
on error
in your case for example when the youtube search results are ready,
'ready' event is received or 'message' is received.
etc.

Node & MySQL - connection.query inside connection.query - Object property not accessible

I got a pretty awkward problem.
I create a pool, connect to the database, create a connection and query, get the results, do a bunch of stuff, then I have to create another connection and query, but actually it has to be dynamically so I loop over my Array teacherHours containing the data.
Then more Code is happening, and I have to create an extra loop, because certain elements of my teacherHours Array have to try multiple times to get the correct response from the upcoming query.
So another loop follows, which is supposed to loop as long as availableHours > 0. Now, here is where it all goes left.
A buch of code happens inside the second loop, I prepare my second query, call connection.query() and inside of the callback function I prepare my third query (after doing some other stuff) and this is actually where Node kicks me out.
It gives me TypeError: Cannot read property 'tid' of undefined. tid needs to be accessed for my third query, so I try to access it just like I did before but Node doesn't allow it.
I know that the queries return useful data (rows) so it can't be a problem of querying but receiving no data. Actually I console.log("the rowRIDS"+rowRIDS); the result of the second query and I see that it returns 2 rows and just after that it gives me the error.
What is also strange to me, all the console.logs inside my my two loops are being logged, and my console.log of the second query (containing the 2 rows) are being logged after the loops ran through, since the queries are nested shouldn't the returned 2 rows and the error appear within the first iteration of the loop, since the code should access the second query at that point.
BTW, I've tried to set a hardcoded number instead of the tid just to get the next property datum to be an error. I kind of got a feeling as if the variable teacherHours is out of scope, but it is supposed to be a global variable.
To get a better feeling of what I'm talking about I copied the code and uncommented all the javascript code, where I populate and calculate stuff. Any help would be really great, its been almost 7 hours of try & error without any luck. Thank You!
pool.getConnection(function(err, connection){
if (err) throw err;
connection.query('SELECT * FROM teachers_teaching_tbl WHERE fwdid = 1 ', function(err, rows, fields) {
if (err) {
console.error('error querying: ' + err.stack);
return;
}
rowArray=rows;
console.log(rowArray);
//
// HERE HAPPENS
// A LOOOOT OF STUFF
//
// AND teacherHours IS BEING POPULATED
//
// THEN COMES A FOR LOOP
for(var i=0; i<teacherHours.length;i++){
//
// MORE STUFF
//
//AND ANOTHER LOOP
while(availableHours>0){//do{ ORIGINALLY I TRIED WITH A DO WHILE LOOP
//
// AGAIN A BUNCH OF STUFF
//
// NOW I'M PREPARING MY NEXT QUERY
//
var myQueryGetFreeRoom=" SELECT rms.rid FROM rooms_tbl as rms WHERE NOT EXISTS ( ";
myQueryGetFreeRoom+=" SELECT NULL FROM classes_tbl as cls ";
myQueryGetFreeRoom+=" WHERE ( (cls.bis > '"+bisMinus1+"' AND cls.bis <= '"+realBis+"' ) OR ( cls.von > '"+bisMinus1+"' AND cls.von < '"+realBis+"' ) ) AND (cls.datum = '"+teacherHours[i].datum.getFullYear()+"-"+(teacherHours[i].datum.getMonth()+1)+"-"+teacherHours[i].datum.getDate()+"') AND (cls.rid=rms.rid) ) ";
//
//
connection.query(myQueryGetFreeRoom, function(err, rowRIDS, fields) {
if (err) {
console.error('error querying: ' + err.stack);
return;
}
roomIDs=rowRIDS;
console.log("the rowRIDS"+rowRIDS);
//
// MORE STUFF
// HAPPENING
//
if(roomIDs.length>0){
//
// PREPARING QUERY NO.3 - WHICH IS WHERE MY ERROR POINTS - TO THE USE OF tid PROPERTY
//
var myQueryBookClass = " INSERT INTO classes_tbl ( rid , tid , belegtAnz, datum, von , bis ) ";
myQueryBookClass+=" VALUES ( "+Math.floor(Math.random() * roomIDs.length)+", "+teacherHours[i].tid+" , 0, '"+teacherHours[i].datum.getFullYear()+"-"+(teacherHours[i].datum.getMonth()+1)+"-"+teacherHours[i].datum.getDate()+"' , '"+bisMinus1+"' , '"+realBis+"' ) ";
console.log("myQueryBookClass: "+myQueryBookClass);
availableHours = 0;
//
// HERE WAS SUPPOSED TO FOLLOW QUERY 3 - myQueryBookClass
//
// BUT SINCE I DONT EVEN GET INSIDE HERE IT IS IN COMMENTS
//
/*connection.query(myQueryBookClass, function(err, insertRows, fields){
if(err){
console.error('error querying: '+err.stack);
return;
}
console.log("Inserted Rows: "+ insertRows);
}); */
} else {
availableHours= availableHours - 1;
//
// STUFF HAPPENING
//
}
});
availableHours= availableHours - 1;
}//while(availableHours>0);
//
}
connection.release(function(err){
if (err){
console.error('error disconnecting: ' + err.stack);
return;
}
});
});
});
I think you are coming from a non-async language like Python, Java, etc. which is why Node, i.e. JavaScript, seems to screw things up for you, but actually it isn't.
The problem you have in your code is that you execute async functions like query synchronously all at the same time in the same while loop. You need to use a module like async which helps to run and collect results asynchronously.
Here is the updated code.
var async = require('async'),
connection;
async.waterfall([
function (cb) {
pool.getConnection(cb);
},
function (conn, cb) {
connection = conn;
connection.query('SELECT * FROM teachers_teaching_tbl WHERE fwdid = 1', cb);
},
function (rows, fields, cb) {
rowArray = rows;
console.log(rowArray);
// HERE HAPPENS
// A LOOOOT OF STUFF
//
// AND teacherHours IS BEING POPULATED
//
// THEN COMES A FOR LOOP
async.eachSeries(teacherHours, function (teacherHour, done) {
// MORE STUFF
//
//AND ANOTHER LOOP
async.whilst(function () {
return availableHours > 0;
}, function (cb) {
// AGAIN A BUNCH OF STUFF
//
// NOW I'M PREPARING MY NEXT QUERY
//
var myQueryGetFreeRoom =
"SELECT rms.rid FROM rooms_tbl AS rms WHERE NOT EXISTS ("
+ "SELECT NULL FROM classes_tbl AS cls"
+ " WHERE ("
+ "(cls.bis > '" + bisMinus1 + "' AND cls.bis <= '" + realBis + "')"
+ " OR (cls.von > '" + bisMinus1 + "' AND cls.von < '" + realBis + "')"
+ ") AND ("
+ "cls.datum = '" + teacherHour.datum.getFullYear() + "-" + (teacherHour.datum.getMonth() + 1) + "-" + teacherHour.datum.getDate() + "'"
+ ") AND cls.rid = rms.rid";
async.waterfall([
function (cb) {
connection.query(myQueryGetFreeRoom, cb);
},
function(rowRIDS, fields, cb) {
roomIDs = rowRIDS;
console.log("the rowRIDS" + rowRIDS);
//
// MORE STUFF
// HAPPENING
//
if (roomIDs.length > 0) {
//
// PREPARING QUERY NO.3 - WHICH IS WHERE MY ERROR POINTS - TO THE USE OF tid PROPERTY
//
var myQueryBookClass = "INSERT INTO classes_tbl (rid, tid, belegtAnz, datum, von, bis) VALUES ("
+ Math.floor(Math.random() * roomIDs.length)
+ ", " + teacherHour.tid
+ ", 0, '" + teacherHour.datum.getFullYear() + "-" + (teacherHour.datum.getMonth() + 1) + "-" + teacherHour.datum.getDate() + "', '" + bisMinus1 + "', '" + realBis + "')";
console.log("myQueryBookClass: " + myQueryBookClass);
availableHours = 0;
//
// HERE WAS SUPPOSED TO FOLLOW QUERY 3 - myQueryBookClass
//
// BUT SINCE I DONT EVEN GET INSIDE HERE IT IS IN COMMENTS
//
connection.query(myQueryBookClass, function (err, insertRows, fields) {
if (err) {
console.error('error querying: '+err.stack);
return;
}
console.log("Inserted Rows: "+ insertRows);
// Here do whatever you need to do, then call the callback;
cb();
});
} else {
--availableHours;
//
// STUFF HAPPENING
//
cb();
}
}
], function (err) {
if (!err) {
// Notice that you are decrementing the `availableHours` twice here and above.
// Make sure this is what you want.
--availableHours;
}
cb(err);
});
}, done);
}, function (err) {
connection.release(function (err) {
if (err) {
console.error('error disconnecting: ' + err.stack);
return;
}
});
});
}
], function (err) {
conn && pool.release(conn);
err && throw err;
});
Next time please format your code properly for better readability which will help yourself to get answers faster, and split your question text into paragraphs for the same purpose.
Explanation
There are four nested async flows:
async.waterfall
-> async.eachSeries
-> async.whilst
-> async.waterfall
Basically, the async.waterfall library allows you to execute a list of functions in series.
Every next function will be executed only after the previous function has returned the response.
To indicate that a function is completed and the results are available, it has to call the callback, in our case it is cb (you can call it whatever you like, eg. callback). The rule is to call it, otherwise, the next function will never be executed because the previous one doesn't seem to have finished its work.
Once the previous function has completed, it calls the provided cb with the following signature:
cb(err, connection);
If there was an error while requesting for a connection, the entire async.waterfall will interrupt and the final callback function will be executed.
If there was no error, the connection will be provided as a second argument. async module passes all arguments of the previous function to the next function as the first, second, etc. arguments which is why the second function receives the conn as the first argument.
Every next function will receive the callback cb as the last argument, which you must eventually call when the job is done.
Thus, in the first async.waterfall flow:
It requests a new database connection.
Once the connection is available, the next function is executed which sends a query to the database.
Waits for the query results, then once the results are available, it is ready to run the next function which iterates over each row.
async.eachSeries allows to iterate over a gives array of values sequentially.
In the second async.eachSeries flow:
It iterates over each element in the teacherHours array sequentially.
Once each element is processed (however you want), you must call the done callback. Again, you could have called this as cb like in the previous async.waterfall or callback. done is just for clarity that the process is done.
Then we have the async.whilst which provides the same logic as the normal while () {} statement but handles the loop asynchronously.
In this third async.whilst flow:
Calls the first function. Its return value indicates whether it has to continue the loop, i.e. call the second asynchronous function.
If the return value is truthful (availableHours > 0), then the second function is called.
When the async function is done, it must call the provided callback cb to indicate that it is over. Then async module will call the first function to check if it has to continue the loop.
In this asynchronous function inside async.whilst we have another async.waterfall because you need to send queries to the database for each teacherHour.
In this last fourth async.watercall flow:
It sends the SELECT query to the database.
Waits for the response. Once the rowRIDS are available, it calls the second function in the waterfall.
If there are rowRIDS (roomIDs.length > 0), it sends the INSERT query to the database.
Once done, it calls the callback cb.
If there were no rowRIDs, it calls the callback cb, too, to indicate that the job is done.
It is a great thing that JavaScript is asynchronous. It might be difficult at the beginning when you convert from other synchronous languages, but once you get the idea, it will be hard to thing synchronously. It becomes so intuitive that you will start thinking why other languages don't work asynchronous.
I hope I could explain the above code thoroughly. Enjoy JavaScript! It's awesome!

child process spawn returning NaN

I'm trying to understand child process module in node.js, I created a parent file which has code:
var spawn=require("child_process").spawn;
var child=spawn("node",['plus_one.js']);
setInterval(function(){
//var number=Math.floor(Math.random()*10000);
var number=10;
child.stdin.write(number + "\n");
child.stdout.once("data",function(data){
console.log("Child replied to "+number + " with " + data);
})
},1000);
child.stderr.on("data",function(data){
//process.stdout.write(data);
console.log("error"+data)
})
The child file looks like this:
process.stdin.resume();
process.stdin.on("data",function(data){
var number;
try{
number = parseInt(data.toString(), 10);
number+=1;
process.stdout.write(number+"\n");
}
catch(err){
process.stderr.write(err.message+"lol");
}
})
If I execute just the child file it works fine , but when i execute the main file it always return NaN; why is that?
Also as im trying to understand it, I quite do not understand the difference between child_process.spawn and .exec, spawn return stream so it has stdin/stdout while exec returns buffer, does it mean that .exec cannot communicate with child file (and vice versa) other than passing variable with options/env object in it?
Making a few tweaks to your code, it is now working for me:
var spawn = require("child_process").spawn;
var child = spawn("node", ['plus_one.js']);
var number = 10;
setInterval(function () {
child.stdin.write(number++ + "\n");
child.stdout.once("data", function (data) {
console.log("Child replied to " + number + " with " + data);
});
}, 1000);
child.stderr.on("data", function (data) {
//process.stdout.write(data);
console.log("error" + data)
});
Changes:
Used proper bracing on the setInterval() function.
Used .once() in the appropriate place so event handlers don't pile up.
Moved the number variable outside of the setInterval() scope so that it can retain its value from one call to the next.

Incorrect header check zlib

Running the code below to to download and unzip files. It works as intended when I try with one but when I do multiple at the same time I get the following error:
Error: incorrect header check at Zlib._handle.onerror
var downloadUnzipFile = function (mID) {
try {
// Read File
console.log("Started download/unzip of merchant: " + mID + " # " + new Date().format('H:i:s').toString());
request(linkConst(mID))
// Un-Gzip
.pipe(zlib.createGunzip())
// Write File
.pipe(fs.createWriteStream(fileName(mID)))
.on('error', function (err) {
console.error(err);
})
.on('finish', function() {
console.log("CSV created: " + fileName(mID));
console.log("Completed merchant: " + mID + " # " + new Date().format('H:i:s').toString());
//console.log("Parsing CSV...");
//csvReader(fileName);
});
} catch (e) {
console.error(e);
}
}
module.exports = function(sMerchants) {
var oMerchants = JSON.parse(JSON.stringify(sMerchants));
oMerchants.forEach(function eachMerchant(merchant) {
downloadUnzipFile(merchant.merchant_aw_id);
})
};
Any ideas?
Thanks
EDIT:
To clarify, i'd like to run through each item (merchant) in the array (merchants) and download a file + unzip it. The way I currently do it means it this downloading/zipping occurs at the sametime (which I think might be causing the error). When i remove the foreach loop and just try to download/zip one merchant the code works.
Yeah, as you suggest, it's likely that if you try to unzip too many files concurrently, you will run out of memory. Because you are handling streams, the unzip operations are asynchronous, meaning your forEach loop will continue to be called before each unzip operation completes. There are plenty of node packages that allow you to handle async operations so you can run the unzip function sequentially, but the simplest approach might just be to use a recursive function call. E.g.:
var downloadUnzipFile = function (mID) {
try {
// Read File
console.log("Started download/unzip of merchant: " + mID + " # " + new Date().format('H:i:s').toString());
return request(linkConst(mID))
// Un-Gzip
.pipe(zlib.createGunzip())
// Write File
.pipe(fs.createWriteStream(fileName(mID)))
} catch (e) {
console.log(e);
return false;
}
}
module.exports = function(sMerchants) {
var merchants = JSON.parse(JSON.stringify(sMerchants)),
count = 0;
downloadUnzipFile(merchants[count][merchant_aw_id])
.on('error', function(err){
console.log(err);
// continue unzipping files, even if you encounter an error. You can also remove these lines if you want the script to exit.
if(merchants[++count]){
downloadUnzipFile(merchants[count][merchant_aw_id]);
}
})
.on('finish', function() {
if(merchants[++count]){
downloadUnzipFile(merchants[count][merchant_aw_id]);
}
});
};
Haven't tested, of course. The main idea should work thought: call downloadUnzipFile recursively whenever the previous call errors out or finishes, as long as there are still items in the merchants array.

Categories

Resources