Async queue, filestream end how to know when both finished - javascript

I am having a slight issue when using async.queue with a filestream
I have a scenario where my filestream will finish
I set fileRead to true
however the queue will be empty and already have called drain
this then leads my "done" to never be called
what is the proper way to say "end the queue" after my filestream is "end" and the queue is empty?
var fs = require('fs')
, util = require('util')
, stream = require('stream')
, es = require('event-stream');
var async = require('async');
var fileRead = false;
var lineNr = 0;
var q = async.queue(function(task, callback) {
task(function(err, lineData){
responseLines.push(lineData);
callback();
});
}, 5);
var q.drain = function() {
if(fileRead){
done(null, responseLines);
}
}
var s = fs.createReadStream('very-large-file.csv')
.pipe(es.split())
.pipe(es.mapSync(function(line){
s.pause();
q.push(async.apply(insertIntoDb, line))
s.resume();
})
.on('error', function(err){
done(err);
})
.on('end', function(){
fileRead = true;
})
);
or is there a better use of async which would allow me to do this?
async process line by line with the ability to exit early if one of the lines has errors

Firstly, I'm not sure how much of your snippet is pseudo code but var q.drain = ... is not valid javascript and should error. It should just be q.drain = as you're defining a property on an existing object not declaring a new variable. This could be why your drain function isn't firing if it isn't pseudo code.
There are a few ways you could achieve what I think you're trying to do. One would be to check the length of the queue in your end handler and set the drain function if there are still items to process.
.on('end', function(){
if(!q.length){
callDone();
}
else {
q.drain = callDone;
}
});
function callDone(){
done(null, responseLines);
}
This is effectively saying "if the queue's been processed call done, if not, call done when it has!" I'm certain there are lots of ways to tidy up your code but hopefully this provides a solution to your specific problem.

Related

How to execute synchronous HTTP requests in JavaScript

I need to execute unknown number of http requests in a node.js program, and it needs to happen synchronously. only when one get the response the next request will be execute. How can I implement that in JS?
I tried it synchronously with the requset package:
function HttpHandler(url){
request(url, function (error, response, body) {
...
})
}
HttpHandler("address-1")
HttpHandler("address-2")
...
HttpHandler("address-100")
And asynchronously with request-promise:
async function HttpHandler(url){
const res = await request(url)
...
}
HttpHandler("address-1")
HttpHandler("address-2")
...
HttpHandler("address-100")
Non of them work. and as I said I can have unknown number of http request over the program, it depends on the end user.
Any ideas on to handle that?
Use the got() library, not the request() library because the request() library has been deprecated and does not support promises. Then, you can use async/await and a for loop to sequence your calls one after another.
const got = require('got');
let urls = [...]; // some array of urls
async function processUrls(list) {
for (let url of urls) {
await got(url);
}
}
processUrls(urls).then(() => {
console.log("all done");
}).catch(err => {
console.log(err);
});
You are claiming some sort of dynamic list of URLs, but won't show how that works so you'll have to figure out that part of the logic yourself. I'd be happy to show how to solve that part, but you haven't given us any idea how that should work.
If you want a queue that you can regularly add items to, you can do something like this:
class sequencedQueue {
// fn is a function to call on each item in the queue
// if its asynchronous, it should return a promise
constructor(fn) {
this.queue = [];
this.processing = false;
this.fn = fn;
}
add(...items) {
this.queue.push(...items);
return this.run();
}
async run() {
// if not already processing, start processing
// because of await, this is not a blocking while loop
while (!this.processing && this.queue.length) {
try {
this.processing = true;
await this.fn(this.queue.shift());
} catch (e) {
// need to decide what to do upon error
// this is currently coded to just log the error and
// keep processing. To end processing, throw an error here.
console.log(e);
} finally {
this.processing = false;
}
}
}
}

NodeJS readdirSync() not executed

I'm a beginner in non-blocking environment, such NodeJS. Below is my simple code, which list all files in directory :
var readline = require('readline');
var rl = readline.createInterface(process.stdin, process.stdout);
var fs = require('fs');
var datafolder = './datafolder';
var datafoldername = 'datafolder';
rl.setPrompt('Option> ');
rl.prompt();
rl.on('line', function(line) {
if (line === "right") rl.close();
if (line == '1') {
listFile();
}
rl.prompt();
}).on('close', function() {
process.exit(0);
});
function listFile() {
console.log(`File(s) on ${datafolder}`);
fs.readdirSync(datafolder, (err, files) => {
if (err) {
console.log(err);
} else {
files.forEach(filename => {
console.log(filename);
});
}
});
}
If user press 1, it's suppose to execute method listFile and show all files inside.
My question is, why fs.readdirSync not executed? The program works if I do it with readdir(), but it'll mess the output to user.
You are passing a callback to fs.readdirSync() but *Sync() functions don't take callbacks. The callback is never run (because the function does not take a callback), so you see no output. But fs.readdirSync() does in fact execute.
fs.readdirSync() simply returns it's value (which may make the program easier to read, but also means the call will block, which may be OK depending on what your program does and how it is used.)
var resultsArray = fs.readdirSync(datafolder);
(You may want to wrap it in a try/catch for error handling.)

This code doesn't seem to fire in order?

My problem is that the code does not seem to be running in order, as seen below.
This code is for my discord.js bot that I am creating.
var Discord = require("discord.js");
var bot = new Discord.Client();
var yt = require("C:/Users/username/Documents/Coding/Discord/youtubetest.js");
var youtubetest = new yt();
var fs = require('fs');
var youtubedl = require('youtube-dl');
var prefix = "!";
var vidid;
var commands = {
play: {
name: "!play ",
fnc: "Gets a Youtube video matching given tags.",
process: function(msg, query) {
youtubetest.respond(query, msg);
var vidid = youtubetest.vidid;
console.log(typeof(vidid) + " + " + vidid);
console.log("3");
}
}
};
bot.on('ready', () => {
console.log('I am ready!');
});
bot.on("message", msg => {
if(!msg.content.startsWith(prefix) || msg.author.bot || (msg.author.id === bot.user.id)) return;
var cmdraw = msg.content.split(" ")[0].substring(1).toLowerCase();
var query = msg.content.split("!")[1];
var cmd = commands[cmdraw];
if (cmd) {
var res = cmd.process(msg, query, bot);
if (res) {
msg.channel.sendMessage(res);
}
} else {
let msgs = [];
msgs.push(msg.content + " is not a valid command.");
msgs.push(" ");
msgs.push("Available commands:");
msgs.push(" ");
msg.channel.sendMessage(msgs);
msg.channel.sendMessage(commands.help.process(msg));
}
});
bot.on('error', e => { console.error(e); });
bot.login("mytoken");
The youtubetest.js file:
var youtube_node = require('youtube-node');
var ConfigFile = require("C:/Users/username/Documents/Coding/Discord/json_config.json");
var mybot = require("C:/Users/username/Documents/Coding/Discord/mybot.js");
function myyt () {
this.youtube = new youtube_node();
this.youtube.setKey(ConfigFile.youtube_api_key);
this.vidid = "";
}
myyt.prototype.respond = function(query, msg) {
this.youtube.search(query, 1, function(error, result) {
if (error) {
msg.channel.sendMessage("There was an error finding requested video.");
} else {
vidid = 'http://www.youtube.com/watch?v=' + result.items[0].id.videoId;
myyt.vidid = vidid;
console.log("1");
}
});
console.log("2");
};
module.exports = myyt;
As the code shows, i have an object for the commands that the bot will be able to process, and I have a function to run said commands when a message is received.
Throughout the code you can see that I have put three console.logs with 1, 2 and 3 showing in which order I expect the parts of the code to run. When the code is run and a query is found the output is this:
I am ready!
string +
2
3
1
This shows that the code is running in the wrong order that I expect it to.
All help is very highly appreciated :)
*Update! Thank you all very much to understand why it isn't working. I found a solution where in the main file at vidid = youtubetest.respond(query, msg) when it does that the variable is not assigned until the function is done so it goes onto the rest of my code without the variable. To fix I simply put an if statement checking if the variable if undefined and waiting until it is defined.*
Like is mentioned before, a lot of stuff in javascript runs in async, hence the callback handlers. The reason it runs in async, is to avoid the rest of your code being "blocked" by remote calls. To avoid ending up in callback hell, most of us Javascript developers are moving more and more over to Promises. So your code could then look more like this:
myyt.prototype.respond = function(query, msg) {
return new Promise(function(resolve, reject) {
this.youtube.search(query, 1, function(error, result) {
if (error) {
reject("There was an error finding requested video."); // passed down to the ".catch" statement below
} else {
vidid = 'http://www.youtube.com/watch?v=' + result.items[0].id.videoId;
myyt.vidid = vidid;
console.log("1");
resolve(2); // Resolve marks the promises as successfully completed, and passes along to the ".then" method
}
});
}).then(function(two) {
// video is now the same as myyt.vidid as above.
console.log(two);
}).catch(function(err) {
// err contains the error object from above
msg.channel.sendMessage(err);
})
};
This would naturally require a change in anything that uses this process, but creating your own prototypes seems.. odd.
This promise returns the vidid, so you'd then set vidid = youtubetest.response(query, msg);, and whenever that function gets called, you do:
vidid.then(function(id) {
// id is now the vidid.
});
Javascript runs async by design, and trying to hack your way around that leads you to dark places fast. As far as I can tell, you're also targetting nodeJS, which means that once you start running something synchronously, you'll kill off performance for other users, as everyone has to wait for that sync call to finish.
Some suggested reading:
http://callbackhell.com/
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
https://stackoverflow.com/a/11233849/3646975
I'd also suggest looking up ES6 syntax, as it shortens your code and makes life a hellofalot easier (native promises were only introduced in ES6, which NodeJS 4 and above supports (more or less))
In javascript, please remember that any callback function you pass to some other function is called asynchronously. I.e. the calls to callback function may not happen "in order". "In order" in this case means the order they appear on the source file.
The callback function is simply called on certain event:
When there is data to be processed
on error
in your case for example when the youtube search results are ready,
'ready' event is received or 'message' is received.
etc.

How to ensure asynchronous code is executed after a stream is finished processing?

I have a stream that I process by listening for the data,error, and end events, and I call a function to process each data event in the first stream. Naturally, the function processing the data calls other callbacks, making it asynchronous. So how do I start executing more code when the data in the stream is processed? Listening for the end event in the stream does NOT mean the asynchronous data processing functions have finished.
How can I ensure that the stream data processing functions are finished when I execute my next statement?
Here is an example:
function updateAccountStream (accountStream, callThisOnlyAfterAllAccountsAreMigrated) {
var self = this;
var promises = [];
accountStream
.on('data', function (account) {
migrateAccount.bind(self)(account, finishMigration);
})
.on('error', function (err) {
return console.log(err);
})
.on('end', function () {
console.log("Finished updating account stream (but finishMigration is still running!!!)");
callThisOnlyAfterAllAccountsAreMigrated() // finishMigration is still running!
});
}
var migrateAccount = function (oldAccount, callback) {
executeSomeAction(oldAccount, function(err, newAccount) {
if (err) return console.log("error received:", err);
return callback(newAccount);
});
}
var finishMigration = function (newAccount) {
// some code that is executed asynchronously...
}
How do I ensure that callThisOnlyAfterAllAccountsAreMigrated is called AFTER the stream has been processed?
Can this be done with promises? Can it be done with through streams? I am working with Nodejs, so referencing other npm modules could be helpful.
As you said, listening for the end event on the stream is useless on its own. The stream doesn't know or care what you're doing with the data in your data handler, so you would need to write some code to keep track of your own migrateAccount state.
If it were me, I would rewrite this whole section. If you use the readable event with .read() on your stream, you can read as many items at a time as you feel like dealing with. If that's one, no problem. If it's 30, great. The reason you do this is so that you won't overrun whatever is doing work with the data coming from the stream. As-is right now, if accountStream is fast, your application will undoubtedly crash at some point.
When you read an item from a stream and start work, take the promise you get back (use Bluebird or similar) and throw it into an array. When the promise is resolved, remove it from the array. When the stream ends, attach a .done() handler to .all() (basically making one big promise out of every promise still in the array).
You could also use a simple counter for jobs in progress.
Using a through stream (the npm through2 module), I solved this problem using the following code that controls the asynchronous behaviour:
var through = require('through2').obj;
function updateAccountStream (accountStream, callThisOnlyAfterAllAccountsAreMigrated) {
var self = this;
var promises = [];
accountStream.pipe(through(function(account, _, next) {
migrateAccount.bind(self)(account, finishMigration, next);
}))
.on('data', function (account) {
})
.on('error', function (err) {
return console.log(err);
})
.on('end', function () {
console.log("Finished updating account stream");
callThisOnlyAfterAllAccountsAreMigrated();
});
}
var migrateAccount = function (oldAccount, callback, next) {
executeSomeAction(oldAccount, function(err, newAccount) {
if (err) return console.log("error received:", err);
return callback(newAccount, next);
});
}
var finishMigration = function (newAccount, next) {
// some code that is executed asynchronously, but using 'next' callback when migration is finished...
}
It is a lot easier when you handle streams via promises.
Copied from here, an example that uses spex library:
var spex = require('spex')(Promise);
var fs = require('fs');
var rs = fs.createReadStream('values.txt');
function receiver(index, data, delay) {
return new Promise(function (resolve) {
console.log("RECEIVED:", index, data, delay);
resolve(); // ok to read the next data;
});
}
spex.stream.read(rs, receiver)
.then(function (data) {
// streaming successfully finished;
console.log("DATA:", data);
}, function (reason) {
// streaming has failed;
console.log("REASON:", reason);
});

Asynchronously Perform Recursive Data Tree Construction?

I am working on a web application that makes use of a file tree. The frontend JavaScript performs an ajax request to my Node.js server which calls my browse2 exported function. This function is then responsible for supplying the correct path to my function, getFolderContents(), that recursively builds the file system hierarchy object structure.
My issue is that I am currently doing things synchronously. Having done research into the inner workings of Node.js, it seems as though I should avoid synchronous operations at all costs. As such, I wanted to convert my code to work asynchronously. However, I couldn't get it working and all of my solutions were convoluted.
I have tried managing the flow using the "async" package. I had no luck with figuring that out. I tried implementing my own system of counters/loops/callbacks to determine when processes had finished executing. Ultimately, I suppose I can't wrap my mind around asynchronous execution flow.
I would like to ask two questions:
1. In this case, would it be detrimental to perform this request synchronously instead of asynchronously?
2. If yes to the first question, how should I go about converting this code to be asynchronous?
Note: When I tried to do things asynchronously, I used each synchronous function's asynchronous counterpart.
Below is my synchronous (working) code:
var path = require('path');
var fs = require('fs');
exports.browse2 = function(request, response) {
var tree = getFolderContents('C:\\Users\\AccountName\\folder1\\folder2\\folder3\\test\\');
response.send(tree);
};
function getFolderContents(route) {
var branch = {};
branch.title = path.basename(route);
branch.folder = true;
branch.children = [];
var files = fs.readdirSync(route);
var size = files.length;
for (var i = 0; i < size; i++) {
var file = files[i];
var concatPath = path.join(route, file);
if (fs.lstatSync(concatPath).isDirectory())
branch.children.push(getFolderContents(concatPath));
else
branch.children.push({
"title" : path.basename(file),
"path" : file
});
}
return branch;
}
I appreciate all input!
Edit:
Added asynchronous code attempt. Not fully working. Only a part of the tree is received.
exports.browse2 = function(request, response) {
getFolderContents(
'C:\\Users\\AccountName\\folder1\\folder2\\folder3\\test\\',
function(tree) {
response.send(tree);
});
};
function getFolderContents(route, callback) {
var branch = {};
branch.title = path.basename(route);
branch.folder = true;
branch.children = [];
fs.readdir(route, function(err, files) {
files.forEach(function(file) {
var concatPath = path.join(route, file);
fs.lstat(concatPath, function(err, stats) {
if (stats.isDirectory())
branch.children.push(getFolderContents(concatPath, callback));
else
branch.children.push({
"title" : path.basename(file),
"path" : file
});
callback(branch);
});
});
});
}
The basic problem you're having is that when you use asynchronous calls, you can't just assign things to the return of the function. The entire point of async is that the function won't wait. So for example:
function get_data(a) {
var data = some_async_call(a);
//at this point, data is undefined because execution won't wait on the calls to finish
data.do_something(); // this breaks because of the above
}
So instead what you do is pass an anonymous function to the asynchronous function called a callback, and the asynchronous function calls that function once the operations actually complete. The above example would become this:
function get_data(a) {
some_async_call(a, function(data) {
data.do_something();
});
}
function some_async_call(variable, callback) {
call_async({
data: variable,
success: callback
});
}
And in your case that would look like this:
exports.browse2 = function(request, response) {
getFolderContents('C:\\Users\\AccountName\\folder1\\folder2\\folder3\\test\\', function(tree) {
response.send(tree);
});
};
function getFolderContents(route, callback) {
var branch = {};
branch.title = path.basename(route);
...
callback(branch);
}
If you're familiar with setTimetout, this is how that works - the design pattern is to pass an anonymous function that does the work, and that function then executes once the data/information is actually available.
I managed to get it working. Here are my answers to my own questions:
It is better to perform the tasks asynchronously because to do it otherwise would mean that the application would block other users from receiving their responses until subsequent requests have been responded to.
The way to convert the synchronous code to asynchronous code is to use a parallel loop. The code for my particular case is this:
var path = require('path');
var fs = require('fs');
exports.browse2 = function(request, response) {
getFolderContents(
'C:\\Users\\AccountName\\folder1\\folder2\\folder3\\test\\',
function(err, tree) {
if (err)
throw err;
response.send(tree);
});
};
function getFolderContents(route, callback) {
var branch = {};
branch.title = path.basename(route);
branch.folder = true;
branch.children = [];
fs.readdir(route, function(err, files) {
if (err)
return callback(err);
var pending = files.length;
if (!pending)
return callback(null, branch);
files.forEach(function(file) {
var concatPath = path.join(route, file);
fs.lstat(concatPath, function(err, stats) {
if (stats && stats.isDirectory()) {
getFolderContents(concatPath, function(err, res) {
branch.children.push(res);
if (!--pending)
callback(null, branch);
});
} else {
branch.children.push({
"title" : path.basename(file),
"path" : file
});
if (!--pending)
callback(null, branch);
}
});
});
});
}
Thanks to user "chjj" with his response to a similar question on this thread: node.js fs.readdir recursive directory search
And thanks to user "Dan Smolinske" for directing me to the thread.

Categories

Resources