I'm running the code below to download and unzip files. It works as intended when I try it with one file, but when I do multiple at the same time I get the following error:
Error: incorrect header check at Zlib._handle.onerror
var request = require('request');
var zlib = require('zlib');
var fs = require('fs');

var downloadUnzipFile = function (mID) {
    try {
        // Read File
        // (note: format() is not a vanilla Date method; it comes from a date-formatting extension)
        console.log("Started download/unzip of merchant: " + mID + " # " + new Date().format('H:i:s').toString());
        request(linkConst(mID))
            // Un-Gzip
            .pipe(zlib.createGunzip())
            // Write File
            .pipe(fs.createWriteStream(fileName(mID)))
            .on('error', function (err) {
                console.error(err);
            })
            .on('finish', function () {
                console.log("CSV created: " + fileName(mID));
                console.log("Completed merchant: " + mID + " # " + new Date().format('H:i:s').toString());
                //console.log("Parsing CSV...");
                //csvReader(fileName);
            });
    } catch (e) {
        console.error(e);
    }
}
module.exports = function (sMerchants) {
    var oMerchants = JSON.parse(JSON.stringify(sMerchants));
    oMerchants.forEach(function eachMerchant(merchant) {
        downloadUnzipFile(merchant.merchant_aw_id);
    });
};
Any ideas?
Thanks
EDIT:
To clarify, I'd like to run through each item (merchant) in the array (merchants) and download a file and unzip it. The way I currently do it means the downloading/unzipping all happens at the same time (which I think might be causing the error). When I remove the forEach loop and just download/unzip one merchant, the code works.
Yeah, as you suggest, it's likely that if you try to unzip too many files concurrently, you will run out of memory. Because you are handling streams, the unzip operations are asynchronous, meaning your forEach loop will continue to be called before each unzip operation completes. There are plenty of node packages that allow you to handle async operations so you can run the unzip function sequentially, but the simplest approach might just be to use a recursive function call. E.g.:
var downloadUnzipFile = function (mID) {
    try {
        // Read File
        console.log("Started download/unzip of merchant: " + mID + " # " + new Date().format('H:i:s').toString());
        return request(linkConst(mID))
            // Un-Gzip
            .pipe(zlib.createGunzip())
            // Write File
            .pipe(fs.createWriteStream(fileName(mID)));
    } catch (e) {
        console.log(e);
        return false;
    }
}
module.exports = function (sMerchants) {
    var merchants = JSON.parse(JSON.stringify(sMerchants)),
        count = 0;
    downloadUnzipFile(merchants[count]['merchant_aw_id'])
        .on('error', function (err) {
            console.log(err);
            // continue unzipping files, even if you encounter an error. You can also remove these lines if you want the script to exit.
            if (merchants[++count]) {
                downloadUnzipFile(merchants[count]['merchant_aw_id']);
            }
        })
        .on('finish', function () {
            if (merchants[++count]) {
                downloadUnzipFile(merchants[count]['merchant_aw_id']);
            }
        });
};
Haven't tested, of course. The main idea should work, though: call downloadUnzipFile recursively whenever the previous call errors out or finishes, as long as there are still items in the merchants array.
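If you'd rather not hand-roll the recursion, the same sequential behaviour can be sketched with a promise wrapper and async/await. A minimal, untested sketch, assuming a Node version with async/await and the question's own linkConst and fileName helpers:

var request = require('request');
var zlib = require('zlib');
var fs = require('fs');

// Wrap one download/unzip in a promise that settles when the write finishes.
function downloadUnzipFilePromise(mID) {
    return new Promise(function (resolve, reject) {
        request(linkConst(mID))                        // linkConst/fileName are the question's helpers
            .on('error', reject)                       // network errors
            .pipe(zlib.createGunzip())
            .on('error', reject)                       // bad gzip data ("incorrect header check") lands here
            .pipe(fs.createWriteStream(fileName(mID)))
            .on('error', reject)                       // file-system errors
            .on('finish', resolve);
    });
}

module.exports = async function (merchants) {
    // Process merchants strictly one at a time.
    for (var i = 0; i < merchants.length; i++) {
        try {
            await downloadUnzipFilePromise(merchants[i]['merchant_aw_id']);
            console.log("Completed merchant: " + merchants[i]['merchant_aw_id']);
        } catch (err) {
            console.error(err); // log and move on to the next merchant
        }
    }
};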
Related
Let's say this is my code (just a sample I wrote up to show the idea):
var extract = require("./postextract.js");
var rescore = require("./standardaddress.js");
RunFunc();
function RunFunc() {
    extract.Start();
    console.log("Extraction complete");
    rescore.Start();
    console.log("Scoring complete");
}
And I want rescore.Start() not to run until the entire extract.Start() has finished. Both scripts contain a spiderweb of functions inside of them, so putting a callback directly into the Start() function does not appear viable, as the final function won't return it, and I am having a lot of trouble understanding how to use Promises. What are ways I can make this work?
These are the scripts that extract.Start() begins and ends with. OpenWriter() is reached through multiple other functions and streams, with the actual fileWrite.write() being in another script that's attached to this (although it's not needed to detect the end of the run). Currently, fileWrite.on('finish') is where I want the script to be determined as done.
module.exports = {
    Start: function CodeFileRead() {
        //this.country = countryIn;
        //Read stream of the address components
        fs.createReadStream("Reference\\" + postValid.country + " ADDRESS REF DATA.csv")
            //Change separator based on file
            .pipe(csv({escape: null, headers: false, separator: delim}))
            //Indicate start of reading
            .on('resume', (data) => console.log("Reading complete postal code file..."))
            //Processes lines of data into storage array for comparison
            .on('data', (data) => {
                postValid.addProper[data[1]] = JSON.stringify(Object.values(data)).replace(/"/g, '').split(',').join('*');
            })
            //End of reading file
            .on('end', () => {
                postValid.complete = true;
                console.log("Done reading");
                //Launch main script, delayed to here in order to not read ahead of this stream
                ThisFunc();
            });
    },
    extractDone
}
function OpenWriter() {
    //File stream for writing the processed chunks into a new file
    fileWrite = fs.createWriteStream("Processed\\" + fileName.split('.')[0] + "_processed." + fileName.split('.')[1]);
    fileWrite.on('open', () => console.log("File write is open"));
    fileWrite.on('finish', () => {
        console.log("File write is closed");
    });
}
EDIT: I do not want to simply add the next script onto the end of the previous one and forgo the master file, as I don't know how long it will be and it's supposed to be designed to be capable of taking additional scripts past our development period. I cannot just use a package as it stands, because approval time in the company takes up to two weeks and I need this more immediately.
DOUBLE EDIT: This is all my code; every script and function is written by me, so I can make the scripts being called do what's needed.
You can just wrap your function in a Promise and return that.
module.exports = {
    Start: function CodeFileRead() {
        return new Promise((resolve, reject) => {
            fs.createReadStream(
                'Reference\\' + postValid.country + ' ADDRESS REF DATA.csv'
            )
            // .......some code...
            .on('end', () => {
                postValid.complete = true;
                console.log('Done reading');
                resolve('success');
            });
        });
    }
};
And run RunFunc like this:
async function RunFunc() {
    await extract.Start();
    console.log("Extraction complete");
    await rescore.Start();
    console.log("Scoring complete");
}
// or chain off the call directly
RunFunc().then(() => {
    console.log("All Complete");
});
Note: you can/should also handle errors by calling reject(someError) when an error occurs.
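For example, wiring the stream's 'error' event to reject — a minimal sketch of the same Start wrapper:

module.exports = {
    Start: function CodeFileRead() {
        return new Promise((resolve, reject) => {
            fs.createReadStream('Reference\\' + postValid.country + ' ADDRESS REF DATA.csv')
                .on('error', (err) => reject(err)) // a failed read now rejects the promise
                // .......some code...
                .on('end', () => {
                    postValid.complete = true;
                    resolve('success');
                });
        });
    }
};

A rejected promise makes the corresponding await in RunFunc throw, so you'd wrap those awaits in try/catch.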
EDIT: After knowing about ThisFunc():
Making a new EventEmitter will probably be the easiest solution:
eventEmitter.js
const EventEmitter = require('events').EventEmitter
module.exports = new EventEmitter()
And in the module whose Start() you await (e.g. postextract.js):

const eventEmitter = require('./eventEmitter');

module.exports = {
    Start: function CodeFileRead() {
        return new Promise((resolve, reject) => {
            //after all of your code
            eventEmitter.once('WORK_DONE', () => {
                resolve("Done");
            });
        });
    }
};
function OpenWriter() {
    ...
    fileWrite.on('finish', () => {
        console.log("File write is closed");
        eventEmitter.emit("WORK_DONE");
    });
}
And run RunFunc as before.
There's no generic way to determine when everything a function call does has finished.
It might accept a callback. It might return a promise. It might not provide any kind of method to determine when it is done. It might have side effects that you could monitor by polling.
You need to read the documentation and/or source code for that particular function.
Use async/await (promises). Example:
var extract = require("./postextract.js");
var rescore = require("./standardaddress.js");
RunFunc();
async function extract_start() {
    try {
        // note: await only waits here if Start() returns a promise
        await extract.Start();
    }
    catch (e) {
        console.log(e);
    }
}

async function rescore_start() {
    try {
        await rescore.Start();
    }
    catch (e) {
        console.log(e);
    }
}
async function RunFunc() {
    await extract_start();
    console.log("Extraction complete");
    await rescore_start();
    console.log("Scoring complete");
}
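One caveat with this pattern: await only pauses for a promise (or thenable). If Start() just kicks off streams and returns undefined, the awaits above resolve immediately. So this only helps once Start() is changed to return a promise, along the lines of the earlier answer. A bare sketch of that shape:

// Sketch: Start() must return a promise for the awaits above to actually wait
module.exports = {
    Start: function () {
        return new Promise((resolve, reject) => {
            // ...kick off the read/write streams here, then:
            // call resolve() from the final 'finish'/'end' handler
            // and reject(err) from any 'error' handler
        });
    }
};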
I have found that the write() method of the stream.Writable class does not write data sequentially. When I am sending an attachment to the server in chunks, this code assembles the data chunks in the wrong order if no delay occurs. If I put a debug message like console.log() in the middle of the loop (to dump the data and watch what is being written), this bug disappears. So, what is the race condition in this code? It looks like I am enforcing a sequential assembling of the file, so I do not understand what is wrong.
My code:
function join_chunks(company_id, attachment_id, num_chunks) {
    var stream;
    var file;
    var output_filename = ATTACHMENTS_PATH + '/comp' + company_id + '/' + attachment_id + '.data';
    var input_filename;
    var chunk_data;
    var chunk_count = 0;
    stream = fs.createWriteStream(output_filename, {flags: 'w+', mode: 0666});
    console.log('joining files:');
    for (var i = 0; i < num_chunks; i++) {
        input_filename = ATTACHMENTS_PATH + '/comp' + company_id + '/' + attachment_id + '-' + (i + 1) + '.chunk';
        console.log(input_filename);
        fs.readFile(input_filename, (err, chunk_data) => {
            if (err) throw err;
            stream.write(chunk_data, function () {
                chunk_count++;
                if (chunk_count == num_chunks) {
                    console.log('join finished. closing stream');
                    stream.end();
                }
            });
        });
    }
}
The console:
joining files:
/home/attachments/comp-2084830518/67-1.chunk
/home/attachments/comp-2084830518/67-2.chunk
/home/attachments/comp-2084830518/67-3.chunk
/home/attachments/comp-2084830518/67-4.chunk
join finished. closing stream
Node version: v6.9.2
fs.readFile is an asynchronous operation: its callbacks may fire in a different order than the loop issued the reads, so each chunk is handed to stream.write whenever its read happens to complete. That is the race condition here; the write stream itself preserves the order of the write calls it receives.
If you want the chunks written in order, read them synchronously with fs.readFileSync, or start each read from the previous write's callback to sequence them.
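For example, a minimal sketch of the sequenced version, reusing the question's own variables (untested):

function join_chunks(company_id, attachment_id, num_chunks) {
    var output_filename = ATTACHMENTS_PATH + '/comp' + company_id + '/' + attachment_id + '.data';
    var stream = fs.createWriteStream(output_filename, {flags: 'w+', mode: 0666});
    var i = 0;

    // Read and write one chunk at a time; start the next read only
    // after the previous chunk has been handed to the write stream.
    function nextChunk() {
        if (i === num_chunks) {
            console.log('join finished. closing stream');
            stream.end();
            return;
        }
        var input_filename = ATTACHMENTS_PATH + '/comp' + company_id + '/' + attachment_id + '-' + (i + 1) + '.chunk';
        i++;
        fs.readFile(input_filename, (err, chunk_data) => {
            if (err) throw err;
            stream.write(chunk_data, nextChunk);
        });
    }

    nextChunk();
}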
My problem is that the code does not seem to be running in order, as seen below.
This code is for my discord.js bot that I am creating.
var Discord = require("discord.js");
var bot = new Discord.Client();
var yt = require("C:/Users/username/Documents/Coding/Discord/youtubetest.js");
var youtubetest = new yt();
var fs = require('fs');
var youtubedl = require('youtube-dl');
var prefix = "!";
var vidid;

var commands = {
    play: {
        name: "!play ",
        fnc: "Gets a Youtube video matching given tags.",
        process: function (msg, query) {
            youtubetest.respond(query, msg);
            var vidid = youtubetest.vidid;
            console.log(typeof(vidid) + " + " + vidid);
            console.log("3");
        }
    }
};
bot.on('ready', () => {
    console.log('I am ready!');
});

bot.on("message", msg => {
    if (!msg.content.startsWith(prefix) || msg.author.bot || (msg.author.id === bot.user.id)) return;
    var cmdraw = msg.content.split(" ")[0].substring(1).toLowerCase();
    var query = msg.content.split("!")[1];
    var cmd = commands[cmdraw];
    if (cmd) {
        var res = cmd.process(msg, query, bot);
        if (res) {
            msg.channel.sendMessage(res);
        }
    } else {
        let msgs = [];
        msgs.push(msg.content + " is not a valid command.");
        msgs.push(" ");
        msgs.push("Available commands:");
        msgs.push(" ");
        msg.channel.sendMessage(msgs);
        msg.channel.sendMessage(commands.help.process(msg));
    }
});

bot.on('error', e => { console.error(e); });

bot.login("mytoken");
The youtubetest.js file:
var youtube_node = require('youtube-node');
var ConfigFile = require("C:/Users/username/Documents/Coding/Discord/json_config.json");
var mybot = require("C:/Users/username/Documents/Coding/Discord/mybot.js");

function myyt() {
    this.youtube = new youtube_node();
    this.youtube.setKey(ConfigFile.youtube_api_key);
    this.vidid = "";
}

myyt.prototype.respond = function (query, msg) {
    this.youtube.search(query, 1, function (error, result) {
        if (error) {
            msg.channel.sendMessage("There was an error finding requested video.");
        } else {
            vidid = 'http://www.youtube.com/watch?v=' + result.items[0].id.videoId;
            myyt.vidid = vidid;
            console.log("1");
        }
    });
    console.log("2");
};

module.exports = myyt;
As the code shows, I have an object for the commands that the bot will be able to process, and I have a function to run said commands when a message is received.
Throughout the code you can see that I have put three console.logs with 1, 2 and 3, showing the order in which I expect the parts of the code to run. When the code is run and a query is found, the output is this:
I am ready!
string +
2
3
1
This shows that the code is running in a different order than I expect it to.
All help is very highly appreciated :)
*Update! Thank you all very much for helping me understand why it isn't working. I found a solution: in the main file, at vidid = youtubetest.respond(query, msg), the variable is not assigned until the function is done, so the rest of my code runs without the variable. To fix it, I simply put an if statement checking whether the variable is undefined and waiting until it is defined.*
Like is mentioned before, a lot of stuff in JavaScript runs asynchronously, hence the callback handlers. The reason it runs async is to avoid the rest of your code being "blocked" by remote calls. To avoid ending up in callback hell, most of us JavaScript developers are moving more and more over to Promises. So your code could then look more like this:
myyt.prototype.respond = function (query, msg) {
    var self = this; // inside the Promise callback, `this` would no longer be the myyt instance
    return new Promise(function (resolve, reject) {
        self.youtube.search(query, 1, function (error, result) {
            if (error) {
                reject("There was an error finding requested video."); // passed down to the ".catch" statement below
            } else {
                vidid = 'http://www.youtube.com/watch?v=' + result.items[0].id.videoId;
                myyt.vidid = vidid;
                console.log("1");
                resolve(vidid); // Resolve marks the promise as successfully completed, and passes the id along to the ".then" method
            }
        });
    }).then(function (id) {
        // id is now the same as myyt.vidid above.
        console.log(id);
        return id; // pass it through so callers of respond() receive it too
    }).catch(function (err) {
        // err contains the error message from above
        msg.channel.sendMessage(err);
    });
};
This would naturally require a change in anything that uses this process, but creating your own prototypes seems... odd.
This promise returns the vidid, so you'd then set vidid = youtubetest.respond(query, msg);, and whenever that function gets called, you do:
vidid.then(function (id) {
    // id is now the vidid.
});
JavaScript runs async by design, and trying to hack your way around that leads you to dark places fast. As far as I can tell, you're also targeting Node.js, which means that once you start running something synchronously, you'll kill off performance for other users, as everyone has to wait for that sync call to finish.
Some suggested reading:
http://callbackhell.com/
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise
https://stackoverflow.com/a/11233849/3646975
I'd also suggest looking up ES6 syntax, as it shortens your code and makes life a hell of a lot easier (native promises were only introduced in ES6, which Node.js 4 and above supports, more or less).
In JavaScript, please remember that any callback function you pass to some other function is called asynchronously, i.e. the calls to the callback function may not happen "in order". "In order" in this case means the order in which they appear in the source file.

The callback function is simply called on a certain event:

- when there is data to be processed
- on error
- in your case, for example, when the youtube search results are ready
- when the 'ready' event is received or a 'message' is received
- etc.
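A tiny self-contained illustration of source order versus call order:

console.log("A"); // runs first
setTimeout(function () {
    console.log("C"); // runs last, when the timer event fires
}, 0);
console.log("B"); // runs second, before the callback
// Output: A, B, C — the callback runs after the surrounding code, not where it appears.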
I have a little FTP script which basically transfers an entire directory tree (by walking it with fs.readdir) to an FTP server, one file at a time (I have to do some analysis on each file as it's uploaded, hence the one-at-a-time behaviour).
However, the bit that does a single file (there's another bit for directories, which uses c.mkdir rather than c.put) looks like this:
console.log('Transferring [' + ival + ']');
var c = new Ftp();
c.on('ready', function () {
    c.put(ival, ival, function (err) {
        console.log(err);
    });
    c.end();
});
As you can see, it's using a very simple method of logging, in that failures simply get sent to the console.
Unfortunately, since the FTPs are done asynchronously, errors are being delivered to the console in a sequence totally unrelated to the file name output.
Is there a way to force the FTP to be done synchronously so that errors would immediately follow the file name? Basically, I want the entire sequence from the initial console.log to the final }); to be done before moving on to the next file.
Even if there is, it's not recommended. You generally don't want to block the event loop with such a long synchronous operation.
What would probably be more useful is using recursion or Promises to ensure that things happen in a sequence.
Example:
let ivals = [/* lots of ivals here */];

function putItems(ivals) {
    let ival = ivals[0];
    console.log('Transferring [' + ival + ']');
    var c = new Ftp();
    c.on('ready', function () {
        c.put(ival, ival, function (err) {
            console.log(err);
            c.end();
            // Don't continue if we're out of items.
            if (ivals.length === 1) { return; }
            putItems(ivals.slice(1)); // Call again with the rest of the items.
        });
    });
}

putItems(ivals);
It can probably be done more intelligently by using a nested function and a single FTP context. But you get the point.
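For example, a sketch of that idea — one connection and a nested function that advances through the list (untested; it assumes the ftp client allows sequential puts on a single connection):

var ivals = [/* lots of ivals here */];
var c = new Ftp();

c.on('ready', function () {
    // Reuse the single connection; move to the next item from each put's callback.
    function putNext(i) {
        if (i === ivals.length) {
            c.end(); // all done, close the one connection
            return;
        }
        var ival = ivals[i];
        console.log('Transferring [' + ival + ']');
        c.put(ival, ival, function (err) {
            if (err) { console.log('[' + ival + ']', err); }
            putNext(i + 1);
        });
    }
    putNext(0);
});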
Without making things synchronous, you can solve your error logging problem by just logging the name with the error. You can wrap this in a closure so you can keep track of the ival that goes with a particular error:
(function (ival) {
    console.log('Transferring [' + ival + ']');
    var c = new Ftp();
    c.on('ready', function () {
        c.put(ival, ival, function (err) {
            console.log('[' + ival + ']', err);
        });
        c.end();
    });
})(ival);
Why don't you just push the errors to an array, and when all uploads are done, you will have that array with all those errors in order?
I would do something like this:
var errArray = [];

console.log('Transferring [' + ival + ']');
var c = new Ftp();
c.on('ready', function () {
    c.put(ival, ival, function (err) {
        errArray.push(err);
    });
    c.end();
});
c.on('end', function () {
    errArray.forEach(function (err) {
        console.log(err);
    });
});
I'm working on a grunt plugin that was written (by someone else) to receive hard-coded file names (src and dest), but I'm trying to change it so it can be pointed at a directory with a globbing pattern and can specify an output folder for the "dest". But I'm having trouble with the async.each, because my initial implementation has a nested async.each. Specifically, I think I have a problem with when to call the callback(). I'm getting hung up in some loop somewhere.
This does work as written, because the files are created correctly with both ways of configuring the Gruntfile.js, but the previously-working tests are now broken.
I'm just wondering about how to structure the second nested loop. Perhaps that doesn't need to use async?
The Gruntfile.js should be able to be config'd as:
myplugin: {
    dev: {
        files: {
            'src/business.html': 'src/test_src/business.test',
            ...
        }
    }
},
or as a globbing pattern (this is what I'm adding):
myplugin: {
    dev: {
        src: ['src/test_src/*.test'],
        dest: 'output'
    }
},
The plugin started out with a single async.each, with each loop handling a specific "files" src/dest. But when we're using a globbing pattern, there's only one outer loop, the pattern, so I need a second async.each to handle the actual files (there are ~11).
grunt.registerMultiTask('myplugin', 'Compiles files using myplugin', function () {
    done = this.async();
    // Iterate over all specified file groups.
    async.each(this.files, function (fileGlob, cb) {
        var destination = fileGlob.dest;
        grunt.log.debug("FileGlob: " + fileGlob);
        async.each(fileGlob.src, function (filepath, callback) {
            if (notAPartial(filepath) && grunt.file.exists(filepath)) {
                if (filepath.match(/\.(test|html)$/)) {
                    grunt.log.debug('test compilation of ' + filepath);
                    compileMyFile(filepath, destination);
                } else {
                    // this callback is from the orig version
                    // i think it's causing problems with the nested async.each calls
                    callback(new Error("No handler for filetype: " + filepath));
                }
            }
        }, function (err) {
            if (err) done(err);
            else done();
        });
        cb();
    }, function (err) {
        if (err) done(err);
        else done();
        grunt.log.ok("Compiled " + count + " files.");
    });
});
It looks like your callbacks are a little out of place. The signature for async.each is: async.each(arrayOfThings, callbackPerThing, callbackWhenWeGotThroughAllThings).
For nested async.each statements, I like to name the callbacks based on what they do, to avoid confusion, such as:
var done = this.async();
async.each(this.files, function (fileGlob, nextGlob) {
    async.each(fileGlob.src, function (filepath, nextFile) {
        doSomethingAsync(function () {
            // Continue to the next file
            nextFile();
        });
    }, function () {
        // When we are done with all files in this glob
        // continue to the next glob
        nextGlob();
    });
}, function () {
    // When we are done with all globs
    // call done to tell the Grunt task we are done
    done();
});
In your case above, you are right about not needing the inner async.each. Nor do you need the outer async.each as none of the operations appear to be asynchronous. You can more simply do the following:
grunt.registerMultiTask('myplugin', 'Compiles files using myplugin', function () {
    this.files.forEach(function (fileGlob) {
        var destination = fileGlob.dest;
        grunt.log.debug("FileGlob: " + fileGlob);
        fileGlob.src.forEach(function (filepath) {
            if (notAPartial(filepath) && grunt.file.exists(filepath)) {
                if (filepath.match(/\.(test|html)$/)) {
                    grunt.log.debug('test compilation of ' + filepath);
                    compileMyFile(filepath, destination);
                } else {
                    grunt.log.error("No handler for filetype: " + filepath);
                }
            }
        });
    });
    grunt.log.ok("Compiled " + count + " files.");
});