Busboy and GridFS won't save file to filesystem - javascript

When the following POST API is called, it should save the file to the file system. However, the file is not being saved. I can see the file in the console, but can't save/write it.
I have the following code:
// POST /notes: parses the multipart request body with Busboy and forwards each
// uploaded file's data to a write stream obtained from gfs.createWriteStream().
router.post('/notes', function(req, res, next) {
// NOTE(review): this declares `endFileWriteTime`, but the 'close' handler below
// assigns `endFileWrite`, which is not declared anywhere in this handler.
var gfsstream, startFileWrite, endFileWriteTime;
var busboy = new Busboy({ headers: req.headers });
// Runs once per file part found in the form.
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
startFileWrite = new Date().getTime();
console.log('File [' + fieldname + ']: filename: ' + filename);
gfsstream = gfs.createWriteStream('/uploads');
// Chunks are copied by hand; the return value of write() is ignored.
file.on('data', function(data) {
gfsstream.write(data);
});
file.on('end', function() {
gfsstream.end();
// NOTE(review): req is piped into gfsstream right after gfsstream.end() was
// called, and req is also piped into busboy at the bottom of this handler.
req.pipe(gfsstream);
});
// Logs the elapsed time once the write stream emits 'close'.
gfsstream.on('close', function (file) {
// do something with `file`
endFileWrite = new Date().getTime();
console.log('File [' + fieldname + '] Finished');
console.log("Time needed: " + (endFileWrite - startFileWrite) + " ms");
});
});
busboy.on('error', function(err) {
console.error(err);
// NOTE(review): Express documents res.sendStatus() as taking only a status
// code; the extra arguments are presumably ignored - confirm.
res.sendStatus(500, 'ERROR', err);
});
// Responds as soon as busboy has finished parsing; nothing here waits for the
// gfsstream 'close' event.
busboy.on('finish', function end() {
res.sendStatus(200);
});
req.pipe(busboy);
});
req.pipe(gfsstream) might be the issue here, but I am not sure what is preventing the file from being saved.

Just do file.pipe(gfsstream) and use the finish event instead of the close event:
// Handles one uploaded file: pipes Busboy's file stream straight into the
// write stream returned by gfs.createWriteStream() and logs how long it took.
// `busboy`, `gfs`, `gfsstream` and `startFileWrite` come from the enclosing
// route handler.
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
  startFileWrite = new Date().getTime();
  console.log('File [' + fieldname + ']: filename: ' + filename);
  gfsstream = gfs.createWriteStream('/uploads');
  // pipe() returns its destination, so 'finish' is the write stream's event.
  file.pipe(gfsstream).on('finish', function() {
    // Declared locally: the enclosing handler only declares `endFileWriteTime`,
    // so a bare assignment to `endFileWrite` would create an implicit global
    // (and throw a ReferenceError in strict mode).
    var endFileWrite = new Date().getTime();
    console.log('File [' + fieldname + '] Finished');
    console.log("Time needed: " + (endFileWrite - startFileWrite) + " ms");
  });
});

Related

Chaining in EventEmitters in JavaScript and calling emitter.emit() before emitter.on()

In the example below, the .on() calls appear after the code that calls emitter.emit(), yet the listeners still receive the events. I know that this is somehow related to chaining, but can anyone explain the chaining and why the order of operations isn't a concern in this example?
var EventEmitter = require('events').EventEmitter;
var fs = require('fs');
/**
 * Reads every file in `files` as UTF-8 text and reports progress through the
 * returned EventEmitter:
 *   'fileread' (file)        - the file was read successfully
 *   'found'    (file, match) - once per entry returned by content.match(regex)
 *   'error'    (err)         - fs.readFile reported an error for a file
 *
 * @param {string[]} files - paths of the files to scan
 * @param {RegExp} regex - pattern passed to String.prototype.match
 * @returns {EventEmitter} emitter on which the events above are raised
 */
function findPattern(files, regex) {
  var emitter = new EventEmitter();

  // Scans a single file and raises the corresponding events.
  function scan(file) {
    fs.readFile(file, 'utf8', function(err, content) {
      if (err) {
        return emitter.emit('error', err);
      }
      emitter.emit('fileread', file);

      var matches = content.match(regex);
      if (!matches) {
        return;
      }
      for (var i = 0; i < matches.length; i++) {
        emitter.emit('found', file, matches[i]);
      }
    });
  }

  files.forEach(scan);
  return emitter;
}
// Logs each file that was read successfully.
function onFileRead(file) {
  console.log(file + ' was read');
}

// Logs each match reported for a file.
function onFound(file, match) {
  console.log('Matched "' + match + '" in file ' + file);
}

// Logs read errors.
function onError(err) {
  console.log('Error emitted: ' + err.message);
}

// findPattern() returns its emitter, and every .on() call returns the same
// emitter, so the registrations can be chained. The events are emitted from
// fs.readFile callbacks inside findPattern.
findPattern(['fileA.txt', 'fileB.json'], /hello \w+/g)
  .on('fileread', onFileRead)
  .on('found', onFound)
  .on('error', onError);

Node.js Downloading multiples files asynchronously

In trying to get a hang of node.js asynchronous coding style, I decided to write a program that would read a text file containing a bunch of URLS to download and download each file. I started out writing a function to download just one file (which works fine), but having trouble extending the logic to download multiple files.
Here's the code:
var http = require("http"),
fs = require("fs"),
input = process.argv[2],
folder = "C:/Users/Wiz/Downloads/",
regex = /(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?/,
urls = null,
url = "",
filename = "";
// Reads the URL list file named on the command line and queues a download for
// every line that matches `regex`. Lines are walked from last to first.
fs.readFile(input, "utf8", function(e, data) {
  console.log("Reading file: " + input);
  if (e) {
    // `data` is undefined when the read fails, so stop here instead of
    // crashing on data.split() below.
    console.log("Got error:" + e.message);
    return;
  }
  // Split on both \n and \r\n so a trailing \r never leaks into a URL or
  // into the destination filename.
  urls = data.split(/\r?\n/);
  for (var i = urls.length; i--;) {
    url = urls[i].trim();
    if (!url.match(regex)) continue;
    // Destination name is whatever follows the last '/' in the URL.
    filename = folder + url.substring(url.lastIndexOf('/') + 1);
    downloadQueue.addItem(url, filename);
  }
});
// Sequential download queue: items are fetched one at a time, and the next
// download starts only after the current one has finished or failed.
var downloadQueue = {
  // Pending items, each { src: <url>, dest: <file path> }; index 0 is the one
  // currently being downloaded.
  queue: [],

  // Appends an item and starts downloading if the queue was idle.
  addItem: function(p_sSrc, p_sDest) {
    this.queue.push({
      src: p_sSrc,
      dest: p_sDest
    });
    if (this.queue.length === 1) {
      this.getNext();
    }
  },

  // Downloads the item at the head of the queue, then calls removeItem()
  // exactly once, whether the download succeeded or failed.
  getNext: function() {
    var l_oItem = this.queue[0];
    var l_oFile = null;
    var l_bSettled = false;

    // Single exit point so that several error/finish events for the same item
    // cannot advance the queue more than once.
    function settle(p_oError) {
      if (l_bSettled) return;
      l_bSettled = true;
      if (p_oError) {
        console.log("Error: " + p_oError.message);
        if (l_oFile) {
          // Drop the partial file. fs.unlink requires a callback.
          l_oFile.destroy();
          fs.unlink(l_oItem.dest, function(p_oUnlinkError) {
            if (p_oUnlinkError && p_oUnlinkError.code !== "ENOENT") {
              console.log("Error: " + p_oUnlinkError.message);
            }
          });
        }
      } else {
        console.log("Download complete.");
      }
      downloadQueue.removeItem();
    }

    try {
      http.get(l_oItem.src, function(response) {
        console.log("Downloading: " + l_oItem.dest);
        l_oFile = fs.createWriteStream(l_oItem.dest);
        // 'finish' on the file means all data was handed to the file system;
        // the response's 'end' only means the last chunk was read.
        l_oFile.on("finish", function() {
          settle(null);
        });
        l_oFile.on("error", settle);
        response.on("error", settle);
        response.pipe(l_oFile);
      }).on("error", settle); // connection-level failures surface on the request
    } catch (l_oError) {
      // http.get throws synchronously for URLs it cannot handle
      // (e.g. an https: URL or one without a protocol).
      settle(l_oError);
    }
  },

  // Drops the finished item and starts the next one, if any.
  removeItem: function() {
    this.queue.splice(0, 1);
    if (this.queue.length !== 0) {
      this.getNext();
    } else {
      console.log("All items downloaded");
    }
  }
};
How do I structure the code so that the completion of the first download can signal the initiation of the next one. Please note that this exercise is just for learning purposes, to understand how asynchronous coding works. In practice, I'm sure there are much better tools out there to download multiple files.
Start simple: it looks like you copied and pasted code without fully understanding what it does.
Do a simple loop, that get the url, and print something.
var http = require('http');
// Request options shared by all queries. Kept in a declared variable named
// `options`: assigning to an undeclared `URL` would create an implicit global
// that overwrites the built-in URL class.
var options = require('url').parse('http://www.timeapi.org/utc/now?format=%25F%20%25T%20-%20%25N');
options.headers = {'User-Agent': 'Hello World'};
// launch 20 queries asynchronously
for (var i = 0; i < 20; i++) {
  // The IIFE gives every request its own copy of `i` for the callbacks.
  (function(i) {
    console.log('Query ' + i + ' started');
    var req = http.request(options, function(res) {
      console.log('Query ' + i + ' status: ' + res.statusCode + ' - ' + res.statusMessage);
      // A response may arrive in several chunks, so collect them and report
      // once on 'end' instead of logging "ended" for every 'data' event.
      var body = '';
      res.setEncoding('utf8');
      res.on('data', function(content) {
        body += content;
      });
      res.on('end', function() {
        console.log('Query ' + i + ' ended - ' + body);
      });
    });
    req.on('error', function(err) {
      console.log('Query ' + i + ' return error: ' + err.message);
    });
    req.end();
  })(i);
}
All the URLs will be fetched asynchronously. You can observe that the responses do not arrive in order, but are still processed correctly.
The difficulty with async is not doing things in parallel, because you just write the code for a single task and execute it multiple times. It becomes complicated when you need, for instance, to wait for all tasks to finish before continuing. For that, have a look at promises.
Here is what I started out with. Figuring that each download was invoked asynchronously, they would all be independent of each other.
var http = require("http"),
fs = require("fs"),
input = process.argv[2],
folder = "C:/Users/Wiz/Downloads/",
regex = /(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?/,
urls = null,
url = "",
filename = "";
// Reads the URL list file named on the command line and starts one download
// per matching line; the downloads run independently of each other.
fs.readFile(input, "utf8",
  function(e, data) {
    console.log("Reading file: " + input);
    if (e) {
      // `data` is undefined when the read fails; stop instead of crashing on
      // data.split() below.
      console.log("Got error:" + e.message);
      return;
    }

    // Each call gets its own `src`/`dest`. Reading a shared outer `filename`
    // from inside the async callback would make every response write to
    // whichever filename the loop assigned last.
    function download(src, dest) {
      http.get(src, function(response) {
        // pipe() ends the file stream when the response ends.
        response.pipe(fs.createWriteStream(dest));
      }).on("error", function(err) {
        console.log("Got error:" + err.message);
      });
    }

    // Split on both \n and \r\n so a trailing \r never leaks into a URL or
    // into the destination filename.
    urls = data.split(/\r?\n/);
    for (var i = urls.length; i--;) {
      var src = urls[i].trim();
      if (!src.match(regex)) continue;
      download(src, folder + src.substring(src.lastIndexOf('/') + 1));
    }
  });

Streaming to and from external programs expecting files with javascript nodejs

Problem:
I need to upload hundreds of PDF documents, convert them to HTML and then store the HTML in MongoDB. I am currently saving both the incoming PDF documents and converted HTML in the file system. Is there a way to use streams to avoid all the file I/O?
Current approach (which works but is slow):
I am using:
Busboy to read the uploaded PDF documents which I save to the file system.
I create an "exec" child process in node.js which invokes "'pdftohtml -c -s -noframes -nodrm ' + inputFileNamePDF + ' ' + outputFileNameHTML,". The HTML output files get saved to the file system.
I then iterate through all the HTML files to create a Bulk upsert to MongoDB.
Ideally I'd like to stream the uploaded PDF file directly to "inputFileNamePDF". Then stream the converted "outputFileNameHTML" to the bulk upsert.
Here's the Code:
var path = require("path"),
Busboy = require('busboy')
http = require('http'),
util = require('util'),
fs = require('fs-extra'),
pdftohtml = require('pdftohtmljs'),
exec =require('child_process').exec,
pdf_extract = require('pdf-extract'),
exports.postUpload = function (req, res) {
// parse a file upload
var fileName = "";
var uploadDir = '/tmp/' + res.locals.user._doc.email.replace(/[#\.]/g,"_");
var infiles = 0, outfiles = 0, done = false,
busboy = new Busboy({ headers: req.headers });
console.log('Start parsing form ...');
// Sends the response and starts the conversion once the form has been fully
// parsed AND every received file has been written. Called from both event
// handlers below because either condition can become true last.
function finishIfComplete() {
  if (done) console.log(outfiles + '/' + infiles + ' parts written to disk');
  if (done && infiles === outfiles) {
    // ACTUAL EXIT CONDITION
    console.log('All parts written to disk');
    res.writeHead(200, { 'Connection': 'close' });
    res.end("That's all folks!");
    convertToHTMLTxt();
  }
}
busboy.on('file', function (fieldname, file, filename) {
  // Remember this file's own number; `infiles` may have grown by the time the
  // write callback runs.
  var fileNumber = ++infiles;
  console.log("file event #" + fileNumber);
  onFile(fieldname, file, filename, function () {
    ++outfiles;
    console.log("file #" + fileNumber + " written.");
    finishIfComplete();
  });
});
busboy.on('finish', function () {
  console.log('Done parsing form!');
  done = true;
  // Also check here: if the last file was already written before parsing
  // finished (or the form contained no files), no further file callback will
  // come and the request would otherwise never be answered.
  finishIfComplete();
});
req.pipe(busboy);
/**
 * Saves one uploaded file stream into `uploadDir` under a sanitized name.
 *
 * Every character other than letters, digits, '_' and '-' is replaced by '_',
 * then a trailing "_pdf" / "_docx" / "_doc" is turned back into an extension.
 *
 * @param {string} fieldname - form field name, used in log output only
 * @param {stream.Readable} file - stream carrying the file contents
 * @param {string} filename - original file name from the upload
 * @param {Function} next - called with no arguments once the write stream emits 'close'
 */
function onFile(fieldname, file, filename, next) {
  // or save at some other location
  var safeName = filename
    .replace(/[^a-z0-9_\-]/gi, "_")
    .replace(/_(pdf|docx|doc)$/i, ".$1");
  var label = fieldname + '(' + safeName + ')';
  var target = fs.createWriteStream(path.join(uploadDir, safeName));

  file.on('end', function () {
    console.log(label + ' EOF');
  });
  target.on('close', function () {
    console.log(label + ' written to disk');
    next();
  });

  console.log(label + ' start saving');
  file.pipe(target);
}
function convertToHTMLTxt () {
var execTxt, execHTML, execPDF;
var textDir = 'text';
var htmlDir = 'html';
console.log('Directory: ', uploadDir);
fs.readdir(uploadDir, function(err, files) {
if (err) {
console.log('error reading directory: ', uploadDir);
return;
}
files.forEach(function(fileName) {
var fileNameHTML = path.join(uploadDir, htmlDir,
fileName.replace(/(pdf|docx|doc)$/i,"html"));
var fileNamePDF = path.join(uploadDir, fileName);
if (fileName.match(/pdf$/i)) {
execPDF = exec('pdftohtml -c -s -noframes -nodrm '
+ fileNamePDF + ' ' + fileNameHTML,
function(error, stdout, stderr) {
console.log('stdout: ', stdout);
console.log('stderr: ', stderr);
if (error !== null) {
console.log('exec error: ', error);
}
});
execPDF.on('close', function (code) {
console.log('******** PDF to HTML Conversion complete - exit code '
+ code);
});
}
})
});
Once the conversion is done I iterate through all the HTML files and do a MongoDB bulk upsert:
fs.readFile(fileNameHTML, 'utf8', function (err, HTMLData) {
if (err) {
console.log('error reading file: ', fileNameHTML + '/nerror: ' + err);
callback(err);
return;
}
bulk.find({ userName: userName,
docName : fileName
}).upsert()
.updateOne({userName: userName,
docName : fileName,
HTMLData : HTMLData});

I'm trying to reply to a tweet using Twit on Node.js

Not sure how to input the in_reply_to_status_id.
It's tweeting out fine, just not replying to the tweet with the mention in it.
The in_reply_to_status_id is part of the Twitter API, which Twit accesses, but can I use this in this context?
Any help would be greatly appreciated.
Here's the code:
// Opens a filtered stream for '#example' and, per socket.io connection, posts
// chat messages as statuses and posts a reply status for matching tweets.
var stream = T.stream('statuses/filter', { track: '#example'});
io.on('connection', function (socket) {
// Posts each chat message as a status and echoes the result to the socket.
socket.on('chat message', function (msg) {
console.log('message: ' + msg);
// NOTE(review): `err` is not checked before `data.text` / `data.user` are read.
P.post('statuses/update', { status: '#example' + ' ' + msg}, function (err, data, response) {
socket.emit('info', data.text);
socket.emit('userPic', data.user.profile_image_url);
console.log(data.user.profile_image_url);
});
});
// NOTE(review): stream.start() and the 'tweet' listener sit inside the
// connection handler, so they run again for every new socket connection.
stream.start();
stream.on('tweet', function (tweet) {
console.log(tweet);
// console.log('listening to tweets');
if (tweet.text.indexOf('#example') > -1) {
console.log("there is a tweet");
// Timestamp appended to the status text - presumably to keep each status unique; confirm.
var number = Date.now();
// Picks a random entry from `replies` (defined outside this snippet).
var reply = replies[Math.floor(Math.random() * replies.length)];
var name = '#' + tweet.user.screen_name;
// NOTE(review): in_reply_to_status_id is given an array holding
// '#' + screen_name here, not an id taken from `tweet`.
T.post('statuses/update', {in_reply_to_status_id: [name], status: reply + ' ' + number + ' ' + name}, function (err, data, response) {
console.log(reply + number);
socket.emit('reply', data.text);
});
}
});
});
The in_reply_to_status_id parameter was being given the user's screen name instead of the ID of the tweet being replied to. The solution is to pass the tweet's id_str:
var nameID = tweet.id_str;
var name = tweet.user.screen_name;
T.post('statuses/update', {in_reply_to_status_id: nameID, status: reply + ' ' + number + ' #' + name}, function(err, data, response) {

checking if busboy finish event has already occured or not

I have a form for which I need to do some file processing that takes some time, so I want the finish event handler to run only after that processing is complete. Right now,
while Node is still processing a file and executing the commands, busboy emits the finish event and the handler fires immediately. How do I make sure that the finish handler runs only after all files have been processed?
// Per uploaded file: save it under a UUID name, convert it to PDF with
// libreoffice, encrypt the PDF with pdfbox, delete the uploaded original and
// record the original and stored file names.
busboy.on('file', function(fieldname, file, filename,transferEncoding,mimeType) {
var fName = uuid.v4();
// Everything after the last '.' of the client-supplied filename.
var fileext = filename.substr(filename.lastIndexOf('.') + 1);
var filepath = path.normalize(__dirname + '/../../');
var fstream = fs.createWriteStream(filepath+'/server/uploads/'+fName+'.'+fileext);
var uploadFileCompletion = file.pipe(fstream);
// Runs once the uploaded file has been written out.
uploadFileCompletion.on('finish',function(){
console.log('uploaded now');
// NOTE(review): `fileext` comes from the uploaded filename and is inserted
// into a shell command string.
var cmd = 'libreoffice --headless --convert-to pdf --outdir '+ filepath + 'server/uploads ' + filepath + 'server/uploads/' + fName + '.' + fileext;
// NOTE(review): `error` is not checked in either exec callback.
exec(cmd, function(error,stdout,stderr){
sys.puts(stdout);
var encryptCmd = 'java -jar server/uploads/pdfbox-app-1.8.6.jar Encrypt -canAssemble false -canExtractContent false -canExtractForAccessibility false ' +
'-canModify false -canModifyAnnotations false -canPrint false -canPrintDegraded false server/uploads/' + fName + '.' + 'pdf'
+ ' ' + 'server/uploads/' +fName + '.' + 'pdf';
exec(encryptCmd, function(error,stdout,stderr){
fs.unlink(filepath+'server/uploads/'+fName + '.' + fileext, function(){
console.log("removed " +filepath+'server/uploads/'+fName + '.' + fileext);
// The names are recorded only here, in the innermost callback.
actualFileName.push(filename);
storedFileName.push(fName+'.'+'pdf');
});
});
});
});
});
// Every form field value is parsed as JSON and replaces `noteData`.
busboy.on('field', function(fieldname, val, valTruncated,keyTruncated) {
noteData = JSON.parse(val);
});
// NOTE(review): this handler reads actualFileName / storedFileName without
// waiting for the exec/unlink callbacks above that fill them.
busboy.on('finish',function(){
noteData.uploader = req.user.username;
noteData.actualFileName = actualFileName;
noteData.storedFileName = storedFileName;
noteData.noteId = uuid.v4();
Campusnotes.create(noteData,function(err,note){
if(err){
res.status(400);
return res.send({reason:err.toString()});
}
console.log('finish');
res.status(200);
res.end();
})
});
now the console log for this is as follows -
finish
uploaded now
convert /home/unknown/public_html/campustop/server/uploads/8465f9a9-d6b7-4d53-8cb5-a8dbf3aed6a5.odt -> /home/unknown/public_html/campustop/server/uploads/8465f9a9-d6b7-4d53-8cb5-a8dbf3aed6a5.pdf using writer_pdf_Export
removed /home/unknown/public_html/campustop/server/uploads/8465f9a9-d6b7-4d53-8cb5-a8dbf3aed6a5.odt
indicating that the finish event is getting fired again and again
You could try something like:
// Number of uploaded files whose processing (convert, encrypt, cleanup) has
// not completed yet.
var files = 0;
// Set once busboy has emitted 'finish', i.e. the whole form has been parsed.
var parsingDone = false;

// Per uploaded file: save it under a UUID name, convert it to PDF with
// libreoffice, encrypt the PDF with pdfbox, delete the uploaded original and
// record the original and stored file names.
busboy.on('file', function(fieldname, file, filename, transferEncoding, mimeType) {
  ++files;
  var fName = uuid.v4();
  // The extension comes from the client-supplied filename and ends up in a
  // shell command below, so only letters and digits are kept.
  var fileext = filename.substr(filename.lastIndexOf('.') + 1).replace(/[^a-z0-9]/gi, '');
  var filepath = path.normalize(__dirname + '/../../');
  var uploadedPath = filepath + 'server/uploads/' + fName + '.' + fileext;
  var fstream = fs.createWriteStream(uploadedPath);

  // Marks this file as fully processed and re-checks the completion condition.
  function fileDone() {
    --files;
    onFinish();
  }

  file.pipe(fstream).on('finish', function() {
    console.log('uploaded now');
    var cmd = 'libreoffice --headless --convert-to pdf --outdir ' + filepath + 'server/uploads ' + uploadedPath;
    exec(cmd, function(error, stdout, stderr) {
      if (error) console.error(error);
      console.log(stdout);
      var encryptCmd = 'java -jar server/uploads/pdfbox-app-1.8.6.jar Encrypt -canAssemble false -canExtractContent false -canExtractForAccessibility false ' +
        '-canModify false -canModifyAnnotations false -canPrint false -canPrintDegraded false server/uploads/' + fName + '.' + 'pdf' +
        ' ' + 'server/uploads/' + fName + '.' + 'pdf';
      exec(encryptCmd, function(error, stdout, stderr) {
        if (error) console.error(error);
        fs.unlink(uploadedPath, function() {
          console.log("removed " + uploadedPath);
          actualFileName.push(filename);
          storedFileName.push(fName + '.' + 'pdf');
          // Only count the file as done here, after its names were recorded;
          // doing it earlier lets onFinish() store the note before these
          // arrays have been filled.
          fileDone();
        });
      });
    });
  });
});

// Every form field value is parsed as JSON and replaces `noteData`.
busboy.on('field', function(fieldname, val, valTruncated, keyTruncated) {
  noteData = JSON.parse(val);
});

busboy.on('finish', function() {
  parsingDone = true;
  onFinish();
});

// Stores the note and answers the request, but only once the form has been
// fully parsed and no file is still being processed. Called after parsing
// finishes and after each file completes, because either can happen last.
function onFinish() {
  if (!parsingDone || files !== 0) {
    return;
  }
  noteData.uploader = req.user.username;
  noteData.actualFileName = actualFileName;
  noteData.storedFileName = storedFileName;
  noteData.noteId = uuid.v4();
  Campusnotes.create(noteData, function(err, note) {
    if (err) {
      res.status(400);
      return res.send({reason: err.toString()});
    }
    console.log('finish');
    res.status(200);
    res.end();
  });
}
On an unrelated note, you should probably do some sanitizing/checking of the filename, someone could be malicious and use something like '../../../../../../../../../etc/passwd' (I'm not sure if createWriteStream() resolves/normalizes the path given to it or not).

Categories

Resources