Stream file uploaded with Express.js through gm to eliminate double write - javascript

I'm using Express.js and have a route to upload images that I then need to resize. Currently I just let Express write the file to disk (which I think uses node-formidable under the covers) and then resize using gm (http://aheckmann.github.com/gm/) which writes a second version to disk.
gm(path)
.resize(540,404)
.write(dest, function (err) { ... });
I've read that you can get a hold of the node-formidable file stream before it writes it to disk, and since gm can accept a stream instead of just a path, I should be able to pass this right through eliminating the double write to disk.
I think I need to override form.onPart but I'm not sure where (should it be done as Express middleware?) and I'm not sure how to get a hold of form or what exactly to do with the part. This is the code skeleton that I've seen in a few places:
form.onPart = function(part) {
if (!part.filename) { form.handlePart(part); return; }
part.on('data', function(buffer) {
});
part.on('end', function() {
}
}
Can somebody help me put these two pieces together? Thanks!

You're on the right track by rewriting form.onPart. Formidable writes to disk by default, so you want to act before it does.
Parts themselves are Streams, so you can pipe them to whatever you want, including gm. I haven't tested it, but this makes sense based on the documentation:
var form = new formidable.IncomingForm;
form.onPart = function (part) {
if (!part.filename) return this.handlePart(part);
gm(part).resize(200, 200).stream(function (err, stdout, stderr) {
stdout.pipe(fs.createWriteStream('my/new/path/to/img.png'));
});
};
As for the middleware, I'd copypaste the multipart middleware from Connect/Express and add the onPart function to it: http://www.senchalabs.org/connect/multipart.html
It'd be a lot nicer if formidable didn't write to disk by default or if it took a flag, wouldn't it? You could send them an issue.

Related

Unzip a zip file with JavaScript [duplicate]

I want to display OpenOffice files, .odt and .odp at client side using a web browser.
These files are zipped files. Using Ajax, I can get these files from server but these are zipped files. I have to unzip them using JavaScript, I have tried using inflate.js, http://www.onicos.com/staff/iz/amuse/javascript/expert/inflate.txt, but without success.
How can I do this?
I wrote an unzipper in Javascript. It works.
It relies on Andy G.P. Na's binary file reader and some RFC1951 inflate logic from notmasteryet. I added the ZipFile class.
working example:
http://cheeso.members.winisp.net/Unzip-Example.htm (dead link)
The source:
http://cheeso.members.winisp.net/srcview.aspx?dir=js-unzip (dead link)
NB: the links are dead; I'll find a new host soon.
Included in the source is a ZipFile.htm demonstration page, and 3 distinct scripts, one for the zipfile class, one for the inflate class, and one for a binary file reader class. The demo also depends on jQuery and jQuery UI. If you just download the js-zip.zip file, all of the necessary source is there.
Here's what the application code looks like in Javascript:
// In my demo, this gets attached to a click event.
// it instantiates a ZipFile, and provides a callback that is
// invoked when the zip is read. This can take a few seconds on a
// large zip file, so it's asynchronous.
var readFile = function(){
$("#status").html("<br/>");
var url= $("#urlToLoad").val();
var doneReading = function(zip){
extractEntries(zip);
};
var zipFile = new ZipFile(url, doneReading);
};
// this function extracts the entries from an instantiated zip
function extractEntries(zip){
$('#report').accordion('destroy');
// clear
$("#report").html('');
var extractCb = function(id) {
// this callback is invoked with the entry name, and entry text
// in my demo, the text is just injected into an accordion panel.
return (function(entryName, entryText){
var content = entryText.replace(new RegExp( "\\n", "g" ), "<br/>");
$("#"+id).html(content);
$("#status").append("extract cb, entry(" + entryName + ") id(" + id + ")<br/>");
$('#report').accordion('destroy');
$('#report').accordion({collapsible:true, active:false});
});
}
// for each entry in the zip, extract it.
for (var i=0; i<zip.entries.length; i++) {
var entry = zip.entries[i];
var entryInfo = "<h4><a>" + entry.name + "</a></h4>\n<div>";
// contrive an id for the entry, make it unique
var randomId = "id-"+ Math.floor((Math.random() * 1000000000));
entryInfo += "<span class='inputDiv'><h4>Content:</h4><span id='" + randomId +
"'></span></span></div>\n";
// insert the info for one entry as the last child within the report div
$("#report").append(entryInfo);
// extract asynchronously
entry.extract(extractCb(randomId));
}
}
The demo works in a couple of steps: The readFile fn is triggered by a click, and instantiates a ZipFile object, which reads the zip file. There's an asynchronous callback for when the read completes (usually happens in less than a second for reasonably sized zips) - in this demo the callback is held in the doneReading local variable, which simply calls extractEntries, which
just blindly unzips all the content of the provided zip file. In a real app you would probably choose some of the entries to extract (allow the user to select, or choose one or more entries programmatically, etc).
The extractEntries fn iterates over all entries, and calls extract() on each one, passing a callback. Decompression of an entry takes time, maybe 1s or more for each entry in the zipfile, which means asynchrony is appropriate. The extract callback simply adds the extracted content to an jQuery accordion on the page. If the content is binary, then it gets formatted as such (not shown).
It works, but I think that the utility is somewhat limited.
For one thing: It's very slow. Takes ~4 seconds to unzip the 140k AppNote.txt file from PKWare. The same uncompress can be done in less than .5s in a .NET program. EDIT: The Javascript ZipFile unpacks considerably faster than this now, in IE9 and in Chrome. It is still slower than a compiled program, but it is plenty fast for normal browser usage.
For another: it does not do streaming. It basically slurps in the entire contents of the zipfile into memory. In a "real" programming environment you could read in only the metadata of a zip file (say, 64 bytes per entry) and then read and decompress the other data as desired. There's no way to do IO like that in javascript, as far as I know, therefore the only option is to read the entire zip into memory and do random access in it. This means it will place unreasonable demands on system memory for large zip files. Not so much a problem for a smaller zip file.
Also: It doesn't handle the "general case" zip file - there are lots of zip options that I didn't bother to implement in the unzipper - like ZIP encryption, WinZip encryption, zip64, UTF-8 encoded filenames, and so on. (EDIT - it handles UTF-8 encoded filenames now). The ZipFile class handles the basics, though. Some of these things would not be hard to implement. I have an AES encryption class in Javascript; that could be integrated to support encryption. Supporting Zip64 would probably useless for most users of Javascript, as it is intended to support >4gb zipfiles - don't need to extract those in a browser.
I also did not test the case for unzipping binary content. Right now it unzips text. If you have a zipped binary file, you'd need to edit the ZipFile class to handle it properly. I didn't figure out how to do that cleanly. It does binary files now, too.
EDIT - I updated the JS unzip library and demo. It now does binary files, in addition to text. I've made it more resilient and more general - you can now specify the encoding to use when reading text files. Also the demo is expanded - it shows unzipping an XLSX file in the browser, among other things.
So, while I think it is of limited utility and interest, it works. I guess it would work in Node.js.
I'm using zip.js and it seems to be quite useful. It's worth a look!
Check the Unzip demo, for example.
I found jszip quite useful. I've used so far only for reading, but they have create/edit capabilities as well.
Code wise it looks something like this
var new_zip = new JSZip();
new_zip.load(file);
new_zip.files["doc.xml"].asText() // this give you the text in the file
One thing I noticed is that it seems the file has to be in binary stream format (read using the .readAsArrayBuffer of FileReader(), otherwise I was getting errors saying I might have a corrupt zip file
Edit: Note from the 2.x to 3.0.0 upgrade guide:
The load() method and the constructor with data (new JSZip(data)) have
been replaced by loadAsync().
Thanks user2677034
If you need to support other formats as well or just need good performance, you can use this WebAssembly library
it's promised based, it uses WebWorkers for threading and API is actually simple ES module
How to use
Install with npm i libarchive.js and use it as a ES module.
The library consists of two parts: ES module and webworker bundle, ES module part is your interface to talk to library, use it like any other module. The webworker bundle lives in the libarchive.js/dist folder so you need to make sure that it is available in your public folder since it will not get bundled if you're using bundler (it's all bundled up already) and specify correct path to Archive.init() method.
import {Archive} from 'libarchive.js/main.js';
Archive.init({
workerUrl: 'libarchive.js/dist/worker-bundle.js'
});
document.getElementById('file').addEventListener('change', async (e) => {
const file = e.currentTarget.files[0];
const archive = await Archive.open(file);
let obj = await archive.extractFiles();
console.log(obj);
});
// outputs
{
".gitignore": {File},
"addon": {
"addon.py": {File},
"addon.xml": {File}
},
"README.md": {File}
}
I wrote "Binary Tools for JavaScript", an open source project that includes the ability to unzip, unrar and untar: https://github.com/codedread/bitjs
Used in my comic book reader: https://github.com/codedread/kthoom (also open source).
HTH!
If anyone's reading images or other binary files from a zip file hosted at a remote server, you can use following snippet to download and create zip object using the jszip library.
// this function just get the public url of zip file.
let url = await getStorageUrl(path)
console.log('public url is', url)
//get the zip file to client
axios.get(url, { responseType: 'arraybuffer' }).then((res) => {
console.log('zip download status ', res.status)
//load contents into jszip and create an object
jszip.loadAsync(new Blob([res.data], { type: 'application/zip' })).then((zip) => {
const zipObj = zip
$.each(zip.files, function (index, zipEntry) {
console.log('filename', zipEntry.name)
})
})
Now using the zipObj you can access the files and create a src url for it.
var fname = 'myImage.jpg'
zipObj.file(fname).async('blob').then((blob) => {
var blobUrl = URL.createObjectURL(blob)

How to read a file being saved to Parse server with Cloud Code, before actually saving it?

I'm trying to use Cloud Code to check whether a user-submitted image is in a supported file type and not too big.
I know I need to do this verification server-side and I think I should do it with Cloud Code using beforeSave – the doc even has a specific example about data validation, but it doesn't explain how to handle files and I couldn't figure it out.
I've tried the documented method for saving files, ie.
file = fileUploadControl.files[0];
var parseFile = new Parse.File(name, file);
currentUser.set("picture", parseFile);
currentUser.save();
and in the Cloud Code,
Parse.Cloud.beforeSave(Parse.User, (request, response) => { // code here });
But 1. this still actually saves the file on my server, right? I want to check the file size first to avoid saving too many big files...
And 2. Even then, I don't know what to do in the beforeSave callback. It seems I can only access the URL of the saved image (proof that it has been uploaded), and it seems very counter-intuitive to have to do another https request to check the file size and type before deciding whether to proceed with attaching the file to the User object.
(I'm currently using remote-file-size and file-type to check the size and type of the uploaded file, but no success here either).
I also tried calling a Cloud function, but it feels like I'm not doing the right thing, and besides I'm running into the same issues.
I can call a Cloud function and pass a saved ParseFile as a parameter, and then I know how to save it to the User object from the Cloud Code using the masterkey, but as above it still involves uploading the file to the server and then re-fetching it using its URL.
Am I missing anything here?
Is there no way to do something like a beforeSave on Parse.File, and then stop the file from being saved if it doesn't meet certain criteria?
Cheers.
If you have to do something with files, parse lets you overwrite the file adapter to handle file operations.
You can indicate the file adapter to use in your ParseServer instatiation:
var FSStoreAdapter = require('./file_adapter');
var api = new ParseServer({
databaseURI: databaseUri ,
cloud: process.env.CLOUD_CODE_MAIN || __dirname + '/cloud/main.js',
appId: process.env.APP_ID,
filesAdapter: fs_store_adapter, // YOUR FILE ADAPTER
masterKey: process.env.MASTER_KEY, //Add your master key here. Keep it secret!
serverURL: "https://yourUrl", // Don't forget to change to https if needed
publicServerURL: "https://yourUrl",
liveQuery: {
classNames: ["Posts", "Comments"] // List of classes to support for query subscriptions
}
maxUploadSize: "500mb" //you will now have 500mb limit :)
});
That said, you can also specify a maxUploadSize in your instatiation as you can see in the last line.
you have to use save in background
file = ParseFile("filename", file)
file?.saveInBackground({ e ->
if (e == null) {
} else {
Toast.makeText(applicationContext, "Error: $e", Toast.LENGTH_SHORT).show()
e.printStackTrace()
Log.d("DEBUG", "file " + e.code)
}
}, { percentDone ->
Log.d("DEBUG", "file:" + percentDone!!)
})

How can I extract (read + delete) from a textfile in NodeJS?

I'm building a script that reads log files, handles what needs to be handled then writes them to a database
Some caveats :
Some log files have a lot of input, multiple times a second
Some log files have few to no input at all
What I try in simple words:
Reading the first line of a file, then deleting this line to go to the next one, while I handle the first line, other lines could be added..
Issues I'm facing
When I try reading a file then processing it, then deleting the
files, some lines have been added
When the app crashes while
handling multiple lines at once for any reason, I can't know what
lines have been processed.
Tried so far
fs.readdir('logs/', (err, filenames) => {
filenames.forEach((filename) => {
fs.readFile('logs/'+filename, 'utf-8', (err, content) => {
//processing all new lines (can take multiple ms)
//deleting file
fs.unlink('logs/'+filename)
});
});
});
Is there not a (native or not) method to 'take' first line(s), or take all lines, from a file at once?
Something similar to what the Array.shift() method does to arrays..
Why you are reading the file at once. Instead you can use the node.js streams.
https://nodejs.org/api/fs.html#fs_class_fs_readstream
This will read the files and output to console
var fs = require('fs');
var readStream = fs.createReadStream('myfile.txt');
readStream.pipe(process.stdout);
You can also go for the npm package node-tail to read the content of a files while new content written to it.
https://github.com/lucagrulla/node-tail
If your log files has been writen as rotate logs. Example: Each hours has each log file, 9AM.log, 10AM.log....When you process the log files, you can skip current file and process another files. ex: now is 10:30 AM o’clock, skip file 10AM.log, solve another files.

NodeJS exec with binary from and to the process

I'm trying to write a function, that would use native openssl to do some RSA heavy-lifting for me, rather than using a js RSA library. The target is to
Read binary data from a file
Do some processing in the node process, using JS, resulting in a Buffer containing binary data
Write the buffer to the stdin stream of the exec command
RSA encrypt/decrypt the data and write it to the stdout stream
Get the input data back to a Buffer in the JS-process for further processing
The child process module in Node has an exec command, but I fail to see how I can pipe the input to the process and pipe it back to my process. Basically I'd like to execute the following type of command, but without having to rely on writing things to files (didn't check the exact syntax of openssl)
cat the_binary_file.data | openssl -encrypt -inkey key_file.pem -certin > the_output_stream
I could do this by writing a temp file, but I'd like to avoid it, if possible. Spawning a child process allows me access to stdin/out but haven't found this functionality for exec.
Is there a clean way to do this in the way I drafted here? Is there some alternative way of using openssl for this, e.g. some native bindings for openssl lib, that would allow me to do this without relying on the command line?
You've mentioned spawn but seem to think you can't use it. Possibly showing my ignorance here, but it seems like it should be just what you're looking for: Launch openssl via spawn, then write to child.stdin and read from child.stdout. Something very roughly like this completely untested code:
var util = require('util'),
spawn = require('child_process').spawn;
function sslencrypt(buffer_to_encrypt, callback) {
var ssl = spawn('openssl', ['-encrypt', '-inkey', ',key_file.pem', '-certin']),
result = new Buffer(SOME_APPROPRIATE_SIZE),
resultSize = 0;
ssl.stdout.on('data', function (data) {
// Save up the result (or perhaps just call the callback repeatedly
// with it as it comes, whatever)
if (data.length + resultSize > result.length) {
// Too much data, our SOME_APPROPRIATE_SIZE above wasn't big enough
}
else {
// Append to our buffer
resultSize += data.length;
data.copy(result);
}
});
ssl.stderr.on('data', function (data) {
// Handle error output
});
ssl.on('exit', function (code) {
// Done, trigger your callback (perhaps check `code` here)
callback(result, resultSize);
});
// Write the buffer
ssl.stdin.write(buffer_to_encrypt);
}
You should be able to set encoding to binary when you make a call to exec, like..
exec("openssl output_something_in_binary", {encoding: 'binary'}, function(err, out, err) {
//do something with out - which is in the binary format
});
If you want to write out the content of "out" in binary, make sure to set the encoding to binary again, like..
fs.writeFile("out.bin", out, {encoding: 'binary'});
I hope this helps!

Unzipping files

I want to display OpenOffice files, .odt and .odp at client side using a web browser.
These files are zipped files. Using Ajax, I can get these files from server but these are zipped files. I have to unzip them using JavaScript, I have tried using inflate.js, http://www.onicos.com/staff/iz/amuse/javascript/expert/inflate.txt, but without success.
How can I do this?
I wrote an unzipper in Javascript. It works.
It relies on Andy G.P. Na's binary file reader and some RFC1951 inflate logic from notmasteryet. I added the ZipFile class.
working example:
http://cheeso.members.winisp.net/Unzip-Example.htm (dead link)
The source:
http://cheeso.members.winisp.net/srcview.aspx?dir=js-unzip (dead link)
NB: the links are dead; I'll find a new host soon.
Included in the source is a ZipFile.htm demonstration page, and 3 distinct scripts, one for the zipfile class, one for the inflate class, and one for a binary file reader class. The demo also depends on jQuery and jQuery UI. If you just download the js-zip.zip file, all of the necessary source is there.
Here's what the application code looks like in Javascript:
// In my demo, this gets attached to a click event.
// it instantiates a ZipFile, and provides a callback that is
// invoked when the zip is read. This can take a few seconds on a
// large zip file, so it's asynchronous.
var readFile = function(){
$("#status").html("<br/>");
var url= $("#urlToLoad").val();
var doneReading = function(zip){
extractEntries(zip);
};
var zipFile = new ZipFile(url, doneReading);
};
// this function extracts the entries from an instantiated zip
function extractEntries(zip){
$('#report').accordion('destroy');
// clear
$("#report").html('');
var extractCb = function(id) {
// this callback is invoked with the entry name, and entry text
// in my demo, the text is just injected into an accordion panel.
return (function(entryName, entryText){
var content = entryText.replace(new RegExp( "\\n", "g" ), "<br/>");
$("#"+id).html(content);
$("#status").append("extract cb, entry(" + entryName + ") id(" + id + ")<br/>");
$('#report').accordion('destroy');
$('#report').accordion({collapsible:true, active:false});
});
}
// for each entry in the zip, extract it.
for (var i=0; i<zip.entries.length; i++) {
var entry = zip.entries[i];
var entryInfo = "<h4><a>" + entry.name + "</a></h4>\n<div>";
// contrive an id for the entry, make it unique
var randomId = "id-"+ Math.floor((Math.random() * 1000000000));
entryInfo += "<span class='inputDiv'><h4>Content:</h4><span id='" + randomId +
"'></span></span></div>\n";
// insert the info for one entry as the last child within the report div
$("#report").append(entryInfo);
// extract asynchronously
entry.extract(extractCb(randomId));
}
}
The demo works in a couple of steps: The readFile fn is triggered by a click, and instantiates a ZipFile object, which reads the zip file. There's an asynchronous callback for when the read completes (usually happens in less than a second for reasonably sized zips) - in this demo the callback is held in the doneReading local variable, which simply calls extractEntries, which
just blindly unzips all the content of the provided zip file. In a real app you would probably choose some of the entries to extract (allow the user to select, or choose one or more entries programmatically, etc).
The extractEntries fn iterates over all entries, and calls extract() on each one, passing a callback. Decompression of an entry takes time, maybe 1s or more for each entry in the zipfile, which means asynchrony is appropriate. The extract callback simply adds the extracted content to an jQuery accordion on the page. If the content is binary, then it gets formatted as such (not shown).
It works, but I think that the utility is somewhat limited.
For one thing: It's very slow. Takes ~4 seconds to unzip the 140k AppNote.txt file from PKWare. The same uncompress can be done in less than .5s in a .NET program. EDIT: The Javascript ZipFile unpacks considerably faster than this now, in IE9 and in Chrome. It is still slower than a compiled program, but it is plenty fast for normal browser usage.
For another: it does not do streaming. It basically slurps in the entire contents of the zipfile into memory. In a "real" programming environment you could read in only the metadata of a zip file (say, 64 bytes per entry) and then read and decompress the other data as desired. There's no way to do IO like that in javascript, as far as I know, therefore the only option is to read the entire zip into memory and do random access in it. This means it will place unreasonable demands on system memory for large zip files. Not so much a problem for a smaller zip file.
Also: It doesn't handle the "general case" zip file - there are lots of zip options that I didn't bother to implement in the unzipper - like ZIP encryption, WinZip encryption, zip64, UTF-8 encoded filenames, and so on. (EDIT - it handles UTF-8 encoded filenames now). The ZipFile class handles the basics, though. Some of these things would not be hard to implement. I have an AES encryption class in Javascript; that could be integrated to support encryption. Supporting Zip64 would probably useless for most users of Javascript, as it is intended to support >4gb zipfiles - don't need to extract those in a browser.
I also did not test the case for unzipping binary content. Right now it unzips text. If you have a zipped binary file, you'd need to edit the ZipFile class to handle it properly. I didn't figure out how to do that cleanly. It does binary files now, too.
EDIT - I updated the JS unzip library and demo. It now does binary files, in addition to text. I've made it more resilient and more general - you can now specify the encoding to use when reading text files. Also the demo is expanded - it shows unzipping an XLSX file in the browser, among other things.
So, while I think it is of limited utility and interest, it works. I guess it would work in Node.js.
I'm using zip.js and it seems to be quite useful. It's worth a look!
Check the Unzip demo, for example.
I found jszip quite useful. I've used so far only for reading, but they have create/edit capabilities as well.
Code wise it looks something like this
var new_zip = new JSZip();
new_zip.load(file);
new_zip.files["doc.xml"].asText() // this give you the text in the file
One thing I noticed is that it seems the file has to be in binary stream format (read using the .readAsArrayBuffer of FileReader(), otherwise I was getting errors saying I might have a corrupt zip file
Edit: Note from the 2.x to 3.0.0 upgrade guide:
The load() method and the constructor with data (new JSZip(data)) have
been replaced by loadAsync().
Thanks user2677034
If you need to support other formats as well or just need good performance, you can use this WebAssembly library
it's promised based, it uses WebWorkers for threading and API is actually simple ES module
How to use
Install with npm i libarchive.js and use it as a ES module.
The library consists of two parts: ES module and webworker bundle, ES module part is your interface to talk to library, use it like any other module. The webworker bundle lives in the libarchive.js/dist folder so you need to make sure that it is available in your public folder since it will not get bundled if you're using bundler (it's all bundled up already) and specify correct path to Archive.init() method.
import {Archive} from 'libarchive.js/main.js';
Archive.init({
workerUrl: 'libarchive.js/dist/worker-bundle.js'
});
document.getElementById('file').addEventListener('change', async (e) => {
const file = e.currentTarget.files[0];
const archive = await Archive.open(file);
let obj = await archive.extractFiles();
console.log(obj);
});
// outputs
{
".gitignore": {File},
"addon": {
"addon.py": {File},
"addon.xml": {File}
},
"README.md": {File}
}
I wrote "Binary Tools for JavaScript", an open source project that includes the ability to unzip, unrar and untar: https://github.com/codedread/bitjs
Used in my comic book reader: https://github.com/codedread/kthoom (also open source).
HTH!
If anyone's reading images or other binary files from a zip file hosted at a remote server, you can use following snippet to download and create zip object using the jszip library.
// this function just get the public url of zip file.
let url = await getStorageUrl(path)
console.log('public url is', url)
//get the zip file to client
axios.get(url, { responseType: 'arraybuffer' }).then((res) => {
console.log('zip download status ', res.status)
//load contents into jszip and create an object
jszip.loadAsync(new Blob([res.data], { type: 'application/zip' })).then((zip) => {
const zipObj = zip
$.each(zip.files, function (index, zipEntry) {
console.log('filename', zipEntry.name)
})
})
Now using the zipObj you can access the files and create a src url for it.
var fname = 'myImage.jpg'
zipObj.file(fname).async('blob').then((blob) => {
var blobUrl = URL.createObjectURL(blob)

Categories

Resources