I am trying to perform a checksum on a file with JavaScript. I'm using a FileReader together with CryptoJS's MD5 method and the CryptoJS.enc.Hex encoding.
The checksum from the front end (below) differs from the ones on the back end, where I get one from express-fileupload and also generate my own with the crypto module via crypto.createHash('md5') and a digest via hash.digest('hex'). Those two (mine and express-fileupload's) differ from each other as well.
What is going on?
let img = document.createElement('img');
img.file = data;

let reader = new FileReader();
reader.onload = (function (someelement) {
    return function (e) {
        let md5 = CryptoJS.MD5(e.target.result);
        let str = md5.toString(CryptoJS.enc.Hex);
        console.log('str', str); // will give one random md5
    };
})(img);
reader.readAsBinaryString(data);
Then on the server, using https://www.npmjs.com/package/express-fileupload:
export default async function (req, res, next) {
    console.log(req.files.file.md5); // some other md5

    const hash = crypto.createHash('md5');
    let buff = Buffer.from(req.files.file.data, "base64").toString('utf-8');
    // edit: this actually DOES come out the same as req.files.file.md5
    // if I remove .toString('utf-8')
    hash.update(buff);
    let str = hash.digest("hex");
    console.log('other hash', str); // and some third, completely different md5
}
Can someone please explain what I am doing wrong?
Did you encode e.target.result to UTF-8 on your front end? I had much the same problem, but then I realized that I'd used the wrong encoding.
Try encoding your plain string as UTF-8 and hashing it after that.
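For what it's worth, the mismatch can be avoided entirely by hashing raw bytes on both sides instead of decoded strings. A minimal sketch of the front end, assuming crypto-js 4.x (where CryptoJS.lib.WordArray.create accepts an ArrayBuffer) and the data variable from the question:

let reader = new FileReader();
reader.onload = (e) => {
    // hash the file's raw bytes instead of a lossily decoded string
    const wordArray = CryptoJS.lib.WordArray.create(e.target.result);
    console.log(CryptoJS.MD5(wordArray).toString(CryptoJS.enc.Hex));
};
reader.readAsArrayBuffer(data);

On the server, express-fileupload already gives you a Buffer in req.files.file.data, so there is nothing to base64-decode:

const hash = crypto.createHash('md5');
hash.update(req.files.file.data); // raw bytes, no .toString()
console.log(hash.digest('hex'));  // should now match req.files.file.md5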
Related
I have a PDF file which I want to read into memory using NodeJS. Ideally I'd like to encode it using base64 for transferring it. But somehow the read function does not seem to read the full PDF file, which makes no sense to me. The original PDF was generated using PDFKit, is fine, and is viewable in a PDF reader program.
The original file test.pdf is 90 kB on disk. But if I read it and write it back to disk, only 82 kB arrive, and the new PDF test-out.pdf is broken. The PDF viewer says:
Unable to open document. The pdf document is damaged.
Consequently the base64 encoding does not work correctly either; I tested it with an online webservice. Does someone know why this is happening, and how to resolve it?
I found this post already.
const fs = require('fs');

let buf = fs.readFileSync('test.pdf'); // returns raw binary data in a Buffer
// buf = fs.readFileSync('test.pdf', {encoding: 'base64'}); // for the base64-encoded data
// ...transfer the base64 data...
fs.writeFileSync('test-out.pdf', buf); // should be a pdf again
EDIT MCVE:
const fs = require('fs');
const PDFDocument = require('pdfkit');
let filepath = 'output.pdf';
class PDF {
    constructor() {
        this.doc = new PDFDocument();
        this.setupdocument();
        this.doc.pipe(fs.createWriteStream(filepath));
    }

    setupdocument() {
        var pageNumber = 1;
        this.doc.on('pageAdded', () => {
            this.doc.text(++pageNumber, 0.5 * (this.doc.page.width - 100), 40, {width: 100, align: 'center'});
        });

        this.doc.moveDown();
        // draw some headline text
        this.doc.fontSize(25).text('Some Headline');
        this.doc.fontSize(15).text('Generated: ' + new Date().toUTCString());
        this.doc.moveDown();
        this.doc.font('Times-Roman', 11);
    }

    report(object) {
        this.doc.moveDown();
        this.doc
            .text(object.location + ' ' + object.table + ' ' + Date.now())
            .font('Times-Roman', 11)
            .moveDown()
            .text(object.name)
            .font('Times-Roman', 11);
        this.doc.end();
        let report = fs.readFileSync(filepath);
        return report;
    }
}
let pdf = new PDF();
let buf = pdf.report({location: 'athome', table:'wood', name:'Bob'});
fs.writeFileSync('outfile1.pdf', buf);
The encoding option for fs.readFileSync() tells the function how to turn the raw bytes it reads into the string it returns; it does not change what is in the file.
In this case, your PDF is binary data, not base64. If you read it with {encoding: 'base64'} you get a base64 string, and writing that string straight back to disk produces a file of base64 text rather than the original bytes, which messes up the data.
You should not pass the encoding option at all; you will then get the RAW binary buffer (which is what a PDF file is: raw binary). If you want to convert that to base64 for some reason, you can then do buf.toString('base64') on it. But that is not the file's native format, and if you write that converted data back out to disk as-is, it won't be a legal PDF file.
To just read and write the same file out to a different filename, leave off the encoding option entirely:
const fs = require('fs');
let buf = fs.readFileSync('test.pdf'); // get raw buffer binary data
fs.writeFileSync('test-out.pdf', buf); // write out raw buffer binary data
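If you do want a base64 copy for transfer, a small round-trip sketch (the test-out2.pdf name is only for illustration):

const b64 = buf.toString('base64');          // text-safe string for transfer
const restored = Buffer.from(b64, 'base64'); // back to the original bytes
fs.writeFileSync('test-out2.pdf', restored); // byte-for-byte copy of test.pdf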
After a lot of searching I found this GitHub issue. The problem in my question seems to be that doc.end() does not wait for the write stream to finish (the finish event of the write stream). Therefore, as suggested in the GitHub issue, the following approaches work:
callback based:
doc = new PDFDocument();
writeStream = fs.createWriteStream('filename.pdf');
doc.pipe(writeStream);
doc.end();
writeStream.on('finish', function () {
    // do stuff with the PDF file
});
or promise based:
const stream = fs.createWriteStream(localFilePath);
doc.pipe(stream);
.....
doc.end();
await new Promise<void>(resolve => {
    stream.on("finish", function () {
        resolve();
    });
});
or, even nicer, instead of calling doc.end() directly, call the function savePdfToFile below:
function savePdfToFile(pdf: PDFKit.PDFDocument, fileName: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        // To determine when the PDF has finished being written successfully
        // we need to confirm the following 2 conditions:
        //
        // 1. The write stream has been closed
        // 2. PDFDocument.end() was called synchronously without an error being thrown
        let pendingStepCount = 2;

        const stepFinished = () => {
            if (--pendingStepCount == 0) {
                resolve();
            }
        };

        const writeStream = fs.createWriteStream(fileName);
        writeStream.on('close', stepFinished);
        pdf.pipe(writeStream);

        pdf.end();
        stepFinished();
    });
}
This function should correctly handle the following situations:
PDF generated successfully
Error is thrown inside pdf.end() before write stream is closed
Error is thrown inside pdf.end() after write stream has been closed
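A hypothetical usage sketch (inside an async function; savePdfToFile calls pdf.end() itself, so don't call it again):

const doc = new PDFDocument();
doc.text('Hello');
await savePdfToFile(doc, 'report.pdf');    // resolves only once the stream has closed
const buf = fs.readFileSync('report.pdf'); // now safe to read the file back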
I'm currently doing this (see snippet below) to get an md5 hash string for the image files I'm uploading (I'm using the hash as file names):
NOTE: I'm using the md5 package to generate the hash (it's loaded into the snippet).
There are 4 available methods on FileReader() to read the files. They all seem to produce good results.
readAsText(file)
readAsBinaryString(file);
readAsArrayBuffer(file);
readAsDataURL(file);
Which one should I be using in this case, and why? Can you also explain the difference between them?
function onFileSelect(e) {
    const file = e.target.files[0];

    const reader1 = new FileReader();
    const reader2 = new FileReader();
    const reader3 = new FileReader();
    const reader4 = new FileReader();

    reader1.onload = (event) => {
        const fileContent = event.target.result;
        console.log('Hash from "readAsText()": ');
        console.log(md5(fileContent));
    }

    reader2.onload = (event) => {
        const fileContent = event.target.result;
        console.log('Hash from "readAsBinaryString()": ');
        console.log(md5(fileContent));
    }

    reader3.onload = (event) => {
        const fileContent = event.target.result;
        console.log('Hash from "readAsArrayBuffer()": ');
        console.log(md5(fileContent));
    }

    reader4.onload = (event) => {
        const fileContent = event.target.result;
        console.log('Hash from "readAsDataURL()": ');
        console.log(md5(fileContent));
    }

    reader1.readAsText(file);
    reader2.readAsBinaryString(file);
    reader3.readAsArrayBuffer(file);
    reader4.readAsDataURL(file);
}

.myDiv {
    margin-bottom: 10px;
}

<script src="https://cdn.jsdelivr.net/npm/js-md5@0.7.3/src/md5.min.js"></script>
<div class="myDiv">Pick an image file to see the 4 hash results on console.log()</div>
<input type='file' onChange="onFileSelect(event)" accept='.jpg,.jpeg,.png,.gif' />
Use readAsArrayBuffer.
readAsBinaryString() and readAsDataURL() will make your computer do a lot more work than needs to be done:
read the blob as a binary stream
convert it to a UTF-16 / base64 string (remember strings are immutable in JS; any operation on one actually creates a copy in memory)
[ pass it to your lib ]
convert it back to a binary string
process the data
Also, it seems your library doesn't handle data URLs and fails on UTF-16 strings.
readAsText() by default will try to interpret your binary data as a UTF-8 text sequence, which is pretty bad for binary data like a raster image:
// generate some binary data
document.createElement('canvas').toBlob(blob => {
    const utf8_reader = new FileReader();
    const bin_reader = new FileReader();
    let done = 0;
    utf8_reader.onload = bin_reader.onload = e => {
        if (++done === 2) {
            console.log('same results: ', bin_reader.result === utf8_reader.result);
            console.log("utf8\n", utf8_reader.result);
            console.log("utf16\n", bin_reader.result);
        }
    }
    utf8_reader.readAsText(blob);
    bin_reader.readAsBinaryString(blob);
});
readAsArrayBuffer, on the other hand, will just allocate the binary data as-is in memory. Simple I/O, no processing.
To manipulate this data, we can use TypedArray views over the binary data; being only views, they won't create any overhead either.
And if you look at the library you are using, it will pass your input to such a Uint8Array anyway for further processing. However, beware: it apparently needs you to pass a Uint8Array view of this ArrayBuffer rather than the bare ArrayBuffer directly.
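Put together, a minimal sketch, assuming the same md5() global loaded from js-md5 in the question's snippet:

function onFileSelect(e) {
    const reader = new FileReader();
    reader.onload = (event) => {
        // wrap the ArrayBuffer in a Uint8Array view before handing it to the lib
        console.log(md5(new Uint8Array(event.target.result)));
    };
    reader.readAsArrayBuffer(e.target.files[0]);
}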
I'm using this, in React, to base64-encode an image file:
fileToBase64 = (filename, filepath) => {
    return new Promise(resolve => {
        var file = new File([filename], filepath);
        var reader = new FileReader();
        reader.onload = function (event) {
            resolve(event.target.result);
        };
        reader.readAsDataURL(file);
    });
};
Which gets called by this:
handleChangeFile = event => {
    const { name, files } = event.target;
    if (files.length) {
        const file = files[0];
        let fields = this.state.fields;
        this.fileToBase64(file).then(result => {
            fields[name].value = result;
        });
        fields[name].isFilled = true;
        this.setState({
            fields: fields
        });
    }
};
And the whole fields variable gets posted to a Django server; no issues so far.
On the Python/Django end:
str_encoded = request.data["file"]
str_decoded = base64.b64decode(str_encoded)
The second line raises binascii.Error: Invalid base64-encoded string: length cannot be 1 more than a multiple of 4. I've googled and read that this is probably a padding issue, but I don't know how to fix it.
You will have to strip the prefix that JavaScript adds from the base64 string.
The prefix looks like data:{type};base64,{actual-base64-string-follows}.
In PHP, where I had the same issue, I test whether the string starts with the "data:" prefix, and if so I strip everything from the start of the string up to the position of the ; (semicolon) plus 8 characters (to consume the final ";base64,").
Then you can decode the remaining string in Python, as it is now a valid base64 string.
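The same strip can also be done in the browser before posting. A minimal sketch (stripDataUrlPrefix is a hypothetical helper name):

// keep only what follows the first comma; base64 itself never contains a comma
function stripDataUrlPrefix(dataUrl) {
    return dataUrl.slice(dataUrl.indexOf(',') + 1);
}

// e.g. inside fileToBase64's onload handler:
// resolve(stripDataUrlPrefix(event.target.result));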
I am trying to use browserify to access a local binary file (that is, the binary file is in the same directory as the JavaScript file, which is on the user's computer).
I haven't succeeded. Here's what I tried and what I know:
~) I know fs won't work...
0) I tried using require('html'), but it says 'ajax not supported in this browser' [I am using Chromium... but I'd assume it's roughly the same thing as Chrome].
1) I tried 'browser-request'. This reads the binary file... as a string.
It is based on 'request', so I should be able to configure the options, including encoding: null, which would solve all my problems, but... looking at the source code, you'll see that no support for the encoding option is present. Not even a warning.
2) I used xmlhttprequest, which required the 'html' module... so again, I get the same error as in 0). Strangely enough, 'browser-request' uses this module and it works... and I have absolutely no idea why.
3) At this point, I looked into HTML5 file system support. It would work, but I don't want the user to have to specify a file... seeing as I really ONLY want to get the buffer into memory. Is there any other way to access the file? Perhaps using --allow-file-access when starting Chromium?
4) If all else fails, I just want a way to get the Buffer into my code. I guess I could use node in a shell and copy-paste the result of reading the file into memory...
Is there any hope at all?
Here's what somewhat works:
function toArrayBuffer(buffer) {
    var ab = new ArrayBuffer(buffer.length);
    var view = new Uint8Array(ab);
    for (var i = 0; i < buffer.length; ++i) {
        view[i] = buffer[i];
    }
    return ab;
}

// node: readFileSync + toArrayBuffer
// browser: ajax http request
function readFile(filename, doneCb) {
    var isNode =
        typeof global !== "undefined" &&
        global.toString() == '[object global]';
    if (isNode) {
        var fs = require('fs');
        var buffer = fs.readFileSync(filename);
        buffer = toArrayBuffer(buffer);
        doneCb(buffer);
    } else {
        var http = require('http');
        var buf;
        var req = http.get({ path: '/' + filename }, function (res) {
            res.on('data', function (chunk) {
                buf = chunk;
            });
            res.on('end', function () {
                doneCb(buf);
            });
        });
        req.xhr.responseType = 'arraybuffer';
    }
}
It requires a server, and I'm struggling with how to make it work in testling.
Another approach I can think of is to use brfs with base64 encoding:
var base64 = fs.readFileSync('file.bin', 'base64');
var buf = Buffer.from(base64, 'base64');
var ab = toArrayBuffer(buf);
It is simpler, but it is not dynamic and cannot be refactored into a self-contained function.
If it's not dynamic, use the brfs transform.
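For reference, a minimal sketch of the brfs approach; the main.js and bundle.js names are just examples:

// main.js: with brfs, browserify evaluates this readFileSync call at build
// time and inlines the file's contents into the bundle itself
var fs = require('fs');
var base64 = fs.readFileSync(__dirname + '/file.bin', 'base64');
var buf = Buffer.from(base64, 'base64');

Then build with browserify -t brfs main.js > bundle.js.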
I'm trying to do some experiment with HTML5, WebSocket and File API.
I'm using the Tomcat7 WebSocket implementation.
I'm able to send and receive text messages from the servlet. What I want to do now is send JSON objects from the servlet to the client, but I want to avoid text messages in order to skip the JSON.parse (or similar) on the client, so I'm trying to send binary messages.
The servlet part is really simple:
String s = "{arr : [1,2]}";
CharBuffer cbuf = CharBuffer.wrap(s);
CharsetEncoder encoder = Charset.forName("UTF-8").newEncoder();
getWsOutbound().writeBinaryMessage(encoder.encode(cbuf));
getWsOutbound().flush();
After sending this message, on the client I see that I received a binary frame, which is converted to a Blob object (http://www.w3.org/TR/FileAPI/#dfn-Blob).
The question is: is it possible to get the JSON object from the Blob?
I took a look at the FileReader interface (http://www.w3.org/TR/FileAPI/#FileReader-interface), and I used code like this to inspect what the FileReader can do (the first line creates a brand new Blob, so you can test on the fly if you want):
var b = new Blob([{"test": "toast"}], {type: "application/json"});
var fr = new FileReader();
fr.onload = function (evt) {
    var res = evt.target.result;
    console.log("onload", arguments, res, typeof res);
};
fr.readAsArrayBuffer(b);
I tried all the "readAs..." methods that I saw in the FileReader implementation (I'm using Chrome 22). Anyway, I didn't find anything useful.
Do you have any suggestions? Thanks.
You should have tried readAsText() instead of readAsArrayBuffer() (JSON is text in the end).
You also forgot to stringify the object (convert it to JSON text):
var b = new Blob([JSON.stringify({"test": "toast"})], {type: "application/json"}),
    fr = new FileReader();

fr.onload = function () {
    console.log(JSON.parse(this.result));
};

fr.readAsText(b);
To convert a Blob/File that contains JSON data to a JavaScript object, use:
JSON.parse(await blob.text());
An example:
Select a JSON file; you can then use the parsed object (json) in the browser's console.
const input = document.createElement("input");
input.type = "file";
input.accept = "application/json";
document.body.prepend(input);

input.addEventListener("change", async event => {
    const json = JSON.parse(await input.files[0].text());
    console.log("json", json);
    globalThis.json = json;
});
What you're doing is conceptually wrong. JSON is a string representation of an object, not an object itself. So, when you send a binary representation of JSON over the wire, you're sending a binary representation of the string. There's no way to get around parsing JSON on the client side to convert a JSON string to a JavaScript Object.
You absolutely should always send JSON as text to the client, and you should always call JSON.parse. Nothing else is going to be easy for you.
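For illustration, a minimal sketch of that flow on the client, with a hypothetical endpoint URL:

const ws = new WebSocket('ws://localhost:8080/endpoint'); // hypothetical URL
ws.onmessage = (event) => {
    // for text frames, event.data is already a string; no FileReader needed
    const obj = JSON.parse(event.data);
    console.log(obj.arr); // e.g. [1, 2] if the servlet sends {"arr": [1, 2]}
};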
let reader = new FileReader()
reader.onload = e => {
    if (e.target.readyState === 2) { // 2 === FileReader.DONE
        let res = {}
        if (window.TextDecoder) {
            const enc = new TextDecoder('utf-8')
            res = JSON.parse(enc.decode(new Uint8Array(e.target.result))) // convert to a JSON object
        } else {
            res = JSON.parse(String.fromCharCode.apply(null, new Uint8Array(e.target.result)))
        }
        console.info('import-back:: ', res)
    }
}
reader.readAsArrayBuffer(response)