Node base64 encode doesn't give whole string - javascript

Good day,
I am having a weird issue with Node. I am encoding files as Base64, and although it works for most of my PDFs, one in particular doesn't output the whole base64 string.
The actual b64 string starts like this: "JVBERi0xLjMKJf////8KNiAwIG9i..." but I only get "JVBERi0xLjMK"
Here is my code:
// Reads the file whose path is stored in transaction.deliverable and, on
// success, overwrites transaction.deliverable with that file's contents
// encoded as base64, then logs the encoded string.
// NOTE(review): the snippet as posted ends before the readFile callback and
// sendPDF itself are closed.
function sendPDF() {
// Grab the final PDF
require('fs').readFile(transaction.deliverable, function (err, data) {
if (err) {
console.log(err);
log(2, "Couldn't read: " + transaction.deliverable);
} else {
// NOTE(review): `new Buffer(...)` is the deprecated constructor form
// (Buffer.from() is its replacement per the Node.js docs). No encoding is
// passed to readFile, so `data` is presumably already a Buffer - confirm.
// The path in transaction.deliverable is replaced by the base64 text here,
// so a later call would pass that text to readFile as a path.
transaction.deliverable = new Buffer(data, 'binary').toString('base64');
//transaction.deliverable = data.toString('base64');
console.log(transaction.deliverable);
}
The commented out line was another attempt. The transaction structure is:
/**
 * Per-transaction state built from a snapshot object.
 *
 * Reads the payload with snapshot.val() and the id with snapshot.key(), in
 * that order, then seeds the remaining bookkeeping fields with empty values.
 *
 * @param {{val: function(): {request: *}, key: function(): *}} snapshot
 *   Source record; only val() and key() are used.
 */
function Transaction(snapshot) {
  const payload = snapshot.val();
  this.tid = snapshot.key();
  this.request = payload.request;
  // Fresh containers per instance; `deliverable` stays null until it is set later.
  Object.assign(this, {
    pages: [],
    fileCount: 0,
    deliverable: null,
    fileName: "",
  });
}
This transaction simply stores some information that I pull from Firebase; however, the important property, .deliverable, is a string holding the path of the PDF I need to encode and send.
I don't get any read errors when this happens, and the next transaction goes through this code block just fine, giving a full base64 string.
I was curious if my toString() was interpolating the base64 string, but then I thought I would have had larger problems earlier.
Any ideas? I can put this on hold and move on with my work but I would love to fix this.. Thank you.

Related

MD5 checksum differs from browser to Node.js API

I am trying to perform a checksum on a file with javascript. I'm using a FileReader and using CryptoJS with the .MD5 method as well as the CryptoJS.enc.Hex encoding.
The checksum from the front end (above) differs from the one on the back end, where I get one from ExpressFileUpload and also generate my own with the crypto module via crypto.createHash('md5'), taking the digest via hash.digest('hex'). Those two (my own and ExpressFileUpload's) differ from each other as well.
what is going on..?
// Browser side: reads `data` with a FileReader and logs the hex MD5 that
// CryptoJS computes over the reader's result.
let img = document.createElement('img');
img.file = data;
let reader = new FileReader();
// The IIFE receives `img` as `someelement`, but the returned handler never uses it.
reader.onload = (function (someelement) {
return function (e) {
// NOTE(review): e.target.result is a binary string here (readAsBinaryString
// below); CryptoJS presumably hashes a string argument via a text encoding
// rather than as raw bytes - confirm against the CryptoJS docs.
let md5 = CryptoJS.MD5(e.target.result);
let str = md5.toString(CryptoJS.enc.Hex);
console.log('str', str); // will give one random md5
};
})(img);
reader.readAsBinaryString(data);
then on the server using https://www.npmjs.com/package/express-fileupload
// Server side: logs the md5 reported on req.files.file, then computes and logs
// a second md5 over the uploaded data with the crypto module.
// NOTE(review): the function has no name; an anonymous function can only be
// exported with `export default`.
export async function(req, res, next) {
console.log(req.files.file.md5) // some other md5
const hash = crypto.createHash('md5');
// NOTE(review): the data is turned into a UTF-8 string before hashing, so the
// hash covers that string rather than the uploaded bytes; the author's edit
// below reports that dropping .toString('utf-8') matches req.files.file.md5.
let buff = Buffer.from(req.files.file.data, "base64").toString('utf-8');
// edit, this actually DOES come to the same if I remove
// .toString('utf-8')
// as the req.files.file.md5
hash.update(buff);
let str = hash.digest("hex");
console.log('other hash', str); // and some third completely different md5
}
can someone please explain what I am doing wrong?
Did you encode e.target.result as UTF-8 in your front end? I had much the same problem, until I realized that I had used the wrong encoding.
Try encoding your plain string as UTF-8 and hashing it after that.

Why is nodeJs not reading entire binary file from disk?

I have a PDF file which I want to read into memory using NodeJS. Ideally I'd like to encode it using base64 for transferring it. But somehow the read function does not seem to read the full PDF file, which makes no sense to me. The original PDF was generated using pdfKit, and is ok and viewable using a PDF reader program.
The original file test.pdf has 90kB on disk. But if I read and write it back to disk there are just 82kB and the new PDF test-out.pdf is not ok. The pdf viewer says:
Unable to open document. The pdf document is damaged.
The base64 encoding therefore also does not work correctly. I tested it using this webservice. Does someone know why and what is happening here? And how to resolve it.
I found this post already.
// Round-trip check: read test.pdf into memory and write what was read to test-out.pdf.
// NOTE(review): `fs` is assigned without const/let/var, so it is not a declared variable.
fs = require('fs');
let buf = fs.readFileSync('test.pdf'); // returns raw buffer binary data
// Alternative the author tried: have readFileSync return the base64 text directly.
// buf = fs.readFileSync('test.pdf', {encoding:'base64'}); // for the base64 encoded data
// ...transfer the base64 data...
fs.writeFileSync('test-out.pdf', buf); // should be pdf again
EDIT MCVE:
// MCVE: builds a small PDF with pdfkit, pipes it into a file at `filepath`,
// reads that file back and writes the bytes read to outfile1.pdf.
const fs = require('fs');
const PDFDocument = require('pdfkit');
let filepath = 'output.pdf';
// Wraps one PDFDocument whose output is piped to `filepath`.
class PDF {
// Creates the document, adds the fixed header content, then pipes the
// document into a write stream on `filepath`.
constructor() {
this.doc = new PDFDocument();
this.setupdocument();
this.doc.pipe(fs.createWriteStream(filepath));
}
// Registers the page-number handler and writes the headline and timestamp.
setupdocument() {
var pageNumber = 1;
// On every 'pageAdded' event, print the incremented page number in a
// 100-wide box centred horizontally at y = 40.
this.doc.on('pageAdded', () => {
this.doc.text(++pageNumber, 0.5 * (this.doc.page.width - 100), 40, {width: 100, align: 'center'});
}
);
this.doc.moveDown();
// draw some headline text
this.doc.fontSize(25).text('Some Headline');
this.doc.fontSize(15).text('Generated: ' + new Date().toUTCString());
this.doc.moveDown();
this.doc.font('Times-Roman', 11);
}
// Appends the fields of `object` (location, table, name) to the document,
// ends the document, and returns the contents of `filepath` as read from disk.
report(object) {
this.doc.moveDown();
this.doc
.text(object.location+' '+object.table+' '+Date.now())
.font('Times-Roman', 11)
.moveDown()
.text(object.name)
.font('Times-Roman', 11);
this.doc.end();
// NOTE(review): the file is read synchronously right after end(); nothing
// here waits for the write stream created in the constructor to finish, so
// the read may see a partially written file - confirm by waiting for the
// stream's 'finish' event.
let report = fs.readFileSync(filepath);
return report;
}
}
let pdf = new PDF();
let buf = pdf.report({location: 'athome', table:'wood', name:'Bob'});
fs.writeFileSync('outfile1.pdf', buf);
The encoding option for fs.readFileSync() is for you to tell the readFile function what encoding the file already is so the code reading the file knows how to interpret the data it reads. It does not convert it into that encoding.
In this case, your PDF is binary - it's not base64 so you are telling it to try to convert it from base64 into binary which causes it to mess up the data.
You should not be passing the encoding option at all and you will then get the RAW binary buffer (which is what a PDF file is - raw binary). If you then want to convert that to base64 for some reason, you can then do buf.toString('base64') on it. But, that is not its native format and if you write that converted data back out to disk, it won't be a legal PDF file.
To just read and write the same file out to a different filename, leave off the encoding option entirely:
// Copies test.pdf to test-out.pdf; no encoding option is passed on either the
// read or the write.
const fs = require('fs');
let buf = fs.readFileSync('test.pdf'); // get raw buffer binary data
fs.writeFileSync('test-out.pdf', buf); // write out raw buffer binary data
After a lot of searching I found this Github issue. The problem in my question seems to be the call of doc.end() which for some reason doesn't wait for the stream to finish (finish event of write stream). Therefore as suggested in the Github issue, the following approaches work:
callback based:
// Callback-based variant: only touch the PDF file once the write stream has
// emitted 'finish'.
const doc = new PDFDocument();
const writeStream = fs.createWriteStream('filename.pdf');
// Listeners are registered before the document is ended so that neither event
// can be missed.
writeStream.on('finish', function () {
  // do stuff with the PDF file
});
writeStream.on('error', function (err) {
  // The file could not be written; without this listener the 'error' event
  // would be thrown as an uncaught exception.
  console.error(err);
});
doc.pipe(writeStream);
doc.end();
or promise based:
const stream = fs.createWriteStream(localFilePath);
doc.pipe(stream);
// ... add content to the document here ...
doc.end();
// Wait until the write stream reports 'finish'. Reject on 'error' so that the
// await cannot hang forever when the file cannot be written.
await new Promise<void>((resolve, reject) => {
  stream.once("finish", resolve);
  stream.once("error", reject);
});
or even nicer, instead of calling doc.end() directly, call the function savePdfToFile below:
/**
 * Ends `pdf` and writes it to `fileName`.
 *
 * @param pdf      Document to finish; it is piped into a new write stream.
 * @param fileName Path of the file to create.
 * @returns A promise that resolves once the write stream has closed and
 *          pdf.end() has returned, and rejects if pdf.end() throws or the
 *          write stream emits 'error'.
 */
function savePdfToFile(pdf : PDFKit.PDFDocument, fileName : string) : Promise<void> {
    return new Promise<void>((resolve, reject) => {
        // To determine when the PDF has finished being written successfully
        // we need to confirm the following 2 conditions:
        //
        //   1. The write stream has been closed
        //   2. PDFDocument.end() was called synchronously without an error being thrown
        let pendingStepCount = 2;

        const stepFinished = () => {
            pendingStepCount -= 1;
            if (pendingStepCount === 0) {
                resolve();
            }
        };

        const writeStream = fs.createWriteStream(fileName);
        // A failed open or write is reported through 'error'. Without a
        // listener it would surface as an uncaught exception and this promise
        // would never settle. A later resolve() from 'close' is then a no-op.
        writeStream.on('error', reject);
        writeStream.on('close', stepFinished);
        pdf.pipe(writeStream);

        // A synchronous throw here leaves the executor and rejects the promise.
        pdf.end();

        stepFinished();
    });
}
This function should correctly handle the following situations:
PDF generated successfully
Error is thrown inside pdf.end() before write stream is closed
Error is thrown inside pdf.end() after write stream has been closed

How to read remote image to a base64 data url

There are actually many answers to this question, but my problem is this:
I want to generate a PDF dynamically with 5 external (URL) images. I'm using the PDFmake node module.
It supports only two kinds of image source: local files and base64 data. But I don't want to store the images locally.
So my requirement is one function which takes a URL as a parameter and returns base64,
so that I can store it in a global variable and create PDFs.
Thanks in advance.
// Placeholder from the question: meant to take an image URL and return the
// image as base64.
// NOTE(review): `base64` is never defined in this snippet; the body is still
// to be written.
function urlToBase(URL){
return base64;
}
// Builds a two-item document definition (one text node, one image node fed
// from urlToBase) and pipes the resulting document into myPdf.pdf.
var img = urlToBase('https://unsplash.com/photos/MVx3Y17umaE');
// Document definition handed to printer.createPdfKitDocument below.
var dd = {
content: [
{
text: 'fjfajhal'
},
{
image: img,
}
]
};
var writeStream = fs.createWriteStream('myPdf.pdf');
// NOTE(review): `printer` and `fs` are not defined in this snippet; presumably
// they are set up earlier in the asker's file.
var pdfDoc = printer.createPdfKitDocument(dd);
pdfDoc.pipe(writeStream);
pdfDoc.end();
I'm using the PDFmake module from npm.
The contents of the remote image can first be fetched with an HTTP request, for example using the ubiquitous request npm module. The image contents can then be transformed to a buffer and finally converted to a base64 string. To complete the transformation, add the proper data-URL prefix, for example data:image/png;base64, to the beginning of the base64 string.
Here is a rough example for a PNG image:
// Fetches a remote PNG and logs it as a base64 data URL.
const request = require('request-promise-native');
// Data-URL prefix for PNG content; the MIME type must match the image fetched.
const pngDataUrlPrefix = 'data:image/png;base64,';
const imageUrl = 'https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png';
request({
  url: imageUrl,
  method: 'GET',
  encoding: null, // This is actually important, or the image string will be encoded to the default encoding
})
  .then((result) => {
    const imageBase64 = Buffer.from(result).toString('base64');
    const imageDataUrl = pngDataUrlPrefix + imageBase64;
    console.log(imageDataUrl);
  })
  .catch((err) => {
    // Report the failure instead of leaving the rejection unhandled.
    console.error(`Failed to fetch ${imageUrl}:`, err);
  });

how to read a static file from url in javascript to create an array

I searched but couldn't find an answer. I am coding a simulator and I want to do the calculations in JavaScript. The simulator takes two kinds of input. The first kind is entered by the user; this part is done. The second kind is a large set of coefficients stored in a CSV/TSV file that is uploaded to the server, and I am not able to read this file. I found a lot of code on how to convert CSV to an array, and I think I will be able to do that part alone. For now I am going step by step, so I just want to read the CSV file and put it into a table. The code shown works if I use an <input type="file">, but I am not able to make it work with a static URL. Can you help me?
// Reads the first file chosen in the #myFile input as text; the onload
// handler is where the table is meant to be built. Always returns false.
function myprocessFile()
{
var fileSize = 0;
var theFile = document.getElementById("myFile").files[0];
// NOTE(review): `blob` is not defined anywhere in this snippet - confirm it
// exists elsewhere, otherwise this line throws before the file is read.
document.getElementById("toto").innerHTML = blob;
if (theFile)
{
var table = document.getElementById("myTable");
var headerLine = "";
var myReader = new FileReader();
// Runs once readAsText below has finished loading the file.
myReader.onload = function(e)
{
// CREATE TABLE
}
myReader.readAsText(theFile);
}
return false;
}
You could use the fetch API :
// Downloads the CSV file from a static URL and hands its text to the parser step.
fetch('url/to/your/csv/file')
  .then(function (response) {
    // fetch only rejects on network failure; an HTTP error such as 404 still
    // resolves, so check the status before parsing an error page as CSV.
    if (!response.ok) {
      throw new Error('Could not load CSV file: HTTP ' + response.status);
    }
    return response.text();
  })
  .then(function (csv) {
    // convert your csv to an array
  })
  .catch(function (err) {
    // Report the failure instead of leaving the rejection unhandled.
    console.error(err);
  });

Python / Django fails at decoding file encoded as base64 by javascript

I'm using this, in react, to base64 encode an image file:
// Wraps `filename` in a new File named `filepath`, reads it with
// FileReader.readAsDataURL and resolves with the reader's result.
// NOTE(review): the caller shown with this snippet passes a single argument,
// so `filepath` is undefined there.
// NOTE(review): only onload is handled and the promise has no reject path, so
// a failed read would leave it pending.
fileToBase64 = (filename, filepath) => {
return new Promise(resolve => {
var file = new File([filename], filepath);
var reader = new FileReader();
reader.onload = function(event) {
resolve(event.target.result);
};
// The result is a data URL, i.e. the base64 text preceded by a
// "data:{type};base64," prefix.
reader.readAsDataURL(file);
});
};
Which gets called by this:
// Change handler for a file input: starts the base64 conversion of the first
// selected file, marks the field as filled and stores `fields` in state.
handleChangeFile = event => {
const { name, files } = event.target;
if (files.length) {
const file = files[0];
// `fields` is the same object as this.state.fields, so the writes below
// mutate the current state object in place.
let fields = this.state.fields;
this.fileToBase64(file).then(result => {
fields[name].value = result;
});
// NOTE(review): these statements sit outside the .then callback, so they run
// before the conversion has resolved; fields[name].value is filled in later
// without another setState - confirm this ordering is intended.
fields[name].isFilled = true;
this.setState({
fields: fields
});
}
};
And the whole fields variable gets posted to a django server, no issues so far.
On the python django end:
# Take the value posted under "file" and decode it as base64.
# NOTE(review): if the client sends a FileReader data URL, the value presumably
# still starts with a "data:{type};base64," prefix, which is not valid base64
# input - confirm and strip the prefix before decoding.
str_encoded = request.data["file"]
str_decoded = base64.b64decode(str_encoded)
The second line returns an error that binascii.Error: Invalid base64-encoded string: length cannot be 1 more than a multiple of 4. I've googled and read that this is probably a padding issue, but I don't know how to fix it.
You will have to strip the base64 string from the prefix added by javascript.
The prefix is something like data:{type};base64,{actual-base64-string-follows}
In PHP, where I had the same issue, I tested whether the string starts with the "data:" prefix and, if so, stripped it from the start of the string up to the position of the ; (semicolon) plus 8 characters (to catch the final ";base64,").
Then you can use python to decode the base64 string remaining as it is now a valid base64 string.

Categories

Resources