Streaming in Node.js - javascript

I would like to know the best practice when I'm streaming data and want access to the whole data after the streaming is done.
I'm streaming like this:
res._oldWrite = res.write;
res.write = function (chunk, encoding, cb) {
  var decoded = chunk.toString(encoding);
  write.write(new Buffer(decoded, encoding), encoding, cb);
  return res._oldWrite.call(res, new Buffer(decoded, encoding), encoding, cb);
}
Now, to access my data, I did something like this:
res._oldWrite = res.write;
var jsonData = '';
res.write = function (chunk, encoding, cb) {
  var decoded = chunk.toString(encoding);
  jsonData += decoded;
  write.write(new Buffer(decoded, encoding), encoding, cb);
  return res._oldWrite.call(res, new Buffer(decoded, encoding), encoding, cb);
}
res.on('finish', function () {
  // Now I can access jsonData, but it is gross; what is the right way?
})
Isn't there a better way to do it?

So I'm not 100% sure I understand your question, @Web Developer, but since you asked for code, below is all that I meant.
Note that there are probably other, shorter ways of doing the same thing (but I'm not sure what you mean by "access whole data after streaming": store it in memory? all at once? etc.):
var fs = require('fs');

var dataStream = require('stream').Writable();
// I'm assuming the "real processing" is saving to a file
var fileStream = fs.createWriteStream('data.txt');

var masterStream = require('stream').Writable();
masterStream._write = function (chunk, enc, next) {
  dataStream.write(chunk);
  fileStream.write(chunk);
  next();
};
// If you now write to masterStream, you get the values in both dataStream and fileStream.
// You can now listen to dataStream and "have access to the data".
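For instance, here is a minimal usage sketch of the setup above (dataStream gets a trivial _write of its own so it can actually accept chunks; the JSON fragments and the collected variable are just placeholders):

var collected = [];
dataStream._write = function (chunk, enc, next) {
  collected.push(chunk);
  next();
};

masterStream.write('{"hello":');
masterStream.write('"world"}');
masterStream.end(function () {
  // 'finish' has fired, so every chunk has been routed to both streams by now
  console.log(Buffer.concat(collected).toString()); // {"hello":"world"}
});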


MD5 checksum differs from browser to Node.js API

I am trying to compute a checksum of a file with JavaScript. I'm using a FileReader and CryptoJS with the .MD5 method as well as the CryptoJS.enc.Hex encoding.
The checksum from the front end (above) differs from the ones on the back end, where I get one from express-fileupload and also generate my own with the crypto module via crypto.createHash('md5') and hash.digest('hex'); and those two (my own and express-fileupload's) differ from each other as well.
What is going on?
let img = document.createElement('img');
img.file = data;

let reader = new FileReader();
reader.onload = (function (someelement) {
  return function (e) {
    let md5 = CryptoJS.MD5(e.target.result);
    let str = md5.toString(CryptoJS.enc.Hex);
    console.log('str', str); // will give one random md5
  };
})(img);
reader.readAsBinaryString(data);
Then on the server, using https://www.npmjs.com/package/express-fileupload:
export async function (req, res, next) {
  console.log(req.files.file.md5) // some other md5

  const hash = crypto.createHash('md5');
  let buff = Buffer.from(req.files.file.data, "base64").toString('utf-8');
  // edit: this actually DOES come out the same as req.files.file.md5
  // if I remove .toString('utf-8')
  hash.update(buff);
  let str = hash.digest("hex");
  console.log('other hash', str); // and some third, completely different md5
}
Can someone please explain what I am doing wrong?
Did you encode the e.target.result to UTF-8 in your front end? I had a similar problem, but then I realized that I'd used the wrong encoding.
Try encoding your plain string to UTF-8 and hashing it after that.
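For what it's worth, a minimal server-side sketch (assuming express-fileupload's default in-memory mode, where req.files.file.data is the raw upload Buffer): hashing that Buffer directly, without any base64/UTF-8 round trip, should reproduce req.files.file.md5, which matches the question's own edit about dropping .toString('utf-8').

const crypto = require('crypto');

// Hash the raw Buffer directly; converting it through base64/utf-8 changes the bytes
const hash = crypto.createHash('md5')
  .update(req.files.file.data)
  .digest('hex');

console.log(hash); // should equal req.files.file.md5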

How do I write a Node.js module to handle an incoming piped stream

I'm trying to write a node module that accepts an incoming piped binary (or base64-encoded) stream, but frankly I don't even know where to start. I can't see any examples in the Node docs about handling incoming streams; I only see examples of consuming them.
Say for example I want to be able to do this:
var asset = new ProjectAsset('myFile', __dirname + '/image.jpg')
var stream = fs.createReadStream(__dirname + '/image.jpg', { encoding: 'base64' }).pipe(asset)
stream.on('finish', function() {
done()
})
I've got ProjectAsset looking like this, but I'm at a loss as to where to go next:
'use strict'

var stream = require('stream'),
  util = require('util')

var ProjectAsset = function() {
  var self = this
  Object.defineProperty(self, 'binaryData', {
    configurable: true,
    writable: true
  })
  stream.Stream.call(self)
  self.on('pipe', function(src) {
    // does it happen here? how do I set self.binaryData?
  })
  return self
}

util.inherits(ProjectAsset, stream.Stream)

module.exports = ProjectAsset
module.exports.DEFAULT_FILE_NAME = 'file'
It is possible to inherit from stream.Stream and make it work; however, based on what's available in the documentation, I would suggest inheriting from stream.Writable. To pipe into a stream.Writable, you'll need to have _write(chunk, encoding, done) defined to handle the piping. Here is an example:
var asset = new ProjectAsset('myFile', __dirname + '/image.jpg')
var stream = fs.createReadStream(__dirname + '/image.jpg', { encoding: 'base64' }).pipe(asset)
stream.on('finish', function() {
console.log(asset.binaryData);
})
Project Asset
'use strict'

var stream = require('stream'),
  util = require('util')

var ProjectAsset = function() {
  var self = this
  self.data = null
  self.binaryData = [];
  stream.Writable.call(self)
  self._write = function(chunk, encoding, done) {
    // Handle this data however you want; keep the raw Buffer chunks
    // so they can be concatenated later
    self.binaryData.push(chunk)
    // Call after processing the data
    done()
  }
  self.on('finish', function() {
    self.data = Buffer.concat(self.binaryData)
  })
  return self
}

util.inherits(ProjectAsset, stream.Writable)

module.exports = ProjectAsset
module.exports.DEFAULT_FILE_NAME = 'file'
If you're looking to also read from the stream, take a look at inheriting from stream.Duplex and also including the _read(size) method.
There's also the simplified constructor API if you're doing something simpler; a sketch follows below.
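For instance, a rough sketch with the simplified constructor API (no util.inherits needed; the chunks and data property names are just for illustration):

var stream = require('stream');

var asset = new stream.Writable({
  write: function (chunk, encoding, done) {
    // collect the raw Buffer chunks as they are piped in
    this.chunks = this.chunks || [];
    this.chunks.push(chunk);
    done();
  }
});

asset.on('finish', function () {
  // all piped data is now available as a single Buffer
  asset.data = Buffer.concat(asset.chunks);
});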
I'm not sure if this is exactly what you were looking for, but I think you could handle it using the Buffer API with Buffer.concat on an array of buffers that are collected from the chunks in the stream's 'data' listener:
'use strict'

var stream = require('stream'),
  util = require('util');

var ProjectAsset = function() {
  var self = this
  Object.defineProperty(self, 'binaryData', {
    configurable: true,
    writable: true
  })
  stream.Stream.call(self)
  var dataBuffer = [];
  self.on('data', function(chunk) {
    dataBuffer.push(chunk);
  }).on('end', function() {
    // Only assemble the data once everything has arrived
    var data = Buffer.concat(dataBuffer);
    self.binaryData = data.toString('binary');
  });
  return self
}

util.inherits(ProjectAsset, stream.Stream)

module.exports = ProjectAsset
module.exports.DEFAULT_FILE_NAME = 'file'
Since you're using var asset = new ProjectAsset('myFile', __dirname + '/image.jpg'), I suppose your ProjectAsset's responsibility is to take some input stream, do some transformations, and write the result to a file. You could implement a transform stream, because you receive some input from a stream and generate some output from it that can be saved to a file or to some other write stream.
You could of course implement a transform stream by inheriting from Node.js's Transform stream, but inheriting is quite cumbersome, so my implementation uses through2 to implement the transform stream:
var through2 = require('through2');

module.exports = through2(function (chunk, enc, callback) {
  // This function is called whenever a piece of data from the incoming stream is read
  // Transform the chunk or buffer the chunk in case you need more data to transform
  // Emit a data package to the next stream in the pipe or omit this call if you need more data from the input stream to be read
  this.push(chunk);
  // Signal through2 that you processed the incoming data package
  callback();
});
Usage
var stream = fs.createReadStream(__dirname + '/image.jpg', { encoding: 'base64' })
.pipe(projectAsset)
.pipe(fs.createWriteStream(__dirname + '/image.jpg'));
As you can see in this example, implementing a stream pipeline fully decouples the data transformation from saving the data.
Factory Function
If you prefer to use a constructor-like approach in the project asset module, because you need to pass in some values or other things, you could easily export a factory function, as shown below:
var through2 = require('through2');

module.exports = function(someData) {
  // New stream is returned that can use someData argument for doing things
  return through2(function (chunk, enc, callback) {
    // This function is called whenever a piece of data from the incoming stream is read
    // Transform the chunk or buffer the chunk in case you need more data to transform
    // Emit a data package to the next stream in the pipe or omit this call if you need more data from the input stream to be read
    this.push(chunk);
    // Signal through2 that you processed the incoming data package
    callback();
  });
}
Usage
var stream = fs.createReadStream(__dirname + '/image.jpg', { encoding: 'base64' })
.pipe(projectAsset({ foo: 'bar' }))
.pipe(fs.createWriteStream(__dirname + '/image.jpg'));

Request Stream Get + Post edited JSON body in Node.js

I'm new to Node.js and am working on a project where I'd like to use Request to stream from one endpoint to another. My goal is to use Request to get and post an edited JSON body using a pipe stream. I know that when doing so, content-type and content-length will be preserved in the POST headers. However, I would like to apply .forEach to all JSON objects in the body from the first URL and post them to the second URL.
I'm not sure about the correct format, and I'd appreciate some clarification.
I know the basic syntax is this:
request.get('URL').pipe(request.post('URL'));
And so far my best guess is something like this:
request('FIRST_URL', function (error, response, body) {
  body = JSON.parse(body);
  body.forEach(function (arg) {
    // return edited body
  });
}).pipe(request.post('SECOND_URL'));
Am I missing something? Is there a better way to do this?
You could write your own transform stream. For example:
var Transform = require('stream').Transform;
var inherits = require('util').inherits;

function JSONTransform() {
  Transform.call(this);
  this._buffer = '';
}
inherits(JSONTransform, Transform);

JSONTransform.prototype._transform = function(chunk, enc, cb) {
  this._buffer += chunk;
  cb();
};

JSONTransform.prototype._flush = function(cb) {
  try {
    var result = JSON.parse(this._buffer);
    this._buffer = null;
    // Do whatever transformations
    // ...
    this.push(JSON.stringify(result));
    cb();
  } catch (ex) {
    cb(ex);
  }
};

// Then just pipe
request.get('FIRST_URL')
  .pipe(new JSONTransform())
  .pipe(request.post('SECOND_URL'));
One other slightly different solution that may be worth considering would be to use a third-party streaming JSON parser module, which may or may not work for your use case.
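For example, a rough sketch along those lines using the JSONStream and event-stream modules (this assumes the first URL returns a JSON array; the per-object edit is hypothetical):

var JSONStream = require('JSONStream');
var es = require('event-stream');

request.get('FIRST_URL')
  .pipe(JSONStream.parse('*'))          // emit each array element as a separate object
  .pipe(es.mapSync(function (item) {
    item.edited = true;                 // hypothetical per-object edit
    return item;
  }))
  .pipe(JSONStream.stringify())         // reassemble the objects into a JSON array
  .pipe(request.post('SECOND_URL'));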

Node base64 encode doesn't give whole string

Good day,
I am having a weird issue with Node. I am encoding a file as base64, and although it works for most of the PDFs that I am encoding, one in particular doesn't output the whole base64 string.
The actual b64 string starts like this: "JVBERi0xLjMKJf////8KNiAwIG9i..." but I only get "JVBERi0xLjMK"
Here is my code:
function sendPDF() {
  // Grab the final PDF
  require('fs').readFile(transaction.deliverable, function (err, data) {
    if (err) {
      console.log(err);
      log(2, "Couldn't read: " + transaction.deliverable);
    } else {
      transaction.deliverable = new Buffer(data, 'binary').toString('base64');
      //transaction.deliverable = data.toString('base64');
      console.log(transaction.deliverable);
    }
  });
}
The commented-out line was another attempt. The transaction structure is:
function Transaction(snapshot) {
  var data = snapshot.val();
  this.tid = snapshot.key();
  this.request = data.request;
  this.pages = [];
  this.fileCount = 0;
  this.deliverable = null;
  this.fileName = "";
}
This transaction simply stores some information that I pull from Firebase; the important field, .deliverable, is a string containing the path of the PDF I need to encode and send.
I don't get any read errors when this happens, and the next transaction goes through this code block just fine, giving a full base64 string.
I was curious whether my toString() was mangling the base64 string, but then I figured I would have had larger problems earlier.
Any ideas? I can put this on hold and move on with my work, but I would love to fix this. Thank you.

How to inflate part of string

While building an NNTP client in Node.js, I have encountered the following problem. When calling the XZVER command, the first data I receive from the socket connection contains both a plain-text response line and deflated (compressed) data:
224 compressed data follows (zlib version 1.2.3.3)
^*�u�#����`*�Ti���d���x�;i�R��ɵC���eT�����U'�|/S�0���� rd�
z�t]2���t�bb�3ѥ��>�͊0�ܵ��b&b����<1/ �C�<[<��d���:��VW̡��gBBim�$p#I>5�cZ�*ψ%��u}i�k�j
�u�t���8�K��`>��im
When I split this string and try to inflate it like this:
lines = chunk.toString().split('\r\n');
response = lines.shift();
zlib.inflate(new Buffer(lines.shift()), function (error, data) {
  console.log(arguments);
  callback();
});
I receive the following error:
[Error: invalid code lengths set] errno: -3, code: 'Z_DATA_ERROR'
Any help is welcome; I am kinda stuck here :(
UPDATE
After implementing mscdex's answer, the whole function looks like this:
var util = require('util'),
  zlib = require('zlib'),
  Transform = require('stream').Transform;

function CompressedStream () {
  var self = this;

  this._transform = function (chunk, encoding, callback) {
    var response = chunk.toString(),
      crlfidx = response.indexOf('\r\n');

    response = response.substring(0, crlfidx);
    console.log(response);

    zlib.gunzip(chunk.slice(crlfidx + 2), function (error, data) {
      console.log(arguments);
      callback();
    });
  };

  Transform.call(this/*, { objectMode: true } */);
};

util.inherits(CompressedStream, Transform);

module.exports = CompressedStream;
You should probably avoid using split() in case those two bytes are in the raw data. You might try something like this instead:
var response = chunk.toString(),
  crlfidx = response.indexOf('\r\n');

// should probably check crlfidx > -1 here ...
response = response.substring(0, crlfidx);

zlib.inflate(chunk.slice(crlfidx + 2), function (error, data) {
  console.log(arguments);
  callback();
});
However if you're doing this inside a 'data' event handler, you should be aware that you may not get the data you expect in a single chunk. Specifically you could get a CRLF split between chunks or you could get multiple responses in a single chunk.
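A rough sketch of that kind of buffering (assuming a Node version with Buffer.alloc and Buffer#indexOf; socket here stands for the NNTP connection, and the inflate stream is only wired up as far as shown):

var zlib = require('zlib');

var inflater = zlib.createInflate();
var header = Buffer.alloc(0);
var headerDone = false;

// Note: do not call socket.setEncoding(); the chunks must stay Buffers
socket.on('data', function (chunk) {
  if (headerDone) {
    // everything after the response line belongs to the compressed payload
    inflater.write(chunk);
    return;
  }
  header = Buffer.concat([header, chunk]);
  var crlfidx = header.indexOf('\r\n');
  if (crlfidx > -1) {
    headerDone = true;
    console.log(header.slice(0, crlfidx).toString()); // "224 compressed data follows ..."
    inflater.write(header.slice(crlfidx + 2));        // start of the deflated data
  }
});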
It seems that my chunks were incorrectly encoded. By removing socket.setEncoding('utf8'), the problem was solved.
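That matches the symptom: with setEncoding('utf8') the socket hands you strings, and decoding arbitrary compressed bytes as UTF-8 is lossy. A small self-contained illustration of why (the payload is just an example):

var zlib = require('zlib');

var deflated = zlib.deflateSync(Buffer.from('hello world'));

// Round-tripping the compressed bytes through a UTF-8 string mangles them,
// because invalid UTF-8 sequences are replaced during decoding
var corrupted = Buffer.from(deflated.toString('utf8'), 'utf8');
console.log(deflated.equals(corrupted)); // false

zlib.inflate(corrupted, function (err) {
  console.log(err && err.code); // typically 'Z_DATA_ERROR', like the error above
});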
