Node.js: piping the same readable stream into multiple (writable) targets

I need to run two commands in series that both need to read data from the same stream.
After piping a stream into another, the buffer is emptied, so I can't read data from that stream again. This doesn't work:
var spawn = require('child_process').spawn;
var fs = require('fs');
var request = require('request');

var inputStream = request('http://placehold.it/640x360');
var identify = spawn('identify', ['-']);

inputStream.pipe(identify.stdin);

var chunks = [];
identify.stdout.on('data', function (chunk) {
  chunks.push(chunk);
});
identify.stdout.on('end', function () {
  var size = getSize(Buffer.concat(chunks)); // width
  var convert = spawn('convert', ['-', '-scale', size * 0.5, 'png:-']);
  inputStream.pipe(convert.stdin);
  convert.stdout.pipe(fs.createWriteStream('half.png'));
});

function getSize(buffer) {
  return parseInt(buffer.toString().split(' ')[2].split('x')[0]);
}
Request complains about this:
Error: You cannot pipe after data has been emitted from the response.
and changing the inputStream to fs.createReadStream yields the same issue, of course.
I don't want to write to a file; I want to reuse, in some way, the stream that request produces (or any other readable stream, for that matter).
Is there a way to reuse a readable stream once it finishes piping?
What would be the best way to accomplish something like the above example?

You have to create a duplicate of the stream by piping it into two streams. You can create a simple duplicate with a PassThrough stream; it simply passes the input through to the output.
const spawn = require('child_process').spawn;
const PassThrough = require('stream').PassThrough;

const a = spawn('echo', ['hi user']);
const b = new PassThrough();
const c = new PassThrough();

a.stdout.pipe(b);
a.stdout.pipe(c);

let count = 0;
b.on('data', function (chunk) {
  count += chunk.length;
});
b.on('end', function () {
  console.log(count);
  c.pipe(process.stdout);
});
Output:
8
hi user
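Applied to the identify/convert example from the question, the same pattern would look roughly like this. This is an untested sketch reusing the question's request, identify, and convert setup; the second PassThrough is given a generous highWaterMark (my addition) because it has to buffer the whole image until convert is spawned:
var spawn = require('child_process').spawn;
var fs = require('fs');
var request = require('request');
var PassThrough = require('stream').PassThrough;

var inputStream = request('http://placehold.it/640x360');

// One branch feeds identify right away; the other is held back for convert.
// The held branch buffers in memory until it is piped, so give it a large
// highWaterMark so backpressure on it doesn't pause the shared source
// before identify has seen the whole image.
var toIdentify = new PassThrough();
var toConvert = new PassThrough({ highWaterMark: 10 * 1024 * 1024 });

inputStream.pipe(toIdentify);
inputStream.pipe(toConvert);

var identify = spawn('identify', ['-']);
toIdentify.pipe(identify.stdin);

var chunks = [];
identify.stdout.on('data', function (chunk) {
  chunks.push(chunk);
});
identify.stdout.on('end', function () {
  var size = getSize(Buffer.concat(chunks)); // width
  var convert = spawn('convert', ['-', '-scale', String(size * 0.5), 'png:-']);
  toConvert.pipe(convert.stdin);
  convert.stdout.pipe(fs.createWriteStream('half.png'));
});

function getSize(buffer) {
  return parseInt(buffer.toString().split(' ')[2].split('x')[0]);
}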

The first answer only works if the streams take roughly the same amount of time to process data. If one takes significantly longer, the faster one will request new data, consequently overwriting the data still being used by the slower one (I had this problem after trying to solve it using a duplicate stream).
The following pattern worked very well for me. It uses Streamz, a library built on Streams2, together with bluebird Promises to synchronize asynchronous streams via a callback. Using the familiar example from the first answer:
var spawn = require('child_process').spawn;
var pass = require('stream').PassThrough;
var streamz = require('streamz').PassThrough;
var Promise = require('bluebird');

var a = spawn('echo', ['hi user']);
var b = new pass();
var c = new pass();

a.stdout.pipe(streamz(combineStreamOperations));

function combineStreamOperations(data, next) {
  Promise.join(b, c, function (b, c) {
    // perform n operations on the same data here (e.g. write it to b and c)
    next(); // request more
  });
}

var count = 0;
b.on('data', function (chunk) { count += chunk.length; });
b.on('end', function () { console.log(count); c.pipe(process.stdout); });

You can use this small npm package I created:
readable-stream-clone
With it you can reuse a readable stream as many times as you need.
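A rough usage sketch, assuming the package's documented pattern of wrapping the original stream in a new readable for each consumer (the exact constructor API is an assumption here; check the package README):
const fs = require('fs');
const ReadableStreamClone = require('readable-stream-clone');

const readStream = fs.createReadStream('text.txt');

// Create one clone per consumer *before* any data starts flowing.
const clone1 = new ReadableStreamClone(readStream);
const clone2 = new ReadableStreamClone(readStream);

clone1.pipe(fs.createWriteStream('copy-1.txt'));
clone2.pipe(fs.createWriteStream('copy-2.txt'));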

For the general problem, the following code works fine:
var PassThrough = require('stream').PassThrough;

var a = new PassThrough();
var b1 = new PassThrough();
var b2 = new PassThrough();

a.pipe(b1);
a.pipe(b2);

b1.on('data', function (data) {
  console.log('b1:', data.toString());
});
b2.on('data', function (data) {
  console.log('b2:', data.toString());
});

a.write('text');

I have a different solution: writing to two streams simultaneously. Naturally, the time to write will be the sum of the two, but I use it to respond to a download request where I want to keep a copy of the downloaded file on my server (actually, I use S3 as a backup, so I cache the most-used files locally to avoid multiple transfers).
/**
 * A utility class made to write to a file while answering a file download request
 */
class TwoOutputStreams {
  constructor(streamOne, streamTwo) {
    this.streamOne = streamOne;
    this.streamTwo = streamTwo;
  }

  setHeader(header, value) {
    if (this.streamOne.setHeader)
      this.streamOne.setHeader(header, value);
    if (this.streamTwo.setHeader)
      this.streamTwo.setHeader(header, value);
  }

  write(chunk) {
    this.streamOne.write(chunk);
    this.streamTwo.write(chunk);
  }

  end() {
    this.streamOne.end();
    this.streamTwo.end();
  }
}
You can then use this as a regular OutputStream
const twoStreamsOut = new TwoOutputStreams(fileOut, responseStream)
and pass it to your method as if it were a response or a file output stream.
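For example, in an Express-style route handler it might be used like this (a sketch only; app, localCachePath, and fetchFile are hypothetical names, not from the original answer):
const fs = require('fs');

// Hypothetical route: stream the file to the client while also writing a
// local cache copy at the same time.
app.get('/download/:id', (req, res) => {
  const fileOut = fs.createWriteStream(localCachePath(req.params.id));
  const twoStreamsOut = new TwoOutputStreams(fileOut, res);
  // fetchFile is assumed to write to whatever "response-like" object it is given.
  fetchFile(req.params.id, twoStreamsOut);
});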

If you have async operations on the PassThrough streams, the answers posted here won't work.
A solution that works for async operations includes buffering the stream content and then creating streams from the buffered result.
To buffer the result you can use concat-stream:
const Promise = require('bluebird');
const concat = require('concat-stream');

const getBuffer = function (stream) {
  return new Promise(function (resolve, reject) {
    var gotBuffer = function (buffer) {
      resolve(buffer);
    };
    var concatStream = concat(gotBuffer);
    stream.on('error', reject);
    stream.pipe(concatStream);
  });
};
To create streams from the buffer you can use:
const { Readable } = require('stream');

const getBufferStream = function (buffer) {
  const stream = new Readable();
  stream.push(buffer);
  stream.push(null);
  return Promise.resolve(stream);
};
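Putting the two helpers together might look like this (a sketch; sourceStream and the two consumers are placeholder names):
const fs = require('fs');

// Buffer the source once, then mint as many independent readable streams
// from that buffer as there are consumers.
getBuffer(sourceStream)
  .then(function (buffer) {
    return Promise.all([getBufferStream(buffer), getBufferStream(buffer)]);
  })
  .then(function (streams) {
    streams[0].pipe(process.stdout);                   // first consumer
    streams[1].pipe(fs.createWriteStream('copy.bin')); // second consumer
  });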

What about piping into two or more streams not at the same time?
For example:
var PassThrough = require('stream').PassThrough;

var mybinaryStream = stream.start(); // never-ending audio stream
var file1 = fs.createWriteStream('file1.wav', { encoding: 'binary' });
var file2 = fs.createWriteStream('file2.wav', { encoding: 'binary' });
var mypass = new PassThrough();

mybinaryStream.pipe(mypass);
mypass.pipe(file1);

setTimeout(function () {
  mypass.pipe(file2);
}, 2000);
The above code does not produce any errors, but file2 is empty.

Related

Making any reference to Node.js's process.argv causes errors in an unexpected place (reading a file)

I am writing code that generates a very large JSON object, saves it to a file, then loads the file and inserts the data into a Mongo collection. I want to pass a string from the command line when calling the script that I use to set the file name, as well as the collection name. I call it like so: node --max-old-space-size=8192 data_generator.js foo 1000000.
The code fails with error ENOENT: no such file or directory, open 'foo.json' on the third line of the function gen_collection() where I set the variable data. This error does not appear when a file foo.json already exists, even if it is empty. Before it fails, the code successfully creates a file foo.json but it contains only an empty array [].
The code fails with this same exact error when I include any reference to process.argv. This includes when I try to set any variable to a value from the process.argv array. The code works when I set the variables fname as const fname = "foo" and size as const size = 0. However, even if the only reference I have to process.argv is in a console.log, i.e. adding console.log(process.argv[2]) to main(), it fails with the exact same error as above.
Here is the code I am trying to run:
const { MongoClient } = require("mongodb");
const fs = require('fs');
const bjson = require('big-json');

async function main() {
  const uri = "my db uri";
  const client = new MongoClient(uri);
  const fname = process.argv[2];
  const size = parseInt(process.argv[3]);
  // const fname = 'small'
  // const size = 1

  try {
    await client.connect({ useUnifiedTopology: true });
    await write_data_to_disk(fname, size);
    await gen_collection(client, fname);
  } catch (e) {
    console.error(e);
  } finally {
    await client.close();
  }
};

// generate data as a json array and write it to a local file
async function write_data_to_disk(fname, size) {
  let arr = [];
  for (let i = 0; i < size; i++) {
    let doc = gen_document();
    arr.push(doc);
  }
  const strStream = bjson.createStringifyStream({
    body: arr
  });
  let logger = fs.createWriteStream(`${fname}.json`);
  strStream.on('data', (d) => {
    logger.write(d);
  });
};

async function gen_collection(client, fname) {
  let db = client.db('test');
  let collection = db.collection(fname);
  let data = JSON.parse(fs.readFileSync(`${fname}.json`, 'utf8')); // ERROR APPEARS ON THIS LINE
  let bulkUpdateOps = [];
  data.forEach((doc) => {
    bulkUpdateOps.push({"insertOne": {"document": doc}});
    if (bulkUpdateOps.length === 1000) {
      collection.bulkWrite(bulkUpdateOps);
      bulkUpdateOps = [];
    }
  });
  if (bulkUpdateOps.length > 0) {
    collection.bulkWrite(bulkUpdateOps);
  }
};

function gen_document() {
  // returns a json object
};
You're doing
await write_data_to_disk(...)
but that function doesn't return a promise that is connected to when it's done. So you're trying to read the resulting file BEFORE it has been created, or before it has valid content in it, and thus the ENOENT error, as the file doesn't yet exist when you try to read it in the following function.
Write streams do not play nicely with promises unless you wrap them in your own promise that resolves when you are completely done writing to the stream and the file has been closed.
Also, you probably want to just .pipe() strStream to the logger stream. That is much easier, and you can then monitor when that pipe() operation is done in order to resolve the promise you wrap around the operation.
You can promisify write_data_to_disk() like this:
// generate data as a json array and write it to a local file
function write_data_to_disk(fname, size) {
  return new Promise((resolve, reject) => {
    const arr = [];
    for (let i = 0; i < size; i++) {
      let doc = gen_document();
      arr.push(doc);
    }
    const strStream = bjson.createStringifyStream({ body: arr });
    const dest = fs.createWriteStream(`${fname}.json`, { emitClose: true });
    // monitor for completion and errors
    dest.on('error', reject).on('close', resolve);
    strStream.on('error', reject);
    // pipe all the content from strStream to the dest writeStream
    strStream.pipe(dest);
  });
}
Since this returns a promise that is truly tied to when the write operation is done, you can then use await write_data_to_disk(...).
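With that change, the existing main() flow from the question works as intended (nothing else needs to change there):
// Inside main(): write_data_to_disk now resolves only after the write
// stream has emitted 'close', so the file is complete before it is read.
try {
  await client.connect({ useUnifiedTopology: true });
  await write_data_to_disk(fname, size); // waits for the file to be closed
  await gen_collection(client, fname);   // safe: the file exists and is complete
} catch (e) {
  console.error(e);
} finally {
  await client.close();
}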

How to pipe Writable Buffer to a ReadStream?

How can I take a writable stream and return a readable stream from a buffer?
I have the following to write the data that comes from an ftp server to an array of chunks:
let chunks = [];
let writable = new Writable();

writable._write = (chunk, encoding, callback) => {
  chunks.push(chunk);
  callback();
};
I am then creating a new readstream:
let readable = new ReadStream()
I then tried to pipe the writable to the readable but that doesn't seem to work:
Argument of type 'ReadStream' is not assignable to parameter of type 'WritableStream'.
writable.pipe(readable)
Here is the entire method:
export class FTP {
  readStream(filePath, options = {}) {
    let conn = this.getConnection(this.name);
    if (!conn) return Buffer.from('');
    filePath = this.forceRoot(filePath);
    let chunks = [];
    let writable = new Writable();
    writable._write = (chunk, encoding, callback) => {
      chunks.push(chunk);
      callback();
    };
    let readable = new ReadStream();
    conn.client.download(writable, filePath, options.start || undefined);
    writable.pipe(readable);
    return readable;
  }
}
I then read from the stream and pipe the output to the response object created from http.createServer() like this:
let stream = store.readStream(file, { start, end })
  .on('open', () => stream.pipe(res))
  .on('close', () => res.end())
  .on('error', err => res.end(err));
Yep, Node.js streams are hard to grasp. Logically, you don't need two streams here. If you want to read from your FTP class as from a stream, you just need to implement a single readable stream. Check out this section of the docs to get an idea of how to implement a readable stream from scratch:
const { Readable } = require('stream'); // not shown in the docs excerpt, but needed to run it

class SourceWrapper extends Readable {
  constructor(options) {
    super(options);
    this._source = getLowLevelSourceObject();

    // Every time there's data, push it into the internal buffer.
    this._source.ondata = (chunk) => {
      // If push() returns false, then stop reading from source.
      if (!this.push(chunk))
        this._source.readStop();
    };

    // When the source ends, push the EOF-signaling `null` chunk.
    this._source.onend = () => {
      this.push(null);
    };
  }

  // _read() will be called when the stream wants to pull more data in.
  // The advisory size argument is ignored in this case.
  _read(size) {
    this._source.readStart();
  }
}
However, from your example I can conclude that conn.client.download() expects a writable stream as an input parameter. In that case you can just use a standard PassThrough stream, which is a duplex stream (writable on the left side, readable on the right) with no transformation applied:
const { PassThrough } = require('stream');

export class FTP {
  readStream(filePath, options = {}) {
    let conn = this.getConnection(this.name);
    if (!conn) return Buffer.from('');
    filePath = this.forceRoot(filePath);
    const pt = new PassThrough();
    conn.client.download(pt, filePath, options.start);
    return pt;
  }
}
You can find more information on Node.js streams here and here.
UPD: Usage example:
// assume res is an [express or similar] response object.
const s = store.readStream(file, { start, end });
s.pipe(res);
Pipe works the other way round from what you're thinking. According to the Node.js documentation, pipe() is a method of Readable, and it accepts a Writable as its destination. You were trying to pipe a Writable to a Readable, but it's actually a Readable that gets piped into a Writable, not the other way round.
Try passing a PassThrough to download() and returning that same PassThrough, as in the answer above.

How can I track write progress when piping with Node.js?

I am trying to track the progress of a pipe from a read stream to write stream so I can display the progress to the user.
My original idea was to track progress when the data event is emitted as shown here:
const fs = require('fs');

let final = fs.createWriteStream('output');
fs.createReadStream('file')
  .on('close', () => {
    console.log('done');
  })
  .on('error', (err) => {
    console.error(err);
  })
  .on('data', (data) => {
    console.log("data");
    /* Calculate progress */
  })
  .pipe(final);
However, I realized that just because the data was read doesn't mean it was actually written. This can be seen if the pipe is removed, as the data event still fires.
How can I track write progress when piping with Node.js?
You can use a dummy Transform stream like this:
const stream = require('stream');

let totalBytes = 0;
stream.pipeline(
  fs.createReadStream(from_file),
  new stream.Transform({
    transform(chunk, encoding, callback) {
      totalBytes += chunk.length;
      console.log(totalBytes);
      this.push(chunk);
      callback();
    }
  }),
  fs.createWriteStream(to_file),
  err => {
    if (err)
      ...
  }
);
You can do the piping manually, and make use of the callback from writable.write()
callback: < function > Callback for when this chunk of data is flushed
const fs = require('fs');

let from_file = `<from_file>`;
let to_file = '<to_file>';

let from_stream = fs.createReadStream(from_file);
let to_stream = fs.createWriteStream(to_file);

// get the total size of the file
let { size } = fs.statSync(from_file);

let written = 0;
from_stream.on('data', data => {
  // do the piping manually here.
  to_stream.write(data, () => {
    written += data.length;
    console.log(`written ${written} of ${size} bytes (${(written / size * 100).toFixed(2)}%)`);
  });
});
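A possible refinement (my addition, not part of the original answer): the snippet above keeps reading even if the destination is slow, so the writable's internal buffer can grow. Pausing the read stream until each chunk's callback fires keeps memory flat:
// Sketch: respect backpressure by pausing the source until the chunk is
// flushed, then resuming. Progress reporting stays the same.
from_stream.on('data', data => {
  from_stream.pause();
  to_stream.write(data, () => {
    written += data.length;
    console.log(`written ${written} of ${size} bytes (${(written / size * 100).toFixed(2)}%)`);
    from_stream.resume();
  });
});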
Somehow I remember this thread being about memory efficiency; anyway, I've rigged up a small script that's very memory efficient and tracks progress very well. I tested it on a 230 MB file and the result speaks for itself: https://gist.github.com/J-Cake/78ce059972595823243526e022e327e4
The sample file I used was a bit weird, as the content-length header it reported was in fact off, but the program uses no more than 4.5 MiB of memory.

Pipe NodeJS Stream to an Array

My use case is this: I am looking to read a CSV file in Node and get only the headers. I don't want to write the results of a read stream to a file; rather, I want to push the headers to an array once the file is read, so I can take that array and do something with it later on. Or, better yet, take the stream and, as it is being read, transform it, then send it to an array. The file is a contrived value. I am stuck at this point, where the current output of dataFile is an empty array:
const fs = require('fs');
const parse = require('csv-parse');

const file = "my file path";
let dataFile = [];

rs = fs.createReadStream(file);
parser = parse({columns: true}, function(err, data){
  return getHeaders(data);
});

function getHeaders(file){
  return file.map(function(header){
    return dataFile.push(Object.keys(header));
  });
}
What do I need to do in order to get the results I need? I am expecting the headers to be found in an array as the end result.
OK, so there are a few confusing things in your code, and one mistake: you never actually called your code :)
First, a solution: add this after the parser is defined:
rs.pipe(parser).on('end', function(){
  console.log(dataFile);
});
And, magic: dataFile is no longer empty.
You stream the file from disk, pass it to the parser, and then, at the end, call a callback.
As for the confusing parts:
parser = parse({columns: true}, function(err, data){
  // You don't need to return anything from the callback; you give the
  // impression that parser will be the result of getHeaders, but it's not:
  // it's a stream.
  return getHeaders(data);
});

function getHeaders(file){
  // Change map to forEach, with no return: map returns an array of the
  // callback's return values, so here you build an array of the results of
  // each push (which is the new length of dataFile).
  return file.map(function(header){
    return dataFile.push(Object.keys(header));
  });
}
And finally:
Please choose between ending lines with ; or not, but don't mix the two ;)
You should end up with something like:
const fs = require('fs');
const parse = require('csv-parse');

const file = "./test.csv";
var dataFile = [];

rs = fs.createReadStream(file);
parser = parse({columns: true}, function(err, data){
  getHeaders(data);
});

rs.pipe(parser).on('end', function(){
  console.log(dataFile);
});

function getHeaders(file){
  file.forEach(function(header){
    dataFile.push(Object.keys(header));
  });
}

Read a JSON file and append a particular array to an outer-scope list

var dir = require('node-dir');
var path = require('path');
var fs = require('fs');

var root_directory = "C:/ProgramData/XXX/YYY/Resources/LocalLibrary";

function getitems(res, fileHandler) {
  dir.files(root_directory, function (err, files) {
    if (err) throw err;
    fileHandler(res, files);
  });
};

function fileHandler(res, files) {
  var finaldependency = [];
  files.forEach(function (fileName) {
    if (path.extname(fileName) === '.Item') {
      var singleObj = {};
      singleObj['key'] = fileName;
      var content = fs.readFileSync(path.resolve(fileName), "utf8");
      var json = JSON.parse(content);
      singleObj['value'] = json.References;
      finaldependency.push(singleObj);
    }
  });
  res.json(finaldependency);
}

module.exports = function (app) {
  // api ---------------------------------------------------------------------
  // get all items
  app.get('/api/item', function (req, res) {
    // use mongoose to get all items in the database
    getitems(res, fileHandler);
  });
};
Steps involved:
1. When a request is received, process all the files present in root_directory (it contains many nested subdirectories).
2. Get the list of all the files with the help of the node-dir module.
3. Pass the file list to an asynchronous fileHandler.
My question: I am doing a synchronous, blocking style of file handling (fs.readFileSync) and appending the data as key-value pairs to the outer-scope list finaldependency.
Is this the right approach, or is there a more optimized way of doing this?
Your fileHandler function might take a long time, and hence it can block other client requests. Server performance in terms of request processing might get worse as the number of client requests increases.
You can try executing the forEach loop in process.nextTick, as sketched below.
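A sketch of what that suggestion looks like, based on the fileHandler from the question (note the loop itself is still synchronous once it runs; process.nextTick only defers when it starts):
function fileHandler(res, files) {
  // Defer the heavy, synchronous work so the current I/O callback returns
  // immediately; the loop still blocks the event loop while it runs.
  process.nextTick(function () {
    var finaldependency = [];
    files.forEach(function (fileName) {
      if (path.extname(fileName) === '.Item') {
        var content = fs.readFileSync(path.resolve(fileName), "utf8");
        var json = JSON.parse(content);
        finaldependency.push({ key: fileName, value: json.References });
      }
    });
    res.json(finaldependency);
  });
}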
