How can I take a writable stream and return a readable stream from a buffer?
I have the following to write the data that comes from an ftp server to an array of chunks:
let chunks = []
let writable = new Writable
writable._write = (chunk, encoding, callback) => {
  chunks.push(chunk)
  callback()
}
I am then creating a new readstream:
let readable = new ReadStream()
I then tried to pipe the writable to the readable, but that doesn't seem to work; it fails with:
Argument of type 'ReadStream' is not assignable to parameter of type 'WritableStream'.
writable.pipe(readable)
Here is the entire method:
export class FTP {
  readStream(filePath, options = {}) {
    let conn = this.getConnection(this.name)
    if (!conn) return Buffer.from('')
    filePath = this.forceRoot(filePath)
    let chunks = []
    let writable = new Writable
    writable._write = (chunk, encoding, callback) => {
      chunks.push(chunk)
      callback()
    }
    let readable = new ReadStream()
    conn.client.download(writable, filePath, options.start || undefined)
    writable.pipe(readable)
    return readable
  }
}
I then read from the stream and pipe the output to the response object created from http.createServer() like this:
let stream = store.readStream(file, { start, end })
  .on('open', () => stream.pipe(res))
  .on('close', () => res.end())
  .on('error', err => res.end(err))
Yep, Node.js streams are hard to grasp. Logically, you don't need two streams here. If you want to read from your FTP class as from a stream, you only need to implement a single readable stream. Check out this section of the docs to get an idea of how to implement a readable stream from scratch:
class SourceWrapper extends Readable {
  constructor(options) {
    super(options);
    this._source = getLowLevelSourceObject();

    // Every time there's data, push it into the internal buffer.
    this._source.ondata = (chunk) => {
      // If push() returns false, then stop reading from source.
      if (!this.push(chunk))
        this._source.readStop();
    };

    // When the source ends, push the EOF-signaling `null` chunk.
    this._source.onend = () => {
      this.push(null);
    };
  }

  // _read() will be called when the stream wants to pull more data in.
  // The advisory size argument is ignored in this case.
  _read(size) {
    this._source.readStart();
  }
}
However, from your example I can conclude that conn.client.download() expects a writable stream as an input parameter. In that case you can just use a standard PassThrough stream, which is a duplex stream (i.e. writable on the left side and readable on the right side) with no transformation applied:
const { PassThrough } = require('stream');

export class FTP {
  readStream(filePath, options = {}) {
    let conn = this.getConnection(this.name);
    if (!conn) return Buffer.from('');
    filePath = this.forceRoot(filePath);
    const pt = new PassThrough();
    conn.client.download(pt, filePath, options.start);
    return pt;
  }
}
You can find more information on Node.js streams here and here.
UPD: Usage example:
// assume res is an [express or similar] response object.
const s = store.readStream(file, { start, end });
s.pipe(res);
Pipe works the other way round from what you're thinking. According to Node.js's documentation, pipe() is a method of Readable, and it accepts a Writable as its destination. What you were trying to do was pipe a Writable to a Readable, but actually it's a Readable that gets piped to a Writable, not the other way round.
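As a minimal sketch of the correct direction (the source and destination streams here are made-up examples, not your FTP code):

const { Readable, Writable } = require('stream');

// Data flows from the readable (source) into the writable (destination).
const source = Readable.from(['hello', ' ', 'world']);
const destination = new Writable({
  write(chunk, encoding, callback) {
    process.stdout.write(chunk);
    callback();
  }
});

source.pipe(destination); // readable.pipe(writable), never the reverse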
Try passing a PassThrough to download() and return that same PassThrough?
Related
Assume we're creating our own custom readable/writable stream by inheriting from the Stream class. In the first scenario, we use the Readable and Writable classes:
const Stream = require("stream");

const readableStream = new Stream.Readable();
readableStream._read = (size) => {
  for (let i = 1; i < 10; i++) {
    readableStream.push(`${i},`);
  }
  readableStream.push(null);
};

const writeableStream = new Stream.Writable();
writeableStream._write = (chunk, encoding, next) => {
  console.log("writeableStream", chunk.toString());
  next();
};

readableStream.pipe(writeableStream);
In the second scenario, we use the Duplex class (which inherits from both Readable/Writable):
const { Duplex } = require("stream");

const myDuplexStream = new Duplex();
myDuplexStream._read = (size) => {
  for (let i = 1; i < 10; i++) {
    myDuplexStream.push(`${i},`);
  }
  myDuplexStream.push(null);
};
myDuplexStream._write = (chunk, encoding, next) => {
  if (Buffer.isBuffer(chunk)) {
    chunk = chunk.toString();
  }
  console.log("write operation", chunk);
};

myDuplexStream.pipe(myDuplexStream); // that's I guess a no-go, but we could just use a plain on("readable") event.
Would these two implementations be practically equivalent?
No, they’re not identical.
(A) The first example shows a scenario where integers are pushed to a readable buffer that pipes its data to a writable sink. I.e.:
Integers are pushed to a readable buffer
The readable pipes its data to a writable buffer
The write stream console.logs the data
(B) The second example shows a quirky scenario where data piped into the duplex is logged to the console, and, unrelated to any input, some integers are pushed to the readable buffer each time some other stream calls read() on the duplex. I.e.:
(Some unknown source must write/pipe data to the duplex)
The duplex console.logs the streamed input
(No data is forwarded from the duplex's writable side to its readable side; instead, a list of integers is pushed to the readable buffer whenever a downstream consumer calls read())
When a consumer calls read() on the duplex, it pushes the integers 1-9 to the readable buffer for the consumer to read, then closes the readable side of the duplex (due to push(null)).
The writable side of a duplex handles the incoming data, while the readable side provides the outgoing data. (B) seems to have mixed up the roles of the readable/writable sides of the Duplex class.
The OP's Duplex example should probably be:
const Stream = require("stream");
const { Duplex } = Stream;

const myDuplexStream = new Duplex();
myDuplexStream.chunkProcessingBuffer = [];

myDuplexStream._write = (chunk, encoding, next) => {
  console.log("this is my incoming chunk:", chunk.toString());
  // Typically, this chunk would be processed in some way before pushing it to the readable interface
  myDuplexStream.chunkProcessingBuffer.push(chunk);
  next();
};

myDuplexStream._read = (size) => {
  // The readable interface would typically push more than one chunk at a time for each call to read()
  const outgoing = myDuplexStream.chunkProcessingBuffer.shift();
  if (outgoing) {
    console.log("this is my outgoing chunk:", outgoing.toString());
    myDuplexStream.push(outgoing);
  }
};

const readableStream = new Stream.Readable();
readableStream._read = (size) => {
  for (let i = 1; i < 10; i++) {
    readableStream.push(`${i},`);
  }
  readableStream.push(null);
};

const writeableStream = new Stream.Writable();
writeableStream._write = (chunk, encoding, next) => {
  console.log("writeableStream", chunk.toString());
  next();
};

readableStream.pipe(myDuplexStream).pipe(writeableStream);
I have this function which streams buffered data:
function doChunkInput() {
  console.log('Put Objects Chunk');
  let stream = new Readable({
    read() {}
  });

  for (let i = 0; i < 1000; i++) {
    stream.push(' data');
    stream.push(' more data');
    stream.push(' and more data');
  }

  // Pay attention to this
  // null indicates the end of the stream, so the `data` event will be fired
  stream.push(null);

  const params = {
    Bucket: bucket,
    Body: stream,
    Key: `sitemap.1.xml`,
  };
  return s3.upload(params).promise();
}
Is it possible to upload from a stream without buffering? What I want is to stream data where the content-length can't be calculated. In the above example it is getting calculated in the buffer.
I'm trying to test the chunked upload, but with the above example the content-length header is added to the request and the transfer-encoding header is not.
The short answer is no. The AWS SDK does not support streaming, but there's a nice way around it.
You can upload in parts of any size (mind you, as with anything AWS, you pay a tiny bit for each upload - lots of parts may grow into a visible cost). There's even a nice npm module called s3-stream-upload which exposes a normal Writable stream interface that you can pipe to.
You could use my scramjet if you'd prefer to have some special logic around the upload - it's actually quite simple:
const { StringStream } = require("scramjet");

const params = { Key: 'filename', Bucket: 'my-bucket' };
const { UploadId } = await s3.createMultipartUpload(params).promise();

let i = 1;

StringStream
  .from(function*() {
    for (let j = 0; j < 1000; j++) {
      yield ' data';
      yield ' more data';
      yield ' and more data';
    }
  })
  .toBufferStream()
  .breakup(1048576) // you'd buffer every 1meg
  .map(
    (Body) => s3.uploadPart({
      ...params,
      Body,
      UploadId,
      PartNumber: i++
    }).promise()
  )
  .toArray()
  .then(
    parts => s3.completeMultipartUpload({
      ...params,
      MultipartUpload: {
        Parts: parts.map(
          ({ ETag }, i) => ({ ETag, PartNumber: i + 1 })
        )
      }
    }).promise()
  )
  .catch(
    e => {
      console.error("error, aborting", e);
      return s3.abortMultipartUpload({ ...params, UploadId }).promise();
    }
  )
I'm sorry - I didn't run the example above, so it's more of a guide to what should happen - but I can fix it up tomorrow if you get stuck.
Edit: Now I understand that you want to know how to create a stream and add to it without building the whole stream upfront.
I changed the answer above to create a stream from a generator. You could also implement the read method in your example to return the 'data' via this.push (see the docs here), but I guess it's way simpler with scramjet and generators.
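For reference, a minimal sketch (untested, loosely adapted from the question's doChunkInput) of pushing data on demand from read() instead of pre-filling the stream:

const { Readable } = require('stream');

let pushed = 0;
const stream = new Readable({
  read() {
    // Called whenever the consumer wants more data.
    if (pushed++ < 1000) {
      this.push(' data more data and more data');
    } else {
      this.push(null); // signal the end of the stream
    }
  }
});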
I am trying to track the progress of a pipe from a read stream to write stream so I can display the progress to the user.
My original idea was to track progress when the data event is emitted as shown here:
const fs = require('fs');

let final = fs.createWriteStream('output');
fs.createReadStream('file')
  .on('close', () => {
    console.log('done');
  })
  .on('error', (err) => {
    console.error(err);
  })
  .on('data', (data) => {
    console.log("data");
    /* Calculate progress */
  })
  .pipe(final);
However, I realized that just because the data was read doesn't mean it was actually written. This can be seen if the pipe is removed, as the data event still emits.
How can I track write progress when piping with Node.js?
You can use a dummy Transform stream like this:
const stream = require('stream');
const fs = require('fs');

let totalBytes = 0;
stream.pipeline(
  fs.createReadStream(from_file),
  new stream.Transform({
    transform(chunk, encoding, callback) {
      totalBytes += chunk.length;
      console.log(totalBytes);
      this.push(chunk);
      callback();
    }
  }),
  fs.createWriteStream(to_file),
  err => {
    if (err) {
      // handle the error
    }
  }
);
You can do the piping manually, and make use of the callback from writable.write()
callback: < function > Callback for when this chunk of data is flushed
const fs = require('fs');

let from_file = `<from_file>`;
let to_file = '<to_file>';

let from_stream = fs.createReadStream(from_file);
let to_stream = fs.createWriteStream(to_file);

// get total size of the file
let { size } = fs.statSync(from_file);

let written = 0;
from_stream.on('data', data => {
  // do the piping manually here.
  to_stream.write(data, () => {
    written += data.length;
    console.log(`written ${written} of ${size} bytes (${(written / size * 100).toFixed(2)}%)`);
  });
});
Somehow I remember this thread being about memory efficiency. Anyway, I've rigged up a small script that's very memory efficient and tracks progress very well. I tested it with a 230 MB file and the result speaks for itself. https://gist.github.com/J-Cake/78ce059972595823243526e022e327e4
The sample file I used was a bit weird, as the content-length header it reported was in fact off, but the program uses no more than 4.5 MiB of memory.
I'm trying to make a transform stream flow that takes data from socket.io, converts it to JSON, and then sends it to stdout. I am totally perplexed as to why the data just seems to go right through without any transformation. I'm using the through2 library. Here is my code:
getStreamNames().then(streamNames => {
  const socket = io(SOCKETIO_URL);

  socket.on('connect', () => {
    socket.emit('Subscribe', {subs: streamNames});
  });

  const stream = through2.obj(function (chunk, enc, callback) {
    callback(null, parseString(chunk))
  }).pipe(through2.obj(function (chunk, enc, callback) {
    callback(null, JSON.stringify(chunk));
  })).pipe(process.stdout);

  socket.on('m', data => stream.write(data));
});
getStreamNames returns a promise which resolves to an array of stream names (I'm calling an external socket.io API), and parseString takes a string returned from the API and converts it to JSON so it's manageable.
What I'm looking for is for my console to print out the stringified JSON after I parse it using parseString and then make it stdout-able with JSON.stringify. What is actually happening is that the data goes right through the stream with no transformation applied.
For reference, the data coming from the API is in a weird format, something like
field1~field2~0x23~fieldn
and so that's why I need the parseString method.
I must be missing something. Any ideas?
EDIT:
parseString:
function(value) {
  var valuesArray = value.split("~");
  var valuesArrayLength = valuesArray.length;
  var mask = valuesArray[valuesArrayLength - 1];
  var maskInt = parseInt(mask, 16);
  var unpackedCurrent = {};
  var currentField = 0;

  for (var property in this.FIELDS) {
    if (this.FIELDS[property] === 0) {
      unpackedCurrent[property] = valuesArray[currentField];
      currentField++;
    }
    else if (maskInt & this.FIELDS[property]) {
      if (property === 'LASTMARKET') {
        unpackedCurrent[property] = valuesArray[currentField];
      }
      else {
        unpackedCurrent[property] = parseFloat(valuesArray[currentField]);
      }
      currentField++;
    }
  }
  return unpackedCurrent;
};
Thanks
The issue is that the stream you're writing to is actually process.stdout, because .pipe() returns the destination stream (so you can keep chaining); in your case, that's process.stdout.
const x = stream.pipe(stream2).pipe(stream3).pipe(process.stdout);
x === process.stdout // true
So all you were doing was: process.stdout.write(data) without going through the pipeline.
What you need to do is assign your first through2 stream to the stream variable, and then call .pipe on that stream.
const stream = through2.obj((chunk, enc, callback) => {
  callback(null, parseString(chunk))
});

stream
  .pipe(through2.obj((chunk, enc, callback) => {
    callback(null, JSON.stringify(chunk));
  }))
  .pipe(process.stdout);

socket.on('m', data => stream.write(data));
I need to run two commands in series that need to read data from the same stream.
After piping a stream into another, the buffer is emptied, so I can't read data from that stream again. This doesn't work:
var spawn = require('child_process').spawn;
var fs = require('fs');
var request = require('request');

var inputStream = request('http://placehold.it/640x360');
var identify = spawn('identify', ['-']);

inputStream.pipe(identify.stdin);

var chunks = [];
identify.stdout.on('data', function(chunk) {
  chunks.push(chunk);
});
identify.stdout.on('end', function() {
  var size = getSize(Buffer.concat(chunks)); // width
  var convert = spawn('convert', ['-', '-scale', size * 0.5, 'png:-']);
  inputStream.pipe(convert.stdin);
  convert.stdout.pipe(fs.createWriteStream('half.png'));
});

function getSize(buffer) {
  return parseInt(buffer.toString().split(' ')[2].split('x')[0]);
}
Request complains about this:
Error: You cannot pipe after data has been emitted from the response.
and changing the inputStream to fs.createWriteStream yields the same issue of course.
I don't want to write to a file but to reuse, in some way, the stream that request produces (or any other stream, for that matter).
Is there a way to reuse a readable stream once it finishes piping?
What would be the best way to accomplish something like the above example?
You have to create a duplicate of the stream by piping it to two streams. You can create a simple duplicate with a PassThrough stream, which simply passes the input through to the output.
const spawn = require('child_process').spawn;
const PassThrough = require('stream').PassThrough;

const a = spawn('echo', ['hi user']);
const b = new PassThrough();
const c = new PassThrough();

a.stdout.pipe(b);
a.stdout.pipe(c);

let count = 0;
b.on('data', function (chunk) {
  count += chunk.length;
});
b.on('end', function () {
  console.log(count);
  c.pipe(process.stdout);
});
Output:
8
hi user
The first answer only works if streams take roughly the same amount of time to process data. If one takes significantly longer, the faster one will request new data, consequently overwriting the data still being used by the slower one (I had this problem after trying to solve it using a duplicate stream).
The following pattern worked very well for me. It uses a library based on Stream2 streams, Streamz, and Promises to synchronize async streams via a callback. Using the familiar example from the first answer:
const spawn = require('child_process').spawn;
const pass = require('stream').PassThrough;
const streamz = require('streamz').PassThrough;
const Promise = require('bluebird');

const a = spawn('echo', ['hi user']);
const b = new pass();
const c = new pass();

a.stdout.pipe(streamz(combineStreamOperations));

function combineStreamOperations(data, next) {
  Promise.join(b, c, function(b, c) { // perform n operations on the same data
    next(); // request more
  });
}

let count = 0;
b.on('data', function(chunk) { count += chunk.length; });
b.on('end', function() { console.log(count); c.pipe(process.stdout); });
You can use this small npm package I created:
readable-stream-clone
With this you can reuse readable streams as many times as you need
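A rough usage sketch, assuming the package exposes a constructor that takes the source stream (check its README to confirm):

const fs = require("fs");
const ReadableStreamClone = require("readable-stream-clone");

const readStream = fs.createReadStream('text.txt');

// Each clone is an independent readable fed from the original stream.
const clone1 = new ReadableStreamClone(readStream);
const clone2 = new ReadableStreamClone(readStream);

clone1.pipe(fs.createWriteStream('copy1.txt'));
clone2.pipe(fs.createWriteStream('copy2.txt'));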
For the general problem, the following code works fine:
var PassThrough = require('stream').PassThrough;

var a = new PassThrough();
var b1 = new PassThrough();
var b2 = new PassThrough();

a.pipe(b1);
a.pipe(b2);

b1.on('data', function(data) {
  console.log('b1:', data.toString());
});
b2.on('data', function(data) {
  console.log('b2:', data.toString());
});

a.write('text');
I have a different solution for writing to two streams simultaneously. Naturally, the time to write will be the sum of the two times, but I use it to respond to a download request where I want to keep a copy of the downloaded file on my server (actually, I use an S3 backup, so I cache the most-used files locally to avoid multiple file transfers).
/**
 * A utility class made to write to a file while answering a file download request
 */
class TwoOutputStreams {
  constructor(streamOne, streamTwo) {
    this.streamOne = streamOne
    this.streamTwo = streamTwo
  }

  setHeader(header, value) {
    if (this.streamOne.setHeader)
      this.streamOne.setHeader(header, value)
    if (this.streamTwo.setHeader)
      this.streamTwo.setHeader(header, value)
  }

  write(chunk) {
    this.streamOne.write(chunk)
    this.streamTwo.write(chunk)
  }

  end() {
    this.streamOne.end()
    this.streamTwo.end()
  }
}
You can then use this as a regular OutputStream
const twoStreamsOut = new TwoOutputStreams(fileOut, responseStream)
and pass it to your method as if it were a response or a fileOutputStream.
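For example, a rough sketch of wiring it up inside a download handler (fileOut, res, and sourceStream are placeholder names, not part of the class above):

const fs = require('fs')

// fileOut: local cache copy, res: the HTTP response being answered
const fileOut = fs.createWriteStream('/tmp/cache/file.bin')
const twoStreamsOut = new TwoOutputStreams(fileOut, res)

twoStreamsOut.setHeader('Content-Type', 'application/octet-stream')
sourceStream.on('data', chunk => twoStreamsOut.write(chunk))
sourceStream.on('end', () => twoStreamsOut.end())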
If you have async operations on the PassThrough streams, the answers posted here won't work.
A solution that works for async operations includes buffering the stream content and then creating streams from the buffered result.
To buffer the result you can use concat-stream
const Promise = require('bluebird');
const concat = require('concat-stream');

const getBuffer = function(stream) {
  return new Promise(function(resolve, reject) {
    var gotBuffer = function(buffer) {
      resolve(buffer);
    }
    var concatStream = concat(gotBuffer);
    stream.on('error', reject);
    stream.pipe(concatStream);
  });
}
To create streams from the buffer you can use:
const { Readable } = require('stream');

const getBufferStream = function(buffer) {
  const stream = new Readable();
  stream.push(buffer);
  stream.push(null);
  return Promise.resolve(stream);
}
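Putting the two helpers together might look like this (sourceStream, destinationA, and destinationB are hypothetical stand-ins for your actual streams):

getBuffer(sourceStream)
  .then(buffer => Promise.all([getBufferStream(buffer), getBufferStream(buffer)]))
  .then(([streamA, streamB]) => {
    streamA.pipe(destinationA);
    streamB.pipe(destinationB);
  })
  .catch(err => console.error(err));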
What about piping into two or more streams not at the same time?
For example :
var PassThrough = require('stream').PassThrough;

var mybinaryStream = stream.start(); // never-ending audio stream
var file1 = fs.createWriteStream('file1.wav', { encoding: 'binary' });
var file2 = fs.createWriteStream('file2.wav', { encoding: 'binary' });
var mypass = new PassThrough();

mybinaryStream.pipe(mypass);
mypass.pipe(file1);

setTimeout(function() {
  mypass.pipe(file2);
}, 2000);
The above code does not produce any errors, but file2 is empty.