Nodejs: convert string to buffer - javascript

I'm trying to write a string to a socket (the socket is called "response"). Here is the code I have so far (I'm trying to implement a byte-caching proxy):
var http = require('http');
var sys = require('sys');
var localHash = {};

http.createServer(function(request, response) {
    var proxy = http.createClient(80, request.headers['host']);
    var proxy_request = proxy.request(request.method, request.url, request.headers);
    proxy_request.addListener('response', function(proxy_response) {
        proxy_response.addListener('data', function(x) {
            var responseData = x.toString();
            var f = 50;
            var toTransmit = "";
            var p = 0;
            var N = responseData.length;
            if (N > f) {
                p = Math.floor(N / f);
                var hash = "";
                var chunk = "";
                for (var i = 0; i < p; i++) {
                    chunk = responseData.substr(f * i, f);
                    hash = DJBHash(chunk);
                    if (localHash[hash] == undefined) {
                        localHash[hash] = chunk;
                        toTransmit = toTransmit + chunk;
                    } else {
                        sys.puts("***hit" + chunk);
                        toTransmit = toTransmit + chunk; //"***EOH"+hash;
                    }
                }
                // remainder:
                chunk = responseData.substr(f * p);
                hash = DJBHash(chunk);
                if (localHash[hash] == undefined) {
                    localHash[hash] = chunk;
                    toTransmit = toTransmit + chunk;
                } else {
                    toTransmit = toTransmit + chunk; //"***EOH"+hash;
                }
            } else {
                toTransmit = responseData;
            }
            response.write(new Buffer(toTransmit)); /* error occurs here */
        });
        proxy_response.addListener('end', function() {
            response.end();
        });
        response.writeHead(proxy_response.statusCode, proxy_response.headers);
    });
    request.addListener('data', function(chunk) {
        sys.puts(chunk);
        proxy_request.write(chunk, 'binary');
    });
    request.addListener('end', function() {
        proxy_request.end();
    });
}).listen(8080);

function DJBHash(str) {
    var hash = 5381;
    for (var i = 0; i < str.length; i++) {
        hash = (((hash << 5) + hash) + str.charCodeAt(i)) & 0xffffffff;
    }
    if (hash < -1) {
        hash = hash * -1;
    }
    return hash;
}
The trouble is, I keep getting a "content encoding error" in Firefox. It's as if the gzipped content isn't being transmitted properly. I've verified that "toTransmit" is the same as "x" via console.log(x) and console.log(toTransmit).
It's worth noting that if I replace response.write(new Buffer(toTransmit)) with simply response.write(x), the proxy works as expected, but I need to do some payload analysis and then pass "toTransmit", not "x".
I've also tried response.write(toTransmit) (i.e. without the conversion to a buffer) and I keep getting the same content encoding error.
I'm really stuck. I thought I had this problem fixed by converting the string to a buffer as per another thread (http://stackoverflow.com/questions/7090510/nodejs-content-encoding-error), but I've opened a new thread to discuss this new problem I'm experiencing.
I should add that if I open a page via the proxy in Opera, I get gobbledygook - it's as if the gzipped data gets corrupted.
Any insight greatly appreciated.
Many thanks in advance,

How about this?
var responseData = Buffer.from(x, 'utf8');
from: Convert string to buffer Node

Without digging very deep into your code, it seems to me that you might want to change
var responseData=x.toString();
to
var responseData=x.toString("binary");
and finally
response.write(new Buffer(toTransmit, "binary"));
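Applied to the data listener in the question, the two changes together would look roughly like this (a minimal sketch; the chunking/hashing logic stays the same):

proxy_response.addListener('data', function(x) {
    // decode the raw bytes with the lossless one-byte-per-character "binary" (latin1) encoding
    var responseData = x.toString("binary");

    var toTransmit = responseData; // ...build toTransmit from responseData exactly as before...

    // re-encode with the same "binary" encoding so the bytes round-trip unchanged
    response.write(new Buffer(toTransmit, "binary"));
});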

From the docs:
Pure Javascript is Unicode friendly but not nice to binary data. When
dealing with TCP streams or the file system, it's necessary to handle
octet streams. Node has several strategies for manipulating, creating,
and consuming octet streams.
Raw data is stored in instances of the Buffer class. A Buffer is
similar to an array of integers but corresponds to a raw memory
allocation outside the V8 heap. A Buffer cannot be resized.
So, don't use strings for handling binary data.
Change proxy_request.write(chunk, 'binary'); to proxy_request.write(chunk);.
Omit var responseData=x.toString();, that's a bad idea.
Instead of doing substr on a string, use slice on a buffer.
Instead of doing + with strings, use the "concat" method from buffertools, as sketched below.
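A rough sketch of what that buffer-oriented data handler could look like (a hypothetical rewrite; note that Buffer.concat is built into Node and does the same job as buffertools' concat):

proxy_response.addListener('data', function(x) {
    // x is already a Buffer; keep it that way instead of converting it to a string
    var f = 50;
    var pieces = [];
    for (var offset = 0; offset < x.length; offset += f) {
        var chunk = x.slice(offset, Math.min(offset + f, x.length)); // a view, no copy
        var hash = DJBHash(chunk.toString('binary'));
        if (localHash[hash] === undefined) {
            localHash[hash] = chunk;
        }
        pieces.push(chunk); // transmit every chunk, as in the original code
    }
    response.write(Buffer.concat(pieces));
});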

Actually, new Buffer() has been deprecated since Node.js v10+, so it is better to use Buffer.from() instead.
So instead of
response.write(new Buffer(toTransmit));
do
response.write(Buffer.from(toTransmit, 'binary'));

Related

Running out of memory writing to a file in NodeJS

I'm processing a very large amount of data, manipulating it, and storing it in a file. I iterate over the dataset, then I want to store it all in a JSON file.
My initial method using fs - storing it all in an object and then dumping it - didn't work, as I was running out of memory and it became extremely slow.
I'm now using fs.createWriteStream but as far as I can tell it's still storing it all in memory.
I want the data to be written object by object to the file, unless someone can recommend a better way of doing it.
Part of my code:
// Top of the file
var wstream = fs.createWriteStream('mydata.json');
...
// In a loop
let JSONtoWrite = {}
JSONtoWrite[entry.word] = wordData
wstream.write(JSON.stringify(JSONtoWrite))
...
// Outside my loop (when memory is probably maxed out)
wstream.end()
I think I'm using streams wrong - can someone tell me how to write all this data to a file without running out of memory? Every example I find online relates to reading a stream in, but because of the calculations I'm doing on the data, I can't use a readable stream. I need to add to this file sequentially.
The problem is that you're not waiting for the data to be flushed to the filesystem; instead you keep throwing new data at the stream synchronously in a tight loop.
Here's a piece of pseudocode that should work for you:
// Top of the file
const wstream = fs.createWriteStream('mydata.json');
// I'm not sure how you're getting the data; let's say you have it all in an object
const entry = {};
const words = Object.keys(entry);

function writeCB(index) {
    if (index >= words.length) {
        wstream.end();
        return;
    }
    const JSONtoWrite = {};
    JSONtoWrite[words[index]] = entry[words[index]];
    // only schedule the next write once this one has been flushed
    wstream.write(JSON.stringify(JSONtoWrite), writeCB.bind(null, index + 1));
}

writeCB(0);
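Another way to get the same backpressure handling, as a rough sketch: check the boolean that write() returns and wait for the 'drain' event before writing more:

const fs = require('fs');
const wstream = fs.createWriteStream('mydata.json');

// write() returns false when the internal buffer is full;
// pause until the stream emits 'drain' before writing more
function writeAll(words, entry, index = 0) {
    while (index < words.length) {
        const piece = JSON.stringify({ [words[index]]: entry[words[index]] });
        index++;
        if (!wstream.write(piece)) {
            wstream.once('drain', () => writeAll(words, entry, index));
            return;
        }
    }
    wstream.end();
}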
You should wrap your data source in a readable stream too. I don't know what your source is, but you have to make sure it does not load all your data into memory.
For example, assuming your data set comes from another file where JSON objects are separated by end-of-line characters, you could create a Read stream as follows:
const Readable = require('stream').Readable;

class JSONReader extends Readable {
    constructor(options = {}) {
        super(options);
        this._source = options.source; // the source stream
        this._buffer = '';
        this._source.on('readable', function() {
            this.read();
        }.bind(this)); // read whenever the source is ready
    }
    _read(size) {
        var chunk;
        var line;
        var lineIndex;
        var result;
        if (this._buffer.length === 0) {
            chunk = this._source.read(); // read more from source when buffer is empty
            if (chunk !== null) {
                this._buffer += chunk;
            }
        }
        lineIndex = this._buffer.indexOf('\n'); // find end of line
        if (lineIndex !== -1) { // we have an end of line and therefore a new object
            line = this._buffer.slice(0, lineIndex); // get the characters belonging to the object
            if (line) {
                result = JSON.parse(line);
                this._buffer = this._buffer.slice(lineIndex + 1);
                this.push(JSON.stringify(result)); // push to the internal read queue
            } else {
                this._buffer = this._buffer.slice(1);
            }
        }
    }
}
Now you can use it like this:
const source = fs.createReadStream('mySourceFile');
const reader = new JSONReader({source});
const target = fs.createWriteStream('myTargetFile');
reader.pipe(target);
Then you'll have a much better memory profile.
Please note that the above example is adapted from the excellent Node.js in Practice book.

Unpack a C struct in the browser?

EDIT
I found this:
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Typed_arrays
Basically if I have something like this:
struct someStruct {
    unsigned long id;
    char username[16];
    float amountDue;
};
on the client side I can do:
var ws = new WebSocket("ws://URI");
ws.binaryType = "arraybuffer";
ws.onmessage = function (e) {
    var buffer = e.data;
    var data_view = new DataView(buffer);
    // ... read the data into the buffer ...
    var idView = data_view.getUint32(0);
    var usernameView = data_view.getUint32(4);
    var amountDueView = data_view.getFloat32(20);
};
The problem is that I want to convert them to normal Javascript objects (numbers, strings etc).
Original question
I would like to send data over a websocket packed as a C struct, and unpack it in the browser using JavaScript.
I know modules exist for Node.js, but I can't find anything client-side.
If you're familiar with Python's struct, then you may like structjs. It's my attempt at porting Python's struct module to JavaScript. As it is, it's for Node, but a client port should be easy.
You won't have issues with alignment or padding for that structure (though you can specify those explicitly), but you may need to indicate little-endian (the '<' in the format string) if that's your flavour. You might do it like so (I haven't tested this example in any way):
let struct = require("./struct") // Node specific, you need to wrap it.
let someStruct = struct('<I16sf') // This is your struct definition
let ws = new WebSocket("ws://URI");
ws.binaryType = "arraybuffer";
ws.onmessage = e => {
    // Unpack using the structure definition. Unpack takes an ArrayBuffer.
    let [id, username, amountDue] = someStruct.unpack(e.data);
    // Use data...
};
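If you'd rather not pull in a library, the same message can be decoded by hand with a DataView (a minimal sketch, assuming the sender packs the struct with no padding, a 4-byte unsigned long, and little-endian byte order):

ws.onmessage = e => {
    let view = new DataView(e.data);
    // unsigned long id -> number (little-endian)
    let id = view.getUint32(0, true);
    // char username[16] -> string, truncated at the first NUL byte
    let nameBytes = new Uint8Array(e.data, 4, 16);
    let username = String.fromCharCode.apply(null, nameBytes).replace(/\0.*$/, "");
    // float amountDue -> number
    let amountDue = view.getFloat32(20, true);
    // id, username and amountDue are now plain JavaScript values
};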
Ok,
https://www.npmjs.com/package/c-struct looks like what you want.
Good luck!
Ok, after some research, I finally decided this would not be a good idea:
https://justin.harmonize.fm/development/2013/04/28/a-slower-js-msgpack.html
In short: JavaScript is slow at decoding.
It's probably simpler to just use JSON with Content-Encoding: gzip, if that does not slow down your web app.

Pass object by reference from/to webworker

Is it possible to pass an object from/to a web worker from/to the main thread by reference? I have read some information here about transferable objects.
Chrome 13 introduced sending ArrayBuffers to/from a Web Worker using
an algorithm called structured cloning. This allowed the postMessage()
API to accept messages that were not just strings, but complex types
like File, Blob, ArrayBuffer, and JSON objects. Structured cloning is
also supported in later versions of Firefox.
I just want to pass information, not an object with methods. Just something like this (but with a lot of information, a few MB, so that the main thread does not have to receive a copy of the object):
var test = {
    some: "data"
}
Once you have some data in an object (e.g. {bla:666, color:"red"}) you will have to copy it; there is no way to avoid that. The reason is that you don't have control over the memory the object is stored in, so you can't transfer it. The only memory that can be transferred is memory allocated for transferable objects - typed arrays.
Therefore, if you need some data transferred, you must think ahead and use the transferable interface. Also keep in mind that even when an object is copied, the transfer is very fast.
I wrote a library that converts an object to binary data (and is therefore transferable), but it isn't faster than the native transfer - it's way slower, actually. The only advantage is that it allows me to transfer unsupported data types (e.g. Function).
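As a small sketch of the difference (the worker file name is hypothetical): a structured-clone copy leaves the sender's buffer intact, while a transfer detaches it, so its byteLength drops to 0.

const worker = new Worker('worker.js'); // hypothetical worker script
const buffer = new Float64Array(1024 * 1024).buffer; // 8 MB of data

// copy: the worker receives a clone, our buffer is still usable
worker.postMessage(buffer);
console.log(buffer.byteLength); // 8388608

// transfer: ownership moves to the worker, our buffer is detached
worker.postMessage(buffer, [buffer]);
console.log(buffer.byteLength); // 0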
There Is An Array 2nd Argument To postMessage
Actually yes, it is possible in (surprise, surprise!) Chrome 17+ and Firefox 18+ for certain objects (see here).
// Create a 32MB "file" and fill it.
var uInt8Array = new Uint8Array(1024 * 1024 * 32); // 32MB
for (var i = 0; i < uInt8Array.length; ++i) {
uInt8Array[i] = i;
}
worker.postMessage(uInt8Array.buffer, [uInt8Array.buffer]);
You can also apply this to strings by converting the string to and from an array buffer using FastestSmallestTextEncoderDecoder as shown below.
//inside the worker
var encoderInst = new TextEncoder;
function post_string(the_string){
    var my_array_buffer = encoderInst.encode(the_string).buffer;
    postMessage( my_array_buffer, [my_array_buffer] );
}
Then, to read the arraybuffer as a string:
// var workerInstance = new Worker("/path/to/file.js");
var decoderInst = new TextDecoder;
workerInstance.onmessage = function decode_buffer(evt){
    var buffer = evt.data;
    var str = decoderInst.decode(buffer);
    console.log("From worker: " + str);
    return str;
}
Here is a small interactive example of using a Worker to increment each letter of a string.
var incrementWorker = new Worker("data:text/javascript;base64," + btoa(function(){
    // inside the worker
    importScripts("https://dl.dropboxusercontent.com/s/r55397ld512etib/Encode" +
        "rDecoderTogether.min.js?dl=0");
    const decoderInst = new TextDecoder;
    self.onmessage = function(evt){
        const u8Array = new Uint8Array(evt.data);
        for (var i=0, len=u8Array.length|0; i<len; i=i+1|0) {
            ++u8Array[i];
        }
        postMessage(decoderInst.decode(u8Array));
    };
}.toString().slice("function(){".length, -"}".length)));

const inputElement = document.getElementById("input");
const encoderInst = new TextEncoder;
(inputElement.oninput = function() {
    const buffer = encoderInst.encode(inputElement.value).buffer;
    incrementWorker.postMessage(buffer, [buffer]); // pass happens HERE
})();
incrementWorker.onmessage = function(evt){
    document.getElementById("output").value = evt.data;
};
<script src="https://dl.dropboxusercontent.com/s/r55397ld512etib/EncoderDecoderTogether.min.js?dl=0" type="text/javascript"></script>
Before: <input id="input" type="text" value="abc123 foobar" /><br />
After: <input id="output" type="text" readonly="" />
Sources: Google Developers and MDN
It is not possible. You have to send the object, update it in the worker and then return the updated version to the main thread.
If you want to pass an object with just information, you only need to pass your object as a string:
myWorker.postMessage(JSON.stringify(myObject));
then parse the object inside your worker:
JSON.parse(myObject)
and finally return your updated object to the main thread.
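A minimal sketch of that round trip (the worker file name is hypothetical):

// main.js
const myWorker = new Worker('worker.js');
myWorker.postMessage(JSON.stringify({ some: 'data', count: 1 }));
myWorker.onmessage = function (e) {
    const updated = JSON.parse(e.data); // { some: 'data', count: 2 }
    console.log(updated);
};

// worker.js
self.onmessage = function (e) {
    const obj = JSON.parse(e.data); // parse the incoming string
    obj.count += 1;                 // update the object
    self.postMessage(JSON.stringify(obj)); // send the updated object back
};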
Take a look also at Parallel.js, a library that makes it easier to work with web workers.

Expected performance of MD5 calculation in javascript?

I am trying out MD5 calculation in JavaScript. Looking at the "Fastest MD5 Implementation in JavaScript" post, the 'JKM' implementation is supposed to be one of the faster ones. I am using SparkMD5, which is based off the JKM implementation. However, the example provided at https://github.com/satazor/SparkMD5/blob/master/test/readme_example.html takes about 10 seconds for a 13 MB file (~23 seconds with the debugger), while the same file takes only 0.03 seconds using the md5sum command in Linux. Are these results too slow for a JavaScript implementation, or is this poor performance expected?
It is expected.
First, I don't think I need to tell you that JAVASCRIPT IS SLOW. Yes, even with modern JIT optimization etc. JavaScript is still slow.
To show you that it is not your JS implementation's fault, I will do some comparisons with Node.js, so that the browser DOM stuff doesn't get in the way of benchmarking.
Test file generation:
$ dd if=/dev/zero of=file bs=6M count=1
(my server only has 512 MB of RAM and Node.js can't take anything higher than 6M)
Script:
//var md5 = require('crypto-js/md5')
var md5 = require('MD5')
//var md5 = require('spark-md5').hash
//var md5 = require('blueimp-md5').md5
require('fs').readFile('file', 'utf8', function(e, b) { // Using string here to be fair for all md5 engines
console.log(md5(b))
})
(you can uncomment the contestants/benchmarkees)
The result is: (file reading overhead removed)
MD5: 5.250s - 0.072s = 5.178s
crypto-js/md5: 4.914s - 0.072s = 4.842s
Blueimp: 4.904s - 0.072s = 4.832s
MD5 with Node.js binary buffer instead of string: 1.143s - 0.063s = 1.080s
spark: 0.311s - 0.072s = 0.239s
md5sum: 0.023s - 0.003s = 0.020s
So no, spark-md5 is in reality not bad at all.
When looking at the example HTML page, I saw that they are using the incremental API. So I did another benchmark:
var md5 = require('spark-md5')
var md5obj = new md5()
var chunkNum = 0
require('fs').createReadStream('file')
    .on('data', function (b) {
        chunkNum++
        md5obj.append(b.toString())
    })
    .on('end', function () {
        console.log('total ' + chunkNum + ' chunks')
        console.log(md5obj.end())
    })
With 96 chunks, it is 0.313s.
So no, it is not the MD5 implementation's fault at all. Performance this poor is, to be honest, a little surprising, but not impossible either, given that you are running this code in a browser.
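For the browser side, the practical takeaway is the same as in the Node numbers above: feed the hasher binary chunks rather than one huge string. A rough sketch using SparkMD5's incremental ArrayBuffer API together with File.slice and FileReader (the 2 MB slice size is an arbitrary choice):

function hashFile(file, done) {
    var chunkSize = 2 * 1024 * 1024;
    var spark = new SparkMD5.ArrayBuffer();
    var offset = 0;
    var reader = new FileReader();

    reader.onload = function (e) {
        spark.append(e.target.result); // append this slice's ArrayBuffer
        offset += chunkSize;
        if (offset < file.size) {
            readNext();
        } else {
            done(spark.end()); // hex digest of the whole file
        }
    };

    function readNext() {
        reader.readAsArrayBuffer(file.slice(offset, offset + chunkSize));
    }
    readNext();
}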
BTW, my server is a DigitalOcean VPS with SSD. The file reading overhead is about 0.072s:
require('fs').readFile('file', 'utf8', function() {})
while with native cat it's about 0.003s.
For MD5 with native Buffer, the overhead is about 0.063s:
require('fs').readFile('file', function() {})

Node.JS - How to stop a piped stream conditionally

I'm a newbie to JavaScript and Node.js...
I have a file-processing stream which is composed of multiple streams piped together. It works great. I'd like to enhance this by conditionally stopping the stream from processing and reading when a threshold has been reached.
inputStream.
    pipe(unzip()).
    pipe(latin1Decoder).
    pipe(
        combine(
            through(function(obj){
                // part-1
                this.push(obj);
            }),
            through(function(obj){
                // part-2
                this.push(obj);
            })
        ));
In part-1 if I do this.push(null) then the combiner will ignore incoming input.
However, I can't stop reading the file. That's annoying because the file is pretty huge.
What is the best way for me to access the input stream from inside the pipeline and close it?
OK, here is how I ended up solving it:
var cnt = 0;
var processingStream = combine(unzip(), latin1Decoder(), part1(), part2());
inputStream.pipe(processingStream);
inputStream.on('data', function(x) {
    var lineCnt = x.toString().split(/\r\n|\r|\n/).length;
    cnt += lineCnt;
    if (cnt > 5) {
        inputStream.close();
        inputStream.unpipe(processingStream);
        processingStream.end();
    }
});
Probably not the most efficient way, but it meets my requirements.
Please note that the input stream reads in blocks, so multiple lines are read at once.
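On newer Node versions, a rough alternative sketch is to destroy the source stream once the threshold is hit, which stops further reads from the underlying file:

var cnt = 0;
inputStream.on('data', function(x) {
    cnt += x.toString().split(/\r\n|\r|\n/).length;
    if (cnt > 5) {
        inputStream.unpipe(processingStream); // stop feeding the pipeline
        processingStream.end();               // finish what has already been piped
        inputStream.destroy();                // stop reading the underlying file
    }
});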
