Reading large files in Node.js from another server - javascript

I have two servers that communicate with each other. Server1 requests parts of a file from Server2 and stores the received data in a single file. Server2 is supposed to receive each of these requests and create a stream that pipes the data over.
Suppose the files stored in Server2's directory are as follows:
bigfile.gz
bigfile.gz.part-0
bigfile.gz.part-1
bigfile.gz.part-2
......
So Server1 will send a request for part-0, then part-1, and so on to Server2. Hence the loop to make the requests.
Server 1 (code snippet)
for (var i in requestInfo['blockName']) {
    var blockName = i;
    var IP = requestInfo['blockName'][i][0];
    var fileData = JSON.stringify({
        blockName: blockName,
        fileName: requestInfo['fileName']
    });
    makeRequest(fileData, IP);
    console.log(counter);
}

function makeRequest(fileData, IP) {
    var options = {
        host: IP,
        port: 5000,
        path: '/read',
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        }
    };
    var req = http.request(options, function(res) {
        var data = '';
        res.on('data', function(chunk) {
            data += chunk;
        });
        res.on('end', function() {
            console.log(data.length);
            //fs.appendFileSync(fileName, data);
            var writeStream = fs.createWriteStream(fileName, { "flags": 'a' });
            writeStream.write(data);
            writeStream.end();
        });
    });
    req.write(fileData);
    req.end();
}
Server 2 (code snippet)
app.post('/read', function(req, res) {
    var dataBody = req.body;
    fs.createReadStream(dataBody.fileName + '/' + dataBody.blockName).pipe(res);
});
The code above works when I test it with a 100 MB .txt file, but it fails with a 1 GB .gz file; even when I test it with a .zip file, the final .zip generated on the Server1 side has the wrong size.
I am not sure what I am doing wrong here, or what the alternative solution is.
EDIT:
Also, my Server1 crashes when dealing with the big 1 GB .gz file.

Your main problem here is that you are treating your data as a string by appending chunks to a string.
Rewritten, this should be:
var req = http.request(options, function(res) {
    var data = [];
    res.on('data', function(chunk) {
        data.push(chunk);
    });
    res.on('end', function() {
        fs.writeFile(fileName, Buffer.concat(data), function() {
            console.log("write end");
        });
    });
});
That way we are creating a big array of binary chunks, and when the download is complete we write the concatenation of all the chunks to a file.
But notice the word big.
If you stick with this implementation you risk running out of memory, especially if you are dealing with large (>500 MB) files.
Streams to the rescue
var req = https.request(options, function(res) {
    res.pipe(fs.createWriteStream(fileName)).on("close", function() {
        console.log("write end");
    });
});
With the above implementation the memory footprint stays low, because as soon as a chunk of data arrives from your download, you write it to the file. That way you never keep the whole file in the program's memory.
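Applied to the makeRequest function from the question, a minimal sketch might look like this (the extra fileName and done parameters are assumptions added here, not part of the original code; the parts would still need to be requested one after another so the appended chunks land in order):
var http = require('http');
var fs = require('fs');

function makeRequest(fileData, IP, fileName, done) {
    var options = {
        host: IP,
        port: 5000,
        path: '/read',
        method: 'POST',
        headers: { 'Content-Type': 'application/json' }
    };

    var req = http.request(options, function(res) {
        // Stream the response straight into the file instead of buffering it in memory.
        var writeStream = fs.createWriteStream(fileName, { flags: 'a' });
        res.pipe(writeStream).on('close', function() {
            done(); // request the next part only after this one has been fully written
        });
    });

    req.write(fileData);
    req.end();
}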

Related

Upload file with nodeJS

I am having trouble uploading a file with NodeJS and Angular.
I found solutions, but they only use Ajax, which I don't know about. Is it possible to do this without it?
With the following code I get this error:
POST http://localhost:2000/database/sounds 413 (Payload Too Large)
Code:
HTML:
<div class="form-group">
    <label for="upload-input">This needs to be a .WAV file</label>
    <form enctype="multipart/form-data" action="/database/sounds" method="post">
        <input type="file" class="form-control" name="uploads[]" id="upload-input" multiple="multiple">
    </form>
    <button class="btn-primary" ng-click="uploadSound()">UPLOAD</button>
</div>
Javascript:
$scope.uploadSound = function(){
    var x = document.getElementById("upload-input");
    if ('files' in x) {
        if (x.files.length == 0) {
            console.log("Select one or more files.");
        } else {
            var formData = new FormData();
            for (var i = 0; i < x.files.length; i++) {
                var file = x.files[i];
                if (file.type == ("audio/wav")) {
                    console.log("Importing :");
                    if ('name' in file) {
                        console.log("-name: " + file.name);
                    }
                    if ('size' in file) {
                        console.log("-size: " + file.size + " bytes");
                    }
                    formData.append('uploads[]', file, file.name);
                } else {
                    console.log("Error with: '" + file.name + "': the type '" + file.type + "' is not supported.");
                }
            }
            $http.post('/database/sounds', formData).then(function(response){
                console.log("Upload :");
                console.log(response.data);
            });
        }
    }
}
NodeJS:
//Upload a sound
app.post('/database/sounds', function(req, res){
    var form = new formidable.IncomingForm();
    // specify that we want to allow the user to upload multiple files in a single request
    form.multiples = true;
    // store all uploads in the /uploads directory
    form.uploadDir = path.join(__dirname, '/database/sounds');
    // every time a file has been uploaded successfully,
    // rename it to its original name
    form.on('file', function(field, file) {
        fs.rename(file.path, path.join(form.uploadDir, file.name));
    });
    // log any errors that occur
    form.on('error', function(err) {
        console.log('An error has occurred: \n' + err);
    });
    // once all the files have been uploaded, send a response to the client
    form.on('end', function() {
        res.end('success');
    });
    // parse the incoming request containing the form data
    form.parse(req);
});
EDIT:
The error became
POST http://localhost:2000/database/sounds 400 (Bad Request)
If you are using bodyParser:
app.use(bodyParser.urlencoded({limit: '100mb',extended: true}));
app.use(bodyParser.json({limit: '100mb'}));
This will allow you to upload files of up to 100 MB.
For the json/urlencoded limit, it's recommended to configure it in server/config.json as follows:
{
  "remoting": {
    "json": {"limit": "50mb"},
    "urlencoded": {"limit": "50mb", "extended": true}
  }
}
Please note that the LoopBack REST API has its own express router with bodyParser.json/urlencoded middleware. When you add a global middleware, it has to come before the boot() call.
var loopback = require('loopback');
var boot = require('loopback-boot');
var app = module.exports = loopback();
//request limit 1gb
app.use(loopback.bodyParser.json({limit: 524288000}));
app.use(loopback.bodyParser.urlencoded({limit: 524288000, extended: true}));
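For clarity, a minimal sketch of that ordering in a typical server.js (the 1 GB limit simply mirrors the snippet above and is only an example):
var loopback = require('loopback');
var boot = require('loopback-boot');
var app = module.exports = loopback();

// register the global body-size limits first...
app.use(loopback.bodyParser.json({limit: 524288000}));
app.use(loopback.bodyParser.urlencoded({limit: 524288000, extended: true}));

// ...and only then bootstrap the application
boot(app, __dirname, function(err) {
    if (err) throw err;
});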
With regards to checking that the data is actually a WAV file, your best bet is to look at the contents of the file and determine if it looks like a WAV file or not.
The WAVE PCM soundfile format article goes into the details of the format.
To be absolutely sure that this is a proper WAV file, and that it's not broken in some way, you need to check that all of the fields defined there make sense. But a quick solution might be to just check that the first four bytes of the content are the letters 'RIFF'. It won't guard against corrupted files or malicious content, but it's a good place to start, I think.
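A minimal sketch of that quick check (not the poster's code; the looksLikeWav helper name is illustrative), reading just the first four bytes of the uploaded file:
var fs = require('fs');

// Check that the first four bytes of a file are the ASCII letters 'RIFF'.
function looksLikeWav(filePath, callback) {
    fs.open(filePath, 'r', function(err, fd) {
        if (err) return callback(err);
        var header = Buffer.alloc(4);
        fs.read(fd, header, 0, 4, 0, function(readErr) {
            fs.close(fd, function() {});
            if (readErr) return callback(readErr);
            callback(null, header.toString('ascii') === 'RIFF');
        });
    });
}

// e.g. inside formidable's 'file' handler:
// looksLikeWav(file.path, function(err, ok) { if (!ok) { /* reject the upload */ } });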
I tried changing the object sent to URL params as suggested in Very Simple AngularJS $http POST Results in '400 (Bad Request)' and 'Invalid HTTP status code 400':
$http.post({
    method: 'POST',
    url: '/upload',
    data: formData,
    headers: {'Content-Type': 'application/x-www-form-urlencoded'},
    transformRequest: function(obj) {
        var str = [];
        for (var p in obj)
            str.push(encodeURIComponent(p) + "=" + encodeURIComponent(obj[p]));
        return str.join("&");
    }
}).success(function(response){
    console.log("Uploaded :");
    console.log(response.data);
});
But I get a bad request error
Why is there no data received? The console.log before this shows I have one file in my formData.
Error: $http:badreq
Bad Request Configuration
Http request configuration url must be a string or a $sce trusted
object. Received: {"method":"POST","url":"/upload","data":
{},"headers":{"Content-Type":"application/x-www-form-urlencoded"}}

Writing an image to file, received over an HTTP request in Node

I'm certain I'm missing something obvious, but the gist of the problem is I'm receiving a PNG from a Mapbox call with the intent of writing it to the file system and serving it to the client. I've successfully relayed the call, received a response of raw data and written a file. The problem is that my file ends up truncated no matter what path I take, and I've exhausted the answers I've found skirting the subject. I've dumped the raw response to the log, and it's robust, but any file I make tends to be about a chunk's worth of unreadable data.
Here's the code I've got at present for creating the file. I tried this buffer move as a last-ditch effort after several failed and comparably fruitless iterations. Any help would be greatly appreciated.
module.exports = function(req, res, cb) {
    var cartography = function() {
        return https.get({
            hostname: 'api.mapbox.com',
            path: '/v4/mapbox.wheatpaste/' + req.body[0] + ',' + req.body[1] + ',6/750x350.png?access_token=' + process.env.MAPBOX_API
        }, function(res) {
            var body = '';
            res.on('data', function(chunk) {
                body += chunk;
            });
            res.on('end', function() {
                var mapPath = 'map' + req.body[0] + req.body[1] + '.png';
                var map = new Buffer(body, 'base64');
                fs.writeFile(__dirname + '/client/images/maps/' + mapPath, map, 'base64', function(err) {
                    if (err) throw err;
                    cb(mapPath);
                })
            })
        });
    };
    cartography();
};
It is possible to rewrite your code as a more compact subroutine:
const fs = require('fs');
const https = require('https');

https.get(url, (response) => { // the request itself
    if (response) {
        let imageName = 'image.png'; // for this purpose I usually use crypto
        response.pipe(             // pipe the response to a write stream (file)
            fs.createWriteStream(  // create the write stream
                './public/' + imageName // create a file named image.png
            )
        );
        return imageName; // if the public folder is set as default in app.js
    } else {
        return false;
    }
});
You could get the original name and extension from the URL, but it is safer to generate a new name with crypto and take the file extension, as I said, from the URL or with the read-chunk and file-type modules.
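For example, a minimal sketch of the crypto-based naming (the makeImageName helper and the '.png' fallback are illustrative, not part of the answer above):
const crypto = require('crypto');
const path = require('path');
const url = require('url');

// Generate a collision-resistant file name and keep the extension from the URL.
function makeImageName(fileUrl) {
    const ext = path.extname(url.parse(fileUrl).pathname) || '.png';
    return crypto.randomBytes(16).toString('hex') + ext;
}

// let imageName = makeImageName('https://example.com/some/picture.png');
// -> something like 'ba1f2511fc30423bdbb183fe33f3dd0f.png'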

Why doesn't my http request work for long JSON string?

var http = require('http');
var qhttp = require('q-io/http');
var formidable = require('formidable');
var categories;

qhttp.read("https://api.myjson.com/bins/509wa")
    .then(function (json) {
        categories = JSON.parse(json).categories;
    })
    .then(null, console.error);

module.exports.putCat = function(req, res){
    var form = new formidable.IncomingForm();
    form.parse(req, function(error, fields, files){
        if (error) { console.log(error) }
        fields["catProd"] = [];
        categories.push(fields);
        var dataString = JSON.stringify({categories: categories});
        console.log(dataString);
        var options = {
            host: "api.myjson.com",
            path: "/bins/509wa.json",
            method: "PUT",
            headers: {"Content-Type": "application/json", "Content-Length": dataString.length}
        };
        function callback(response){
            var body = "";
            response.on('data', function(chunk){
                body += chunk;
            });
            response.on('end', function(){
                console.log('Received data: ' + body);
            });
        }
        http.request(options, callback).write(dataString);
        res.end();
    });
};
It works perfectly with something like JSON.stringify({"hello": "world"});. However, when I try it with my data that needs to be stored (which is much longer), it doesn't send anything to the API. Thanks in advance!
You have a race condition with the categories variable. If some external code calls putCat() quickly after loading the module, then the categories data may not be available yet.
If you have async work to do at module load time, then you should probably expose a module constructor that returns a promise, and only call putCat() after that promise resolves.
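A minimal sketch of that pattern (the init() name is illustrative, not an existing API; the URL and q-io/http come from the question):
var qhttp = require('q-io/http');
var categories;

// expose an initializer that resolves once the async data has loaded
module.exports.init = function() {
    return qhttp.read("https://api.myjson.com/bins/509wa").then(function(json) {
        categories = JSON.parse(json).categories;
    });
};

module.exports.putCat = function(req, res) {
    // categories is guaranteed to be populated here,
    // provided the caller waited for init() to resolve
};

// caller:
// require('./cats').init().then(function() { /* register routes / call putCat */ });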

NodeJS HTTP request POST ERROR socket hang up

Hi, I'm having problems performing an HTTP request in NodeJS given a large array of JSON objects. The request works fine given a small array of JSON objects. However, if I try to increase the size of the JSON array, I receive Error: socket hang up {"error":{"code":"ECONNRESET"}}. Do I need to perform multiple writes? Or is something going wrong on the other end?
Thanks in advance for taking your time here!
// data is a json object
var post_data = JSON.stringify(data);
var buf = new Buffer(post_data);
var len = buf.length;

var options = {
    hostname: address,
    port: port,
    path: pathName,
    method: 'PUT',
    headers: {
        'Content-Type': 'application/json',
        'Content-Length': len,
        'Transfer-Encoding': 'chunked'
    }
};

// http call to REST API server
var req = restHttp.request(options, function(res) {
    console.log('server PUT response received.');
    var resData = '';
    res.on('data', function(replyData) {
        // Check reply data for error.
        console.log(replyData.toString('utf8'));
        if (replyData !== 'undefined')
            resData += replyData;
    });
    res.on('end', function() {
        callback(JSON.parse(resData));
    });
});
req.write(buf);
req.end();
You can stream the request body.
If the data in buf was in a readable stream then you can just do buf.pipe(req).
For example, if the current directory contains a file data.json with the JSON you can do
var buf = fs.createReadStream(__dirname + '/data.json');
to create a ReadStream object. Then you can pipe this to your req:
buf.pipe(req);
The pipe command will call req.end once it's done streaming.
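If the JSON really does start out as an in-memory Buffer rather than a file, one way (a sketch, not the only option) to turn it into a readable stream is a PassThrough stream; buf and req below refer to the variables from the question:
// Wrap an in-memory Buffer in a PassThrough stream so it can be piped
// into the request the same way a file stream would be.
var stream = require('stream');

var bufferStream = new stream.PassThrough();
bufferStream.end(buf);   // buf is the Buffer built from JSON.stringify(data)
bufferStream.pipe(req);  // pipe() will end the request once the stream finishes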

How to download and unzip a zip file in memory in NodeJs?

I want to download a zip file from the internet and unzip it in memory without saving to a temporary file. How can I do this?
Here is what I tried:
var url = 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip';
var request = require('request'), fs = require('fs'), zlib = require('zlib');

request.get(url, function(err, res, file) {
    if (err) throw err;
    zlib.unzip(file, function(err, txt) {
        if (err) throw err;
        console.log(txt.toString()); // outputs nothing
    });
});
[EDIT]
As suggested, I tried using the adm-zip library and I still cannot make this work:
var ZipEntry = require('adm-zip/zipEntry');

request.get(url, function(err, res, zipFile) {
    if (err) throw err;
    var zip = new ZipEntry();
    zip.setCompressedData(new Buffer(zipFile.toString('utf-8')));
    var text = zip.getData();
    console.log(text.toString()); // fails
});
You need a library that can handle buffers. The latest version of adm-zip will do:
npm install adm-zip
My solution uses the http.get method, since it returns Buffer chunks.
Code:
var file_url = 'http://notepad-plus-plus.org/repository/7.x/7.6/npp.7.6.bin.x64.zip';
var AdmZip = require('adm-zip');
var http = require('http');

http.get(file_url, function(res) {
    var data = [], dataLen = 0;
    res.on('data', function(chunk) {
        data.push(chunk);
        dataLen += chunk.length;
    }).on('end', function() {
        var buf = Buffer.alloc(dataLen);
        for (var i = 0, len = data.length, pos = 0; i < len; i++) {
            data[i].copy(buf, pos);
            pos += data[i].length;
        }
        var zip = new AdmZip(buf);
        var zipEntries = zip.getEntries();
        console.log(zipEntries.length);
        for (var i = 0; i < zipEntries.length; i++) {
            if (zipEntries[i].entryName.match(/readme/))
                console.log(zip.readAsText(zipEntries[i]));
        }
    });
});
The idea is to create an array of buffers and concatenate them into a new one at the end. This is due to the fact that buffers cannot be resized.
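As a side note, Buffer.concat can replace the manual copy loop with the same result; a brief sketch (assuming file_url as defined in the snippet above):
var AdmZip = require('adm-zip');
var http = require('http');

http.get(file_url, function(res) { // file_url as defined above
    var data = [], dataLen = 0;
    res.on('data', function(chunk) {
        data.push(chunk);
        dataLen += chunk.length;
    }).on('end', function() {
        // Buffer.concat does the allocation and copying that the loop above did by hand
        var zip = new AdmZip(Buffer.concat(data, dataLen));
        console.log(zip.getEntries().length);
    });
});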
Update
This is a simpler solution that uses the request module to obtain the response in a buffer, by setting encoding: null in the options. It also follows redirects and resolves http/https automatically.
var file_url = 'https://github.com/mihaifm/linq/releases/download/3.1.1/linq.js-3.1.1.zip';
var AdmZip = require('adm-zip');
var request = require('request');

request.get({url: file_url, encoding: null}, (err, res, body) => {
    var zip = new AdmZip(body);
    var zipEntries = zip.getEntries();
    console.log(zipEntries.length);
    zipEntries.forEach((entry) => {
        if (entry.entryName.match(/readme/i))
            console.log(zip.readAsText(entry));
    });
});
The body of the response is a buffer that can be passed directly to AdmZip, simplifying the whole process.
Sadly you can't pipe the response stream into the unzip job the way node's zlib lib allows you to; you have to cache it and wait for the end of the response. I suggest you pipe the response to an fs stream in the case of big files, otherwise you will fill up your memory in a blink!
I don't completely understand what you are trying to do, but IMHO this is the best approach. You should keep your data in memory only for the time you really need it, and then stream it to the csv parser.
If you want to keep all your data in memory, you can replace the csv parser method fromPath with from, which takes a buffer instead, and have getData return the unzipped content directly (a sketch of that variant follows the code below).
You can use adm-zip (as @mihai said) instead of node-zip; just pay attention, because adm-zip is not yet published on npm, so you need:
$ npm install git://github.com/cthackers/adm-zip.git
N.B. Assumption: the zip file contains only one file
var request = require('request'),
    fs = require('fs'),
    csv = require('csv'),
    NodeZip = require('node-zip')

function getData(tmpFolder, url, callback) {
    var tempZipFilePath = tmpFolder + new Date().getTime() + Math.random()
    var tempZipFileStream = fs.createWriteStream(tempZipFilePath)
    request.get({
        url: url,
        encoding: null
    }).on('end', function() {
        fs.readFile(tempZipFilePath, 'base64', function (err, zipContent) {
            var zip = new NodeZip(zipContent, { base64: true })
            Object.keys(zip.files).forEach(function (filename) {
                var tempFilePath = tmpFolder + new Date().getTime() + Math.random()
                var unzipped = zip.files[filename].data
                fs.writeFile(tempFilePath, unzipped, function (err) {
                    callback(err, tempFilePath)
                })
            })
        })
    }).pipe(tempZipFileStream)
}

getData('/tmp/', 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip', function (err, path) {
    if (err) {
        return console.error('error: %s' + err.message)
    }
    var metadata = []
    csv().fromPath(path, {
        delimiter: '|',
        columns: true
    }).transform(function (data) {
        // do things with your data
        if (data.NAME[0] === '#') {
            metadata.push(data.NAME)
        } else {
            return data
        }
    }).on('data', function (data, index) {
        console.log('#%d %s', index, JSON.stringify(data, null, ' '))
    }).on('end', function (count) {
        console.log('Metadata: %s', JSON.stringify(metadata, null, ' '))
        console.log('Number of lines: %d', count)
    }).on('error', function (error) {
        console.error('csv parsing error: %s', error.message)
    })
})
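A rough sketch of the in-memory variant mentioned above, keeping the same NodeZip and csv usage (the old csv module's from() method is assumed here to accept the unzipped content plus options; the single-file assumption still applies):
var request = require('request'),
    csv = require('csv'),
    NodeZip = require('node-zip')

// download the zip into memory and hand back the unzipped text directly
function getData(url, callback) {
    request.get({ url: url, encoding: null }, function (err, res, body) {
        if (err) return callback(err)
        var zip = new NodeZip(body.toString('base64'), { base64: true })
        var filename = Object.keys(zip.files)[0] // assumption: the zip holds a single file
        callback(null, zip.files[filename].data)
    })
}

getData('http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip', function (err, unzipped) {
    if (err) return console.error(err)
    csv().from(unzipped, { delimiter: '|', columns: true })
        .on('data', function (data, index) {
            console.log('#%d %s', index, JSON.stringify(data))
        })
})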
If you're on macOS or Linux, you can use the unzip command to unzip from stdin.
In this example I'm reading the zip file from the filesystem into a Buffer object, but it works with a downloaded file as well:
// Get a Buffer with the zip content
var fs = require("fs")
  , zip = fs.readFileSync(__dirname + "/test.zip");

// Now the actual unzipping:
var spawn = require('child_process').spawn
  , fileToExtract = "test.js"
  // -p tells unzip to extract to stdout
  , unzip = spawn("unzip", ["-p", "/dev/stdin", fileToExtract])
  ;

// Write the Buffer to stdin
unzip.stdin.write(zip);

// Handle errors
unzip.stderr.on('data', function (data) {
    console.log("There has been an error: ", data.toString("utf-8"));
});

// Handle the unzipped stdout
unzip.stdout.on('data', function (data) {
    console.log("Unzipped file: ", data.toString("utf-8"));
});

unzip.stdin.end();
Which is actually just the node version of:
cat test.zip | unzip -p /dev/stdin test.js
EDIT: It's worth noting that this will not work if the input zip is too big to be read in one chunk from stdin. If you need to read bigger files, and your zip file contains only one file, you can use funzip instead of unzip:
var unzip = spawn("funzip");
If your zip file contains multiple files (and the file you want isn't the first one) I'm afraid to say you're out of luck. Unzip needs to seek in the .zip file since zip files are just a container, and unzip may just unzip the last file in it. In that case you have to save the file temporarily (node-temp comes in handy).
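A minimal sketch of that temporary-file fallback, using core os/fs modules here instead of node-temp (the extractEntry helper name is illustrative):
var fs = require('fs');
var os = require('os');
var path = require('path');
var spawn = require('child_process').spawn;

// Write the zip buffer to a temp file so unzip can seek inside it,
// extract one entry to stdout, then clean up.
function extractEntry(zipBuffer, entryName, callback) {
    var tmpPath = path.join(os.tmpdir(), 'zip-' + Date.now() + '.zip');
    fs.writeFile(tmpPath, zipBuffer, function(err) {
        if (err) return callback(err);
        var unzip = spawn('unzip', ['-p', tmpPath, entryName]);
        var chunks = [];
        unzip.stdout.on('data', function(chunk) { chunks.push(chunk); });
        unzip.on('close', function() {
            fs.unlink(tmpPath, function() {});
            callback(null, Buffer.concat(chunks));
        });
    });
}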
Two days ago the module node-zip was released, which is a wrapper for the JavaScript-only version of Zip: JSZip.
var NodeZip = require('node-zip')
  , zip = new NodeZip(zipBuffer.toString("base64"), { base64: true })
  , unzipped = zip.files["your-text-file.txt"].data;
