Download large file with node.js avoiding high memory consumption

Download large file with node.js avoiding high memory consumption - javascript

I`m trying to create a file downloader as a background service but when a large file is scheduled, it's first put in memory and then, at the end of the download the file is written to disk.
How can I make the file be wrote gradually to the disk preserving memory considering that I may have lots of files being downloaded at the same time?
Here's the code I`m using:
var sys = require("sys"),
http = require("http"),
url = require("url"),
path = require("path"),
fs = require("fs"),
events = require("events");
var downloadfile = "http://nodejs.org/dist/node-v0.2.6.tar.gz";
var host = url.parse(downloadfile).hostname
var filename = url.parse(downloadfile).pathname.split("/").pop()
var theurl = http.createClient(80, host);
var requestUrl = downloadfile;
sys.puts("Downloading file: " + filename);
sys.puts("Before download request");
var request = theurl.request('GET', requestUrl, {"host": host});
request.end();
var dlprogress = 0;
setInterval(function () {
sys.puts("Download progress: " + dlprogress + " bytes");
}, 1000);
request.addListener('response', function (response) {
response.setEncoding('binary')
sys.puts("File size: " + response.headers['content-length'] + " bytes.")
var body = '';
response.addListener('data', function (chunk) {
dlprogress += chunk.length;
body += chunk;
});
response.addListener("end", function() {
fs.writeFileSync(filename, body, 'binary');
sys.puts("After download finished");
});
});

I changed the callback to:
request.addListener('response', function (response) {
var downloadfile = fs.createWriteStream(filename, {'flags': 'a'});
sys.puts("File size " + filename + ": " + response.headers['content-length'] + " bytes.");
response.addListener('data', function (chunk) {
dlprogress += chunk.length;
downloadfile.write(chunk, encoding='binary');
});
response.addListener("end", function() {
downloadfile.end();
sys.puts("Finished downloading " + filename);
});
});
This worked perfectly.

does the request package work for your uses?
it lets you do things like this:
request(downloadurl).pipe(fs.createWriteStream(downloadtohere))

Take a look at http-request:
// shorthand syntax, buffered response
http.get('http://localhost/get', function (err, res) {
if (err) throw err;
console.log(res.code, res.headers, res.buffer.toString());
});
// save the response to 'myfile.bin' with a progress callback
http.get({
url: 'http://localhost/get',
progress: function (current, total) {
console.log('downloaded %d bytes from %d', current, total);
}
}, 'myfile.bin', function (err, res) {
if (err) throw err;
console.log(res.code, res.headers, res.file);
});

When downloading large file please use fs.write and not writeFile as it will override the previous content.
function downloadfile(res) {
var requestserver = http.request(options, function(r) {
console.log('STATUS: ' + r.statusCode);
console.log('HEADERS: ' + JSON.stringify(r.headers));
var fd = fs.openSync('sai.tar.gz', 'w');
r.on('data', function (chunk) {
size += chunk.length;
console.log(size+'bytes received');
sendstatus(res,size);
fs.write(fd, chunk, 0, chunk.length, null, function(er, written) {
});
});
r.on('end',function(){
console.log('\nended from server');
fs.closeSync(fd);
sendendstatus(res);
});
});
}

Instead of holding the content into memory in the "data" event listener you should write to the file in append mode.

Use streams like Carter Cole suggested. Here is a more complete example
var inspect = require('eyespect').inspector();
var request = require('request');
var filed = require('filed');
var temp = require('temp');
var downloadURL = 'http://upload.wikimedia.org/wikipedia/commons/e/ec/Hazard_Creek_Kayaker.JPG';
var downloadPath = temp.path({prefix: 'singlePageRaw', suffix: '.jpg'});
var downloadFile = filed(downloadPath);
var r = request(downloadURL).pipe(downloadFile);
r.on('data', function(data) {
inspect('binary data received');
});
downloadFile.on('end', function () {
inspect(downloadPath, 'file downloaded to path');
});
downloadFile.on('error', function (err) {
inspect(err, 'error downloading file');
});
You may need to install modules which you can do via
npm install filed request eyespect temp

Related

Formidable/NodeJS HTTP Server JPG save

Stackoverflow JS Genius's!
I have an issue with my current project, it's using node's HTTP createServer, using Formidable to parse the body data.
See code below. (http-listener.js)
var listenport = 7200;
const server = http.createServer((req, res) => {
// Set vars ready
var data = '';
var plateImg = '';
var overview1 = '';
var overview2 = '';
new formidable.IncomingForm().parse(req)
// I originally thought it was sent in files, but it isnt, it's fields.
.on('file', function(name, file) {
console.log('Got file:', name);
})
// This is the correct procedure for my issue.
.on('field', function(name, field) {
console.log('Got a field:', name);
if(name.toLowerCase() === "anpr.xml")
{
// DO PARSE INTO JSON! This works, all is well.
xml2js.parseString(field, {explicitArray:false, ignoreAttrs:true}, function (err, result)
{
if(err)
{
alert('Parse: '+err);
}
// Console log parsed json data.
console.log("Read: "+result.EventNotificationAlert.ANPR.licensePlate);
console.log(result);
data = result;
});
}
if(name.toLowerCase() === "licenseplatepicture.jpg")
{
plateImg = field
// This doesnt work?
// I need to store these fields as an image. ? Is this possible with it being sent as a field and not as a file upload.
// This is the only option I have as I can't control the client sending this data (It's a camera)
fs.writeFile(config.App.ImageDir+'/Plate.jpg', plateImg, function(err) {
if(err)console.log(err);
});
}
if(name.toLowerCase() === "detectionpicture.jpg")
{
if(overview1 == '')
{
overview1 = field;
}
else if(overview2 == '')
{
overview2 = field;
}
else
{
// do nothing else.
console.log("Couldn't send images to variable.");
}
}
})
.on('error', function(err) {
alert(err);
})
.on('end', function() {
// Once finished, send to ANPR data to function to handle data and insert to database. WORKS
// Call anpr function.
ANPR_ListenData(data, plateImg, overview1, overview2, function(result) {
if(result.Status > 0)
{
console.log("Accepted by: "+result.Example);
// reset var
data = '';
plateImg = '';
overview1 = '';
overview2 = '';
res.writeHead(200, {'content-type':'text/html'});
res.end();
}
});
});
});
server.listen(listenport, () => {
console.log('ANPR Server listening on port: ' + listenport);
});
Basically the images that are sent in the fields: licenseplatepicture.jpg etc I want to store them directly into my app image directory.
Unfortunately I have no control over how the chunks are sent to this server due to it being a network camera, I simply need to write a procedure.
The full request chunk is quite large so I will upload the file to OneDrive for you to glance at and understand the request.
Any help with this will be appreciated. I've tried everything I can possibly think of, but the file saves unreadable :(. I don't know where else to look or what else I can try, other than what I've already done & tried.
Request Txt File: https://1drv.ms/t/s!AqAIyFoqrBTO6hTwCimcHDHODqEi?e=pxJY00
Ryan.

I fixed this by using Busboy package instead of Formidable.
This is how my http listener looks like using Busboy.
var inspect = util.inspect;
var Busboy = require('busboy');
http.createServer(function(req, res) {
if (req.method === 'POST') {
//vars
var ref = Math.random().toString(36).substring(5) + Math.random().toString(36).substring(2, 15);;
var xml = '';
var parseXml = '';
var over1, over2 = '';
var i = 0;
var busboy = new Busboy({ headers: req.headers });
busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
console.log('File [' + fieldname + ']: filename: ' + filename + ', encoding: ' + encoding + ', mimetype: ' + mimetype);
if(filename.toLowerCase() === "licenseplatepicture.jpg")
{
var saveTo = config.App.ImageDir+"/"+ref+"_Plate.jpg";
if (!fs.existsSync(saveTo)) {
//file exists
file.pipe(fs.createWriteStream(saveTo));
}
}
if(filename.toLowerCase() === "detectionpicture.jpg")
{
i++;
var saveTo = config.App.ImageDir+"/"+ref+"_Front_"+i+".jpg";
if (!fs.existsSync(saveTo)) {
//file exists
file.pipe(fs.createWriteStream(saveTo));
}
}
file.on('data', function(data) {
if(filename.toLowerCase() === "anpr.xml")
{
xml += data;
}
console.log('File [' + fieldname + '] got ' + data.length + ' bytes');
});
file.on('end', function() {
console.log('File [' + fieldname + '] Finished');
});
});
busboy.on('field', function(fieldname, val, fieldnameTruncated, valTruncated, encoding, mimetype) {
console.log('Field [' + fieldname + ']: value: ' + inspect(val));
// No fields according to busboy
});
busboy.on('finish', function() {
// DO PARSE INTO JSON! This works, all is well.
xml2js.parseString(xml, {explicitArray:false, ignoreAttrs:true}, function (err, result)
{
if(err)
{
alert('Parse: '+err);
}
// Set parsed var
parseXml = result;
});
var images = '';
if(i = 2)
{
images = `{"Plate":"${ref}_Plate.jpg", "Front":"${ref}_Front_1.jpg", "Overview":"${ref}_Front_2.jpg"}`;
} else {
images = `{"Plate":"${ref}_Plate.jpg", "Front":"${ref}_Front_1.jpg", "Overview":"null"}`;
}
// Once parsed, send on to ANPR listen function.
ANPR_ListenData(ref, parseXml, images, function(result) {
if(result.Status == 1)
{
console.log('Data transfered for: '+parseXml.EventNotificationAlert.ANPR.licensePlate);
console.log('Accepted Camera: '+result.Example);
res.writeHead(200, { Connection: 'close', Location: '/' });
res.end();
}
});
});
req.pipe(busboy);
}
}).listen(7200, function() {
console.log('Listening for requests');
});
Hope this helps someone else in the future. Certainly caused me a lot of a wasted time.
Busboy was the better package to use when I was reading into it more, it makes more sense for what I was attempting to achieve.
Ryan :).
All the best.

How to check download time and execution in Node js?

I wanted to check how long did the downloading of each file and all file takes time if the download is slow or fast, How can we see and measure it on node js ? also would would be better it we can also consider memory. Thank you. By the way the final_list on the code below is an array of urls.
my code on downloading files
var download = function (url, dest, callback) {
request.get(url)
.on('error', function (err) { console.log(err) })
.pipe(fs.createWriteStream(dest))
.on('close', callback);
};
final_list.forEach(function (str) {
var filename = str.split('/').pop();
console.log('Downloading ' + filename);
download(str, filename, function () { console.log('Finished Downloading' + "" + filename) });
});

Ways to measure time:
Use process.hrtime.bigint() (high-resolution).
Use console.time with console.timeEnd.
Measure memory
Use process.memoryUsage().
Here is your code updated with all the above methods.
final_list.forEach(function (str) {
var filename = str.split('/').pop();
console.log('Downloading ' + filename);
// console.time(filename);
var start = process.hrtime.bigint();
download(str, filename, function () {
console.log('Finished Downloading' + "" + filename)
// console.timeEnd(filename);
var end = process.hrtime.bigint();
var took = Number(end - start) / 1000000;
console.log(`${filename}: ${took}ms`);
console.log('Total used memory:', process.memoryUsage().heapUsed);
});
});

NodeJS finish writing the file with pipe before continuing with the next iteration

Similar to this question,
I have a script that downloads a file to a given url via http.get.
How can I make sure the pipe is finished before continuing to the next iteration with just the http/https module??
//nodejs default libs
var fs = require("fs");
var http = require('https');
function dlFile(fullFilePath, dlUrl, fsize, fname){
var file = fs.createWriteStream(fullFilePath); //fullFilePath will dictate where we will save the file + filename.
var rsult ='';
var downloadedFsize;
var stats; //stats of the file will be included here
var request = http.get( dlUrl, function(response) {
let rsult = response.statusCode;
//will respond with a 200 if the file is present
//404 if file is missing
response.pipe(file);
/*pipe writes the file...
how do we stop the iteration while it is not yet finished writing?
*/
console.log(" \n FILE : " + fname);
console.log("File analysis finished : statusCode: " + rsult + " || Saved on " + fullFilePath);
console.log(' \n Downloaded from :' + dlUrl);
console.log(' \n SQL File size is : ' + fsize);
//identify filesize
stats = fs.statSync(fullFilePath);
downloadedFsize = stats["size"]; //0 because the pipe isn't finished yet...
console.log(' actual file size is : ' + downloadedFsize);
}).on('error', function(e) {
console.error(e);
//log that an error happened to the file
}).on('end', function(e){
//tried putting the above script here but nothing happens
});
return rsult;
}
Is there a cleaner approach similar to what I have in mind above? or should I approach this differently? I tried putting the code on .on('end' but it does nothing

The end event is not triggered on the request, instead it is triggered on the response (docs):
response.on("end", function() {
console.log("done");
});

As #Jonas Wilms says, the trigger was indeed on response.
//nodejs default libs
var fs = require("fs");
var http = require('https');
function dlFile(fullFilePath, dlUrl, fsize, fname){
var file = fs.createWriteStream(fullFilePath); //fullFilePath will dictate where we will save the file + filename.
var rsult ='';
var downloadedFsize;
var stats; //stats of the file will be included here
var request = http.get( dlUrl, function(response) {
let rsult = response.statusCode;
//will respond with a 200 if the file is present
//404 if file is missing
response.pipe(file).on('finish', function(e){
console.log(" \n FILE : " + fname);
console.log("File analysis finished : statusCode: " + rsult + " || Saved on " + fullFilePath);
console.log(' \n Downloaded from :' + dlUrl);
console.log(' \n SQL File size is : ' + fsize);
//identify filesize
stats = fs.statSync(fullFilePath);
downloadedFsize = stats["size"];
console.log(' actual file size is : ' + downloadedFsize);
});
/*pipe writes the file above, and output the results once it's done */
}).on('error', function(e) {
console.error(e);
//log that an error happened to the file
}).on('end', function(e){
//tried putting the above script here but nothing happens
});
return rsult;
}

How to end() a file stream

I am having a weird issue with a piece of sample code that I got here, the central part being this:
server.on('request', function(request, response) {
var file = fs.createWriteStream('copy.csv');
var fileSize = request.headers['content-length'];
var uploadedSize = 0;
request.on('data', function (chunk) {
uploadedSize += chunk.length;
uploadProgress = (uploadedSize/fileSize) * 100;
response.write(Math.round(uploadProgress) + "%" + " uploaded\n" );
var bufferStore = file.write(chunk);
console.log(bufferStore);
console.log(chunk);
if(!bufferStore)
{
request.pause();
}
});
file.on('drain', function() {
request.resume();
});
request.on('end', function() {
response.write('Upload done!');
response.end();
});
});
The problem is, the file copy.csv does not contain anything after the process is finished.
I tried to add file.end(); in the request.on('end'-callback, but it did not do the trick. However, if I add faulty code in said callback that causes an exception, the file is being written just fine (although this ofc can't be the final solution).

To notify the stream that there are no more chunks to be read, you can simply call your_stream.push(null). You can read more about streams and push(null) from the excellent substack's stream guide.

Try this structure:
var file = fs.WriteStream('copy.csv');
fileSize = request.headers['content-length'],
uploadedSize = 0;
request.on('readable', function () { // Node.js 0.10 (Streams2 interface)
var newData = this.read() || new Buffer(0); // Sometimes may come null
file.write(newData);
uploadedSize += newData.length;
response.write(Math.round((uploadedSize / fileSize) * 100) + "%" + " uploaded\n" );
});
request.on('end', function () {
response.write('Upload done!');
response.end();
file.end();
});

With node.js how to I set a var to response from HTTP client?

I know this is probably Asynchronous Javascript 101 and I do have some books on the Kindle I could consult, but I am nowhere near my device.
I have a node app with a variable being assigned to a module that I'm loading. The module has one function that downloads a string of data from a URL.
The problem is, how do I not set the variable until the request has returned?
My code looks like this:
Downloader.js:
var http = require('http');
exports.downloadString = function(str) {
console.log("Downloading from " + str);
http.get(str, function(res) {
var data = [];
console.log("Got response: " + res.statusCode);
res.on('data', function (chunk) {
data.push(chunk);
});
res.on('end', function() {
return data.join('');
});
}).on('error', function(e) {
console.log("Got error: " + e.message);
});
}
app.js:
var downloader = require('./lib/Downloader')
, dateParser = require('./lib/DateParser')
, eventIdGenerator = require('./lib/EventIdGenerator');
var report = downloader.downloadString("http://exampleapi.com");
console.log(report);
I need to wait until the variable named "report" is populated.
Obviously this means my Mocha tests are also failing as I'm still unsure of how to tell the test to wait until the variable is filled.
I'm sure this is extremely simple, but I am drawing a blank!
Thanks!

Node.js is (mostly) asynchronous, so you'd need to pass a callback function to your module:
Downloader.js:
var http = require('http');
exports.downloadString = function(str, callback) {
console.log("Downloading from " + str);
http.get(str, function(res) {
var data = [];
console.log("Got response: " + res.statusCode);
res.on('data', function (chunk) {
data.push(chunk);
});
res.on('end', function() {
callback(data.join(''));
});
}).on('error', function(e) {
console.log("Got error: " + e.message);
});
};
app.js:
var downloader = require('./lib/Downloader')
, dateParser = require('./lib/DateParser')
, eventIdGenerator = require('./lib/EventIdGenerator');
downloader.downloadString("http://exampleapi.com", function(report) {
console.log(report);
});

Develop Reference

JavaScript is the programming language of the Web.

Download large file with node.js avoiding high memory consumption - javascript

does the request package work for your uses? it lets you do things like this: request(downloadurl).pipe(fs.createWriteStream(downloadtohere))

Instead of holding the content into memory in the "data" event listener you should write to the file in append mode.

Related

Formidable/NodeJS HTTP Server JPG save

How to check download time and execution in Node js?

NodeJS finish writing the file with pipe before continuing with the next iteration

How to end() a file stream

With node.js how to I set a var to response from HTTP client?

Categories

Resources