Error while downloading multiple files from AWS S3 using Node.js - javascript

I am getting the error "File download failed with error write after end" while trying to download multiple files from AWS S3 using the code snippet below.
Can someone help me figure out the root cause of the error? It would be great if anyone could suggest a solution as well.
The code below works without any issues if only a single file needs to be downloaded.
var AWS = require('aws-sdk');
var fs = require('fs')
module.exports.download = function (req, res) {
    var S3_BUCKET = 'mybucketname'
    var s3 = new AWS.S3({
        accessKeyId: process.env.ACCESSKEY,
        secretAccessKey: process.env.SECRETKEY,
        region: process.env.REGION
    });
    var os = require('os');
    var filenames = "file1.jpg,file2.jpg"
    var str_array = filenames.split(',');
    for (var i = 0; i < str_array.length; i++) {
        var filename = str_array[i].trim();
        localFileName = os.homedir() + "\\" + "Downloads" + "\\" + filename,
        file = fs.createWriteStream(localFileName);
        s3.getObject({
            Bucket: S3_BUCKET,
            Key: filename
        })
        .on('error', function (err) {
            res.end("File download failed with error " + err.message);
        })
        .on('httpData', function (chunk) {
            file.write(chunk);
        })
        .on('httpDone', function () {
            file.end();
        })
        .send();
    }
    res.end("Files have been downloaded successfully")
}

It is better to use the pipe function rather than handling the stream events manually:
s3.getObject(options)
    .createReadStream()
    .on('error', e => ...)
    .pipe(file)
    .on('error', e => ...);
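For the multi-file case in the question, a minimal sketch along those lines (assuming the same requires, credentials and S3_BUCKET as in the snippet above): give every file its own write stream inside the loop and only end the HTTP response once all downloads have finished, which avoids both the shared file variable and the premature res.end() that lead to "write after end".
// Sketch only: reuses AWS, fs, os and S3_BUCKET from the question's snippet.
var path = require('path');

module.exports.download = function (req, res) {
    var s3 = new AWS.S3({ region: process.env.REGION });
    var filenames = "file1.jpg,file2.jpg".split(',').map(function (f) { return f.trim(); });
    var pending = filenames.length;

    filenames.forEach(function (filename) {
        // each iteration gets its own write stream, so nothing is shared between files
        var localFileName = path.join(os.homedir(), 'Downloads', filename);
        var file = fs.createWriteStream(localFileName);

        s3.getObject({ Bucket: S3_BUCKET, Key: filename })
            .createReadStream()
            .on('error', function (err) {
                console.error('Download of ' + filename + ' failed: ' + err.message);
            })
            .pipe(file)
            .on('finish', function () {
                // only end the HTTP response once every file has been written
                if (--pending === 0) {
                    res.end("Files have been downloaded successfully");
                }
            });
    });
};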

Related

How do I make the newman library take the csv files sequentially?

This is my code
var fs = require('fs'),
    path = require('path'),
    cheerio = require('cheerio'),
    newman = require('newman'),
    os = require('os');

const directoryPath = path.join(__dirname, './payload');

fs.readdir(directoryPath, function (err, files) {
    if (err) {
        return console.log('Unable to scan directory: ' + err);
    }
    files.forEach(function (file) {
        console.log("Files Read")
        runNewman(file);
    });
});
function runNewman(data) {
    let logs = [];
    var csvFile = "payload/" + data;
    //var logFile = "payload/" + data + ".txt";
    newman.run({
        collection: require('./kmap_testing.postman_collection.json'),
        environment: require('./kmap.postman_environment.json'),
        globals: require('./My Workspace.postman_globals.json'),
        reporters: 'cli',
        iterationData: csvFile
    }).on('start', function (err, args) {
        console.log('start');
    }).on('console', function (err, args) {
        if (err) { return; }
        logs.push(args.messages);
    }).on('done', function (err, summary) {
        if (err || summary.error) {
            console.error('collection run encountered an error');
        }
        else {
            console.log('collection run completed');
        }
        fs.appendFileSync("results.csv", logs.join("\r\n"));
    })
}
I have a huge csv file with 100K+ rows. I have split the data into 5K rows per csv file and saved them under the payload folder. However, newman.run picks up the files in a random / parallel fashion. The results.csv file ends up containing the runs twice and has more than 200K+ results. Can someone please help me with this? I am a beginner with the newman library.
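Not part of the original thread, but one way to force sequential runs (a sketch reusing the collection, environment and globals paths above) is to start the next newman.run only from the completion callback of the previous one:
function runSequentially(files, index) {
    if (index >= files.length) {
        console.log('all csv files processed');
        return;
    }
    newman.run({
        collection: require('./kmap_testing.postman_collection.json'),
        environment: require('./kmap.postman_environment.json'),
        globals: require('./My Workspace.postman_globals.json'),
        reporters: 'cli',
        iterationData: 'payload/' + files[index]
    }, function (err, summary) {
        if (err || (summary && summary.error)) {
            console.error('run failed for ' + files[index]);
        }
        // only start the next run once this one has completed
        runSequentially(files, index + 1);
    });
}

fs.readdir(directoryPath, function (err, files) {
    if (err) {
        return console.log('Unable to scan directory: ' + err);
    }
    // sort() gives a deterministic order; each file is run exactly once
    runSequentially(files.sort(), 0);
});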

AWS S3 SDK: how do I get the filename from the progress callback in a multi file upload?

So I'm using the AWS S3 JavaScript SDK to upload multiple files. The code below works well, but I do have one issue. The progress function (evt) is called asynchronously and I cannot tell which filename it was called for; evt does not include the filename. Is there a way to know?
How do I output the filename to the console (where I show >>> I NEED THE FILENAME HERE <<<)?
for (var i = 0; i < files.length; i++) {
    var file = files[i];
    if (files[i]) {
        var params = {Key: file.name, ContentType: file.type, Body: file};
        bucket.upload(params).on('httpUploadProgress', function (evt) {
            console.log("Uploaded " + >>> I NEED THE FILENAME HERE <<< + " " + parseInt((evt.loaded * 100) / evt.total) + '%');
        }).send(function (err, data) {
            //alert("File uploaded successfully -- " + err);
        });
    }
}
evt.key should give the name of the file that is being uploaded.
Edited:
Here is the full code I am using for the managed upload:
app.post('/uploadLargeFile', upload.array('file', 20), function (req, res) {
    console.log("received File")
    var file = req.files;
    for (var i = 0; i < req.files.length; i++) {
        var file = req.files[i];
        uploadLargeFiles(file);
    }
})

function uploadLargeFiles(file) {
    var params = {
        ACL: 'public-read',
        Body: new Buffer(file.buffer),
        Bucket: 'ascendon1',
        ContentType: file.mimetype,
        Key: file.originalname
    }
    var managedUpload = new AWS.S3.ManagedUpload({
        params: params
    });
    managedUpload.on('httpUploadProgress', function (progress) {
        console.log(progress);
    });
    managedUpload.send(function (err, data) {
        console.log(err, data);
    });
}
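If the key field is not present on the progress event (in the v2 SDK it is only set for multipart uploads), a common workaround, sketched here on top of the uploadLargeFiles function above, is to capture the filename in a closure so each progress handler knows which file it belongs to:
function uploadLargeFiles(file) {
    // file.originalname is captured by this closure, so every progress event
    // fired for this upload can be labelled with the right filename
    var filename = file.originalname;
    var managedUpload = new AWS.S3.ManagedUpload({
        params: {
            ACL: 'public-read',
            Body: file.buffer,
            Bucket: 'ascendon1',
            ContentType: file.mimetype,
            Key: filename
        }
    });
    managedUpload.on('httpUploadProgress', function (progress) {
        var percent = Math.round((progress.loaded * 100) / progress.total);
        console.log('Uploaded ' + filename + ' ' + percent + '%');
    });
    managedUpload.send(function (err, data) {
        console.log(err, data);
    });
}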

NodeJS - Downloading from Google Drive

I have this piece of code in order to download files from Google Drive:
function downloadDrive(fileId, callback) {
    var fileExt = fileId.split(".");
    var file = Date.now() + "." + fileExt[fileExt.length - 1];
    var dest = fs.createWriteStream("./files/" + file);
    service.files.get({
        auth: oauth2Client,
        fileId: fileExt[0],
        alt: "media"
    })
    .on("finish", function () {
        callback(file);
    })
    .on("error", function (err) {
        console.log("Error during download", err);
    })
    .pipe(dest);
}
It works very well on small files (~500 MB). However, when trying to download quite a big gzip file (~3 GB), it throws the following error:
buffer.js:23
const ui8 = new Uint8Array(size);
^
RangeError: Invalid typed array length
at new Uint8Array (native)
at createBuffer (buffer.js:23:15)
at allocate (buffer.js:98:12)
at new Buffer (buffer.js:53:12)
at Function.Buffer.concat (buffer.js:225:16)
at BufferList.copy (/Synology/server_Metagenomics/server/node_modules/googleapis/node_modules/google-auth-library/node_modules/request/node_modules/bl/bl.js:124:21)
at BufferList.slice (/Synology/server_Metagenomics/server/node_modules/googleapis/node_modules/google-auth-library/node_modules/request/node_modules/bl/bl.js:99:15)
at BufferList.toString (/Synology/server_Metagenomics/server/node_modules/googleapis/node_modules/google-auth-library/node_modules/request/node_modules/bl/bl.js:166:15)
at Request.<anonymous> (/Synology/server_Metagenomics/server/node_modules/googleapis/node_modules/google-auth-library/node_modules/request/request.js:1035:36)
at emitOne (events.js:82:20)
at Request.emit (events.js:169:7)
I didn't find a lot of information about it. What is going on?
You can try the following to download the file. The error seems to occur because you are downloading a large file and the whole response is being buffered in memory.
var filename = 'zzz.txt';
var proxyUrl = "http://" + user + ":" + password + "@" + host + ":" + port;
var token = 'YOUR_TOKEN';
var req = request.get('https://www.googleapis.com/drive/v3/files/YOUR_FILE_ID?alt=media', {
    'auth': {
        'bearer': token
    },
    'proxy': proxyUrl
}).on('response', function (res) {
    // create file write stream
    var fws = fs.createWriteStream(filename);
    // setup piping
    res.pipe(fws);
    res.on('error', function () {
        console.log("error occurred.....");
    });
    res.on('end', function () {
        console.log('Done');
        // go on with processing
    });
});
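For reference (not from the original answers), recent versions of the googleapis client can return the download as a stream, which avoids buffering the whole file in memory. A sketch, assuming oauth2Client is the already-authorised client from the question:
const { google } = require('googleapis');
const fs = require('fs');

const drive = google.drive({ version: 'v3', auth: oauth2Client });

function downloadDrive(fileId, destPath, callback) {
    const dest = fs.createWriteStream(destPath);
    drive.files.get(
        { fileId: fileId, alt: 'media' },
        { responseType: 'stream' }   // ask for a readable stream instead of a buffered body
    ).then(function (res) {
        res.data
            .on('error', function (err) { callback(err); })
            .pipe(dest)
            .on('finish', function () { callback(null, destPath); });
    }).catch(callback);
}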

How to download and unzip a zip file in memory in NodeJs?

I want to download a zip file from the internet and unzip it in memory without saving to a temporary file. How can I do this?
Here is what I tried:
var url = 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip';
var request = require('request'), fs = require('fs'), zlib = require('zlib');

request.get(url, function (err, res, file) {
    if (err) throw err;
    zlib.unzip(file, function (err, txt) {
        if (err) throw err;
        console.log(txt.toString()); //outputs nothing
    });
});
[EDIT]
As suggested, I tried using the adm-zip library and I still cannot make this work:
var ZipEntry = require('adm-zip/zipEntry');

request.get(url, function (err, res, zipFile) {
    if (err) throw err;
    var zip = new ZipEntry();
    zip.setCompressedData(new Buffer(zipFile.toString('utf-8')));
    var text = zip.getData();
    console.log(text.toString()); // fails
});
You need a library that can handle buffers. The latest version of adm-zip will do:
npm install adm-zip
My solution uses the http.get method, since it returns Buffer chunks.
Code:
var file_url = 'http://notepad-plus-plus.org/repository/7.x/7.6/npp.7.6.bin.x64.zip';
var AdmZip = require('adm-zip');
var http = require('http');

http.get(file_url, function (res) {
    var data = [], dataLen = 0;
    res.on('data', function (chunk) {
        data.push(chunk);
        dataLen += chunk.length;
    }).on('end', function () {
        var buf = Buffer.alloc(dataLen);
        for (var i = 0, len = data.length, pos = 0; i < len; i++) {
            data[i].copy(buf, pos);
            pos += data[i].length;
        }
        var zip = new AdmZip(buf);
        var zipEntries = zip.getEntries();
        console.log(zipEntries.length)
        for (var i = 0; i < zipEntries.length; i++) {
            if (zipEntries[i].entryName.match(/readme/))
                console.log(zip.readAsText(zipEntries[i]));
        }
    });
});
The idea is to create an array of buffers and concatenate them into a new one at the end. This is due to the fact that buffers cannot be resized.
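For what it's worth, the manual copy loop can also be replaced by Buffer.concat, which does the same allocation and copying in one call. A condensed sketch using the same file_url:
var AdmZip = require('adm-zip');
var http = require('http');

http.get(file_url, function (res) {
    var data = [];
    res.on('data', function (chunk) {
        data.push(chunk);
    }).on('end', function () {
        // Buffer.concat computes the total length itself when it is not passed explicitly
        var zip = new AdmZip(Buffer.concat(data));
        zip.getEntries().forEach(function (entry) {
            if (entry.entryName.match(/readme/i))
                console.log(zip.readAsText(entry));
        });
    });
});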
Update
This is a simpler solution that uses the request module to obtain the response in a buffer, by setting encoding: null in the options. It also follows redirects and resolves http/https automatically.
var file_url = 'https://github.com/mihaifm/linq/releases/download/3.1.1/linq.js-3.1.1.zip';
var AdmZip = require('adm-zip');
var request = require('request');

request.get({url: file_url, encoding: null}, (err, res, body) => {
    var zip = new AdmZip(body);
    var zipEntries = zip.getEntries();
    console.log(zipEntries.length);
    zipEntries.forEach((entry) => {
        if (entry.entryName.match(/readme/i))
            console.log(zip.readAsText(entry));
    });
});
The body of the response is a buffer that can be passed directly to AdmZip, simplifying the whole process.
Sadly, you can't pipe the response stream straight into the unzip job the way the node zlib library allows; you have to cache the data and wait for the end of the response. For big files I suggest piping the response to an fs stream instead, otherwise you will fill up your memory in a blink!
I don't completely understand what you are trying to do, but IMHO this is the best approach. You should keep your data in memory only for as long as you really need it, and then stream it to the csv parser.
If you want to keep all your data in memory, you can replace the csv parser method fromPath with from, which takes a buffer instead, and in getData return unzipped directly.
You can use adm-zip (as @mihai said) instead of node-zip; just pay attention, because adm-zip is not yet published on npm, so you need:
$ npm install git://github.com/cthackers/adm-zip.git
N.B. Assumption: the zip file contains only one file
var request = require('request'),
    fs = require('fs'),
    csv = require('csv'),
    NodeZip = require('node-zip')

function getData(tmpFolder, url, callback) {
    var tempZipFilePath = tmpFolder + new Date().getTime() + Math.random()
    var tempZipFileStream = fs.createWriteStream(tempZipFilePath)
    request.get({
        url: url,
        encoding: null
    }).on('end', function () {
        fs.readFile(tempZipFilePath, 'base64', function (err, zipContent) {
            var zip = new NodeZip(zipContent, { base64: true })
            Object.keys(zip.files).forEach(function (filename) {
                var tempFilePath = tmpFolder + new Date().getTime() + Math.random()
                var unzipped = zip.files[filename].data
                fs.writeFile(tempFilePath, unzipped, function (err) {
                    callback(err, tempFilePath)
                })
            })
        })
    }).pipe(tempZipFileStream)
}

getData('/tmp/', 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip', function (err, path) {
    if (err) {
        return console.error('error: %s', err.message)
    }
    var metadata = []
    csv().fromPath(path, {
        delimiter: '|',
        columns: true
    }).transform(function (data) {
        // do things with your data
        if (data.NAME[0] === '#') {
            metadata.push(data.NAME)
        } else {
            return data
        }
    }).on('data', function (data, index) {
        console.log('#%d %s', index, JSON.stringify(data, null, ' '))
    }).on('end', function (count) {
        console.log('Metadata: %s', JSON.stringify(metadata, null, ' '))
        console.log('Number of lines: %d', count)
    }).on('error', function (error) {
        console.error('csv parsing error: %s', error.message)
    })
})
If you're on macOS or Linux, you can use the unzip command to unzip from stdin.
In this example I'm reading the zip file from the filesystem into a Buffer object, but it works with a downloaded file as well:
// Get a Buffer with the zip content
var fs = require("fs")
  , zip = fs.readFileSync(__dirname + "/test.zip");

// Now the actual unzipping:
var spawn = require('child_process').spawn
  , fileToExtract = "test.js"
  // -p tells unzip to extract to stdout
  , unzip = spawn("unzip", ["-p", "/dev/stdin", fileToExtract])
  ;

// Write the Buffer to stdin
unzip.stdin.write(zip);

// Handle errors
unzip.stderr.on('data', function (data) {
    console.log("There has been an error: ", data.toString("utf-8"));
});

// Handle the unzipped stdout
unzip.stdout.on('data', function (data) {
    console.log("Unzipped file: ", data.toString("utf-8"));
});

unzip.stdin.end();
Which is actually just the node version of:
cat test.zip | unzip -p /dev/stdin test.js
EDIT: It's worth noting that this will not work if the input zip is too big to be read in one chunk from stdin. If you need to read bigger files, and your zip file contains only one file, you can use funzip instead of unzip:
var unzip = spawn("funzip");
If your zip file contains multiple files (and the file you want isn't the first one), I'm afraid you're out of luck. unzip needs to seek in the .zip file, since zip files are just a container, and unzip may just unzip the last file in it. In that case you have to save the file temporarily (node-temp comes in handy).
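To illustrate why funzip helps here (a sketch, not from the original answer; the URL and output filename are placeholders): since funzip reads the archive from stdin and writes its first entry to stdout, a download can be piped straight through it without buffering:
var spawn = require('child_process').spawn;
var fs = require('fs');
var request = require('request');

// funzip extracts only the first entry of the archive, streaming stdin -> stdout
var funzip = spawn('funzip');
request('http://example.com/archive.zip').pipe(funzip.stdin);
funzip.stdout.pipe(fs.createWriteStream('first-entry.txt'));
funzip.stderr.on('data', function (data) {
    console.error(data.toString('utf-8'));
});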
Two days ago, the module node-zip was released, which is a wrapper for the JavaScript-only version of Zip: JSZip.
var NodeZip = require('node-zip')
  , zip = new NodeZip(zipBuffer.toString("base64"), { base64: true })
  , unzipped = zip.files["your-text-file.txt"].data;

Download large file with node.js avoiding high memory consumption

I'm trying to create a file downloader as a background service, but when a large file is scheduled, it's first put in memory and then, at the end of the download, the file is written to disk.
How can I make the file be written gradually to disk, preserving memory, considering that I may have lots of files being downloaded at the same time?
Here's the code I'm using:
var sys = require("sys"),
    http = require("http"),
    url = require("url"),
    path = require("path"),
    fs = require("fs"),
    events = require("events");

var downloadfile = "http://nodejs.org/dist/node-v0.2.6.tar.gz";
var host = url.parse(downloadfile).hostname
var filename = url.parse(downloadfile).pathname.split("/").pop()
var theurl = http.createClient(80, host);
var requestUrl = downloadfile;

sys.puts("Downloading file: " + filename);
sys.puts("Before download request");

var request = theurl.request('GET', requestUrl, {"host": host});
request.end();

var dlprogress = 0;
setInterval(function () {
    sys.puts("Download progress: " + dlprogress + " bytes");
}, 1000);

request.addListener('response', function (response) {
    response.setEncoding('binary')
    sys.puts("File size: " + response.headers['content-length'] + " bytes.")
    var body = '';
    response.addListener('data', function (chunk) {
        dlprogress += chunk.length;
        body += chunk;
    });
    response.addListener("end", function () {
        fs.writeFileSync(filename, body, 'binary');
        sys.puts("After download finished");
    });
});
I changed the callback to:
request.addListener('response', function (response) {
    var downloadfile = fs.createWriteStream(filename, {'flags': 'a'});
    sys.puts("File size " + filename + ": " + response.headers['content-length'] + " bytes.");
    response.addListener('data', function (chunk) {
        dlprogress += chunk.length;
        downloadfile.write(chunk, 'binary');
    });
    response.addListener("end", function () {
        downloadfile.end();
        sys.puts("Finished downloading " + filename);
    });
});
This worked perfectly.
Does the request package work for your use case?
It lets you do things like this:
request(downloadurl).pipe(fs.createWriteStream(downloadtohere))
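A slightly fuller sketch of the same idea (downloadurl and downloadtohere are placeholders), with basic error handling on both sides of the pipe:
var fs = require('fs');
var request = require('request');

var out = fs.createWriteStream(downloadtohere);
request(downloadurl)
    .on('error', function (err) {
        // network / DNS errors on the request itself
        console.error('download failed: ' + err.message);
    })
    .pipe(out)
    .on('finish', function () {
        // the file has been fully flushed to disk
        console.log('saved to ' + downloadtohere);
    })
    .on('error', function (err) {
        // filesystem errors on the write stream
        console.error('write failed: ' + err.message);
    });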
Take a look at http-request:
// shorthand syntax, buffered response
http.get('http://localhost/get', function (err, res) {
    if (err) throw err;
    console.log(res.code, res.headers, res.buffer.toString());
});

// save the response to 'myfile.bin' with a progress callback
http.get({
    url: 'http://localhost/get',
    progress: function (current, total) {
        console.log('downloaded %d bytes from %d', current, total);
    }
}, 'myfile.bin', function (err, res) {
    if (err) throw err;
    console.log(res.code, res.headers, res.file);
});
When downloading a large file, please use fs.write and not writeFile, as writeFile will overwrite the previously written content.
function downloadfile(res) {
    // `options`, `sendstatus` and `sendendstatus` are assumed to be defined elsewhere
    var size = 0;
    var requestserver = http.request(options, function (r) {
        console.log('STATUS: ' + r.statusCode);
        console.log('HEADERS: ' + JSON.stringify(r.headers));
        var fd = fs.openSync('sai.tar.gz', 'w');
        r.on('data', function (chunk) {
            size += chunk.length;
            console.log(size + ' bytes received');
            sendstatus(res, size);
            fs.write(fd, chunk, 0, chunk.length, null, function (er, written) {
            });
        });
        r.on('end', function () {
            console.log('\nended from server');
            fs.closeSync(fd);
            sendendstatus(res);
        });
    });
    requestserver.end(); // the request must be ended for it to actually be sent
}
Instead of holding the content in memory in the "data" event listener, you should write to the file in append mode.
Use streams like Carter Cole suggested. Here is a more complete example
var inspect = require('eyespect').inspector();
var request = require('request');
var filed = require('filed');
var temp = require('temp');

var downloadURL = 'http://upload.wikimedia.org/wikipedia/commons/e/ec/Hazard_Creek_Kayaker.JPG';
var downloadPath = temp.path({prefix: 'singlePageRaw', suffix: '.jpg'});
var downloadFile = filed(downloadPath);
var r = request(downloadURL).pipe(downloadFile);

r.on('data', function (data) {
    inspect('binary data received');
});
downloadFile.on('end', function () {
    inspect(downloadPath, 'file downloaded to path');
});
downloadFile.on('error', function (err) {
    inspect(err, 'error downloading file');
});
You may need to install the required modules, which you can do via:
npm install filed request eyespect temp
