I use Javascript to get the file names from AWS S3 bucket. After i get the complete list of file names from the bucket, i parse & manipulate it & list them in front-end. In this case, i want to get rid of the problem which will arise when the bucket with filled with huge amount of data (i.e) i'll run out of memory when i try to manipulate with a shocking amount of data. So, i only need the file names of the very first layer.
Example :
The complete object in S3 bucket :
{
new_folder: {...},
some_file.png: {...}
}
Here i only need the names -> new_folder, some_file.png
Below is the code which i use now :
const AWS = require('aws-sdk');
export default async function wasabiActions(dataObj) {
var accessKeyId = '************';
var secretAccessKey = '********************';
var wasabiEndpoint = new AWS.Endpoint('s3.us-west-1.wasabisys.com');
var s3 = await new AWS.S3({
endpoint: wasabiEndpoint,
accessKeyId: accessKeyId,
secretAccessKey: secretAccessKey
});
var params = {
Bucket: 'bucket_name',
};
s3.listObjectsV2(params, function(err, data) {
if (!err) {
var files = [];
data.Contents.forEach(function(element) {
files.push(element.Key.split('/').filter((name) => name.length > 0));
});
console.log(files);
var parsedData = wasabiDataParser(files);
console.log(parsedData);
}
});
}
Thanks in advance! :)
You can use pagination. Some AWS operations return results that are incomplete and require subsequent requests in order to attain the entire result set. Check here for more details. https://boto3.amazonaws.com/v1/documentation/api/latest/guide/paginators.html
I have an API method that when called and passed an array of file keys, downloads them from S3. I'd like to stream them, rather than download to disk, followed by zipping the files and returning that to the client.
This is what my current code looks like:
reports.get('/xxx/:filenames ', async (req, res) => {
var AWS = require('aws-sdk');
var s3 = new AWS.S3();
var str_array = filenames.split(',');
for (var i = 0; i < str_array.length; i++) {
var filename = str_array[i].trim();
localFileName = './' + filename;
var params = {
Bucket: config.reportBucket,
Key: filename
}
s3.getObject(params, (err, data) => {
if (err) console.error(err)
var file = require('fs').createWriteStream(localFileName);
s3.getObject(params).createReadStream().pipe(file);
console.log(file);
})
}
});
How would I stream the files rather than downloading them to disk and how would I zip them to return that to the client?
Main problem is to zip multiple files.
More specifically, download them from AWS S3 in bulk.
I've searched through AWS SDK and didn't find bulk s3 operations.
Which brings us to one possible solution:
Load files one by one and store them to folder
Zip folder (with some package like this)
Send zipped folder
This is raw and untested example, but it might give you the idea:
// Always import packages at the beginning of the file.
const AWS = require('aws-sdk');
const fs = require('fs');
const zipFolder = require('zip-folder');
const s3 = new AWS.S3();
reports.get('/xxx/:filenames ', async (req, res) => {
const filesArray = filenames.split(',');
for (const fileName of filesArray) {
const localFileName = './' + filename.trim();
const params = {
Bucket: config.reportBucket,
Key: filename
}
// Probably you'll need here some Promise logic, to handle stream operation end.
const fileStream = fs.createWriteStream(localFileName);
s3.getObject(params).createReadStream().pipe(fileStream);
}
// After that all required files would be in some target folder.
// Now you need to compress the folder and send it back to user.
// We cover callback function in promise, to make code looks "sync" way.
await new Promise(resolve => zipFolder('/path/to/the/folder', '/path/to/archive.zip', (err) => {resolve()});
// And now you can send zipped folder to user (also using streams).
fs.createReadStream('/path/to/archive.zip').pipe(res);
});
Info about streams link and link
Attention: You'll probably could have some problems with async behaviour, according to streams nature, so, please, first of all, check if all files are stored in folder before zipping.
Just a mention, I've not tested this code. So if any questions appear, let's debug together
In my controller, when I try to readFile send from browser by AJAX, suddenly 1 directory created into my public folder with something like
'3d6c3049-839b-40ce-9aa3-b76f08bf140b' -> file -> myfile
exports.assetAdd = function(req, res) {
var d = JSON.parse(req.body.data);
var f = req.files.file;
return ;
//here i can see my unwanted created directory
// Create S3 service object
var s3 = new AWS.S3({
apiVersion: '2017-03-01'
});
// console.log("file",f)
fs.readFile(f.file, function(err, data) {
return res.json(data);
How to remove this?
This is issue with the package, Already opened issue
https://github.com/yahoo/express-busboy/issues/16
I am successfully uploading an image to my Amazon S3 bucket, but when I go to the URL that it is generated, my image is instantly downloaded instead of being displayed in the browser. My code is below:
var fs = require('fs');
var AWS = require('aws-sdk');
AWS.config = new AWS.Config();
AWS.config.accessKeyId = "MY_ACCESS_KEY";
AWS.config.secretAccessKey = "MY_SECRET_KEY";
AWS.config.region = "us-west-2";
AWS.config.apiVersions = {
"s3": "2006-03-01"
}
var s3 = new AWS.S3();
var bodystream = fs.createReadStream('./meme1.png');
var params = {
'Bucket': 'MY_BUCKET_NAME',
'Key': 'uploads/images/' + 'Img4.png',
'Body': bodystream,
'ContentEncoding': 'base64',
'ContentType ': 'image/png',
'ACL':'public-read-write'
};
s3.upload(params, function(err, data){
console.log('RESPONSE: ', err, data);
});
Some other posts say to make sure the ContentType property of the params object is correct but I am pretty sure that mine is correct. How do I fix this and make the image display in the browser instead of downloading?
You are missing the Content Disposition, by default it's "attachement" which means the file is being downloaded instead of displayed.
ContentDisposition: inline
(Answering this Question for Googlers)
I want to download a zip file from the internet and unzip it in memory without saving to a temporary file. How can I do this?
Here is what I tried:
var url = 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip';
var request = require('request'), fs = require('fs'), zlib = require('zlib');
request.get(url, function(err, res, file) {
if(err) throw err;
zlib.unzip(file, function(err, txt) {
if(err) throw err;
console.log(txt.toString()); //outputs nothing
});
});
[EDIT]
As, suggested, I tried using the adm-zip library and I still cannot make this work:
var ZipEntry = require('adm-zip/zipEntry');
request.get(url, function(err, res, zipFile) {
if(err) throw err;
var zip = new ZipEntry();
zip.setCompressedData(new Buffer(zipFile.toString('utf-8')));
var text = zip.getData();
console.log(text.toString()); // fails
});
You need a library that can handle buffers. The latest version of adm-zip will do:
npm install adm-zip
My solution uses the http.get method, since it returns Buffer chunks.
Code:
var file_url = 'http://notepad-plus-plus.org/repository/7.x/7.6/npp.7.6.bin.x64.zip';
var AdmZip = require('adm-zip');
var http = require('http');
http.get(file_url, function(res) {
var data = [], dataLen = 0;
res.on('data', function(chunk) {
data.push(chunk);
dataLen += chunk.length;
}).on('end', function() {
var buf = Buffer.alloc(dataLen);
for (var i = 0, len = data.length, pos = 0; i < len; i++) {
data[i].copy(buf, pos);
pos += data[i].length;
}
var zip = new AdmZip(buf);
var zipEntries = zip.getEntries();
console.log(zipEntries.length)
for (var i = 0; i < zipEntries.length; i++) {
if (zipEntries[i].entryName.match(/readme/))
console.log(zip.readAsText(zipEntries[i]));
}
});
});
The idea is to create an array of buffers and concatenate them into a new one at the end. This is due to the fact that buffers cannot be resized.
Update
This is a simpler solution that uses the request module to obtain the response in a buffer, by setting encoding: null in the options. It also follows redirects and resolves http/https automatically.
var file_url = 'https://github.com/mihaifm/linq/releases/download/3.1.1/linq.js-3.1.1.zip';
var AdmZip = require('adm-zip');
var request = require('request');
request.get({url: file_url, encoding: null}, (err, res, body) => {
var zip = new AdmZip(body);
var zipEntries = zip.getEntries();
console.log(zipEntries.length);
zipEntries.forEach((entry) => {
if (entry.entryName.match(/readme/i))
console.log(zip.readAsText(entry));
});
});
The body of the response is a buffer that can be passed directly to AdmZip, simplifying the whole process.
Sadly you can't pipe the response stream into the unzip job as node zlib lib allows you to do, you have to cache and wait the end of the response. I suggest you to pipe the response to a fs stream in case of big files, otherwise you will full fill your memory in a blink!
I don't completely understand what you are trying to do, but imho this is the best approach. You should keep your data in memory only the time you really need it, and then stream to the csv parser.
If you want to keep all your data in memory you can replace the csv parser method fromPath with from that takes a buffer instead and in getData return directly unzipped
You can use the AMDZip (as #mihai said) instead of node-zip, just pay attention because AMDZip is not yet published in npm so you need:
$ npm install git://github.com/cthackers/adm-zip.git
N.B. Assumption: the zip file contains only one file
var request = require('request'),
fs = require('fs'),
csv = require('csv')
NodeZip = require('node-zip')
function getData(tmpFolder, url, callback) {
var tempZipFilePath = tmpFolder + new Date().getTime() + Math.random()
var tempZipFileStream = fs.createWriteStream(tempZipFilePath)
request.get({
url: url,
encoding: null
}).on('end', function() {
fs.readFile(tempZipFilePath, 'base64', function (err, zipContent) {
var zip = new NodeZip(zipContent, { base64: true })
Object.keys(zip.files).forEach(function (filename) {
var tempFilePath = tmpFolder + new Date().getTime() + Math.random()
var unzipped = zip.files[filename].data
fs.writeFile(tempFilePath, unzipped, function (err) {
callback(err, tempFilePath)
})
})
})
}).pipe(tempZipFileStream)
}
getData('/tmp/', 'http://bdn-ak.bloomberg.com/precanned/Comdty_Calendar_Spread_Option_20120428.txt.zip', function (err, path) {
if (err) {
return console.error('error: %s' + err.message)
}
var metadata = []
csv().fromPath(path, {
delimiter: '|',
columns: true
}).transform(function (data){
// do things with your data
if (data.NAME[0] === '#') {
metadata.push(data.NAME)
} else {
return data
}
}).on('data', function (data, index) {
console.log('#%d %s', index, JSON.stringify(data, null, ' '))
}).on('end',function (count) {
console.log('Metadata: %s', JSON.stringify(metadata, null, ' '))
console.log('Number of lines: %d', count)
}).on('error', function (error) {
console.error('csv parsing error: %s', error.message)
})
})
If you're under MacOS or Linux, you can use the unzip command to unzip from stdin.
In this example I'm reading the zip file from the filesystem into a Buffer object but it works
with a downloaded file as well:
// Get a Buffer with the zip content
var fs = require("fs")
, zip = fs.readFileSync(__dirname + "/test.zip");
// Now the actual unzipping:
var spawn = require('child_process').spawn
, fileToExtract = "test.js"
// -p tells unzip to extract to stdout
, unzip = spawn("unzip", ["-p", "/dev/stdin", fileToExtract ])
;
// Write the Buffer to stdin
unzip.stdin.write(zip);
// Handle errors
unzip.stderr.on('data', function (data) {
console.log("There has been an error: ", data.toString("utf-8"));
});
// Handle the unzipped stdout
unzip.stdout.on('data', function (data) {
console.log("Unzipped file: ", data.toString("utf-8"));
});
unzip.stdin.end();
Which is actually just the node version of:
cat test.zip | unzip -p /dev/stdin test.js
EDIT: It's worth noting that this will not work if the input zip is too big to be read in one chunk from stdin. If you need to read bigger files, and your zip file contains only one file, you can use funzip instead of unzip:
var unzip = spawn("funzip");
If your zip file contains multiple files (and the file you want isn't the first one) I'm afraid to say you're out of luck. Unzip needs to seek in the .zip file since zip files are just a container, and unzip may just unzip the last file in it. In that case you have to save the file temporarily (node-temp comes in handy).
Two days ago the module node-zip has been released, which is a wrapper for the JavaScript only version of Zip: JSZip.
var NodeZip = require('node-zip')
, zip = new NodeZip(zipBuffer.toString("base64"), { base64: true })
, unzipped = zip.files["your-text-file.txt"].data;