NodeJS - Downloading from Google Drive - javascript

I have this piece of code in order to download files from Google Drive:
function downloadDrive(fileId, callback) {
  var fileExt = fileId.split(".");
  var file = Date.now() + "." + fileExt[fileExt.length - 1];
  var dest = fs.createWriteStream("./files/" + file);

  service.files.get({
    auth: oauth2Client,
    fileId: fileExt[0],
    alt: "media"
  })
  .on("finish", function() {
    callback(file);
  })
  .on("error", function(err) {
    console.log("Error during download", err);
  })
  .pipe(dest);
}
It works very well on small files (~500 MB). However, when trying to download a fairly large gzip file (~3 GB), it throws the following error:
buffer.js:23
const ui8 = new Uint8Array(size);
^
RangeError: Invalid typed array length
at new Uint8Array (native)
at createBuffer (buffer.js:23:15)
at allocate (buffer.js:98:12)
at new Buffer (buffer.js:53:12)
at Function.Buffer.concat (buffer.js:225:16)
at BufferList.copy (/Synology/server_Metagenomics/server/node_modules/googleapis/node_modules/google-auth-library/node_modules/request/node_modules/bl/bl.js:124:21)
at BufferList.slice (/Synology/server_Metagenomics/server/node_modules/googleapis/node_modules/google-auth-library/node_modules/request/node_modules/bl/bl.js:99:15)
at BufferList.toString (/Synology/server_Metagenomics/server/node_modules/googleapis/node_modules/google-auth-library/node_modules/request/node_modules/bl/bl.js:166:15)
at Request.<anonymous> (/Synology/server_Metagenomics/server/node_modules/googleapis/node_modules/google-auth-library/node_modules/request/request.js:1035:36)
at emitOne (events.js:82:20)
at Request.emit (events.js:169:7)
I didn't find a lot of information about it. What is going on?

You can try the following to download the file. The error seems to occur because the whole large file is being buffered in memory rather than streamed.
var request = require('request');
var fs = require('fs');

var filename = 'zzz.txt';
var proxyUrl = "http://" + user + ":" + password + "@" + host + ":" + port;
var token = 'YOUR_TOKEN';

var req = request.get('https://www.googleapis.com/drive/v3/files/YOUR_FILE_ID?alt=media', {
  'auth': {
    'bearer': token
  },
  'proxy': proxyUrl
}).on('response', function(res) {
  // create file write stream
  var fws = fs.createWriteStream(filename);
  // setup piping
  res.pipe(fws);
  res.on('error', function(err) {
    console.log("error occurred.....", err);
  });
  res.on('end', function() {
    console.log('Done');
    // go on with processing
  });
});
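Separately, newer releases of the googleapis client let you request a stream directly, which avoids buffering the body. A rough sketch, assuming a current googleapis version and an already-authorized oauth2Client (the function and path names here are illustrative, not from the original post):
const { google } = require('googleapis');
const fs = require('fs');

// assumes oauth2Client is an already-authorized OAuth2 client
const drive = google.drive({ version: 'v3', auth: oauth2Client });

async function downloadDrive(fileId, destPath) {
  const dest = fs.createWriteStream(destPath);
  // responseType: 'stream' keeps the body as a stream instead of buffering it
  const res = await drive.files.get(
    { fileId: fileId, alt: 'media' },
    { responseType: 'stream' }
  );
  return new Promise((resolve, reject) => {
    res.data
      .on('error', reject)
      .pipe(dest)
      .on('finish', () => resolve(destPath))
      .on('error', reject);
  });
}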

Related

NodeJS finish writing the file with pipe before continuing with the next iteration

Similar to this question,
I have a script that downloads a file from a given url via http.get.
How can I make sure the pipe has finished before continuing to the next iteration, using just the http/https module?
//nodejs default libs
var fs = require("fs");
var http = require('https');

function dlFile(fullFilePath, dlUrl, fsize, fname){
  var file = fs.createWriteStream(fullFilePath); //fullFilePath will dictate where we will save the file + filename.
  var rsult = '';
  var downloadedFsize;
  var stats; //stats of the file will be included here

  var request = http.get(dlUrl, function(response) {
    let rsult = response.statusCode;
    //will respond with a 200 if the file is present
    //404 if file is missing
    response.pipe(file);
    /*pipe writes the file...
      how do we stop the iteration while it is not yet finished writing?
    */
    console.log(" \n FILE : " + fname);
    console.log("File analysis finished : statusCode: " + rsult + " || Saved on " + fullFilePath);
    console.log(' \n Downloaded from :' + dlUrl);
    console.log(' \n SQL File size is : ' + fsize);
    //identify filesize
    stats = fs.statSync(fullFilePath);
    downloadedFsize = stats["size"]; //0 because the pipe isn't finished yet...
    console.log(' actual file size is : ' + downloadedFsize);
  }).on('error', function(e) {
    console.error(e);
    //log that an error happened to the file
  }).on('end', function(e){
    //tried putting the above script here but nothing happens
  });
  return rsult;
}
Is there a cleaner approach similar to what I have in mind above, or should I approach this differently? I tried putting the code in .on('end') but it does nothing.
The end event is not triggered on the request; it is triggered on the response (docs):
response.on("end", function() {
  console.log("done");
});
As @Jonas Wilms says, the trigger was indeed on the response.
//nodejs default libs
var fs = require("fs");
var http = require('https');

function dlFile(fullFilePath, dlUrl, fsize, fname){
  var file = fs.createWriteStream(fullFilePath); //fullFilePath will dictate where we will save the file + filename.
  var rsult = '';
  var downloadedFsize;
  var stats; //stats of the file will be included here

  var request = http.get(dlUrl, function(response) {
    let rsult = response.statusCode;
    //will respond with a 200 if the file is present
    //404 if file is missing
    response.pipe(file).on('finish', function(e){
      console.log(" \n FILE : " + fname);
      console.log("File analysis finished : statusCode: " + rsult + " || Saved on " + fullFilePath);
      console.log(' \n Downloaded from :' + dlUrl);
      console.log(' \n SQL File size is : ' + fsize);
      //identify filesize
      stats = fs.statSync(fullFilePath);
      downloadedFsize = stats["size"];
      console.log(' actual file size is : ' + downloadedFsize);
    });
    /*pipe writes the file above, and outputs the results once it's done */
  }).on('error', function(e) {
    console.error(e);
    //log that an error happened to the file
  });
  return rsult;
}
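On a recent Node version the same "wait until the pipe has finished" logic can also be expressed with the promise-based pipeline helper. A minimal sketch, assuming Node 15+ for the built-in stream/promises module (the function and variable names below are illustrative):
const fs = require('fs');
const https = require('https');
const { pipeline } = require('stream/promises'); // available from Node 15+

function dlFile(fullFilePath, dlUrl) {
  return new Promise((resolve, reject) => {
    https.get(dlUrl, (response) => {
      // pipeline resolves once the write stream has fully flushed to disk
      pipeline(response, fs.createWriteStream(fullFilePath))
        .then(() => resolve(fs.statSync(fullFilePath).size))
        .catch(reject);
    }).on('error', reject);
  });
}

// usage inside an async function: await each download before the next iteration
// const size = await dlFile(localPath, url);
// console.log('saved', size, 'bytes');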

Error while downloading multiple files from AWS S3 using Nodejs

I am getting the error "File download failed with error write after end" while trying to download multiple files from AWS S3 using the code snippet below.
Can someone help me figure out the root cause of the error? It would be great if anyone could suggest a solution as well.
The code below works without any issues if only a single file needs to be downloaded.
var AWS = require('aws-sdk');
var fs = require('fs');

module.exports.download = function (req, res) {
  var S3_BUCKET = 'mybucketname';
  var s3 = new AWS.S3({
    accessKeyId: process.env.ACCESSKEY,
    secretAccessKey: process.env.SECRETKEY,
    region: process.env.REGION
  });
  var os = require('os');
  var filenames = "file1.jpg,file2.jpg";
  var str_array = filenames.split(',');

  for (var i = 0; i < str_array.length; i++) {
    var filename = str_array[i].trim();
    localFileName = os.homedir() + "\\" + "Downloads" + "\\" + filename,
    file = fs.createWriteStream(localFileName);

    s3.getObject({
      Bucket: S3_BUCKET,
      Key: filename
    })
    .on('error', function (err) {
      res.end("File download failed with error " + err.message);
    })
    .on('httpData', function (chunk) {
      file.write(chunk);
    })
    .on('httpDone', function () {
      file.end();
    })
    .send();
  }
  res.end("Files have been downloaded successfully");
};
It is better to use the pipe function rather than handling the chunks manually.
s3.getObject(options)
  .createReadStream()
  .on('error', e => ...)
  .pipe(file)
  .on('error', e => ...);
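Applied to the original loop, that suggestion could look roughly like the sketch below (assuming AWS SDK v2, with credentials taken from the environment or default chain). Creating the write stream inside the loop gives each download its own pair of streams, which avoids the "write after end" error:
var AWS = require('aws-sdk');
var fs = require('fs');
var os = require('os');
var path = require('path');

var s3 = new AWS.S3(); // credentials picked up from the environment / default chain
var S3_BUCKET = 'mybucketname';
var filenames = "file1.jpg,file2.jpg";

filenames.split(',').forEach(function (name) {
  var filename = name.trim();
  var localFileName = path.join(os.homedir(), 'Downloads', filename);
  var file = fs.createWriteStream(localFileName);

  s3.getObject({ Bucket: S3_BUCKET, Key: filename })
    .createReadStream()
    .on('error', function (err) {
      console.error('Download failed for ' + filename + ': ' + err.message);
    })
    .pipe(file);
});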

Writing an image to file, received over an HTTP request in Node

I'm certain I'm missing something obvious, but the gist of the problem is I'm receiving a PNG from a Mapbox call with the intent of writing it to the file system and serving it to the client. I've successfully relayed the call, received a response of raw data and written a file. The problem is that my file ends up truncated no matter what path I take, and I've exhausted the answers I've found skirting the subject. I've dumped the raw response to the log, and it's robust, but any file I make tends to be about a chunk's worth of unreadable data.
Here's the code I've got at present for writing the file. I tried this buffer approach as a last-ditch attempt after several failed and comparably fruitless iterations. Any help would be greatly appreciated.
module.exports = function(req, res, cb) {
  var cartography = function() {
    return https.get({
      hostname: 'api.mapbox.com',
      path: '/v4/mapbox.wheatpaste/' + req.body[0] + ',' + req.body[1] + ',6/750x350.png?access_token=' + process.env.MAPBOX_API
    }, function(res) {
      var body = '';
      res.on('data', function(chunk) {
        body += chunk;
      });
      res.on('end', function() {
        var mapPath = 'map' + req.body[0] + req.body[1] + '.png';
        var map = new Buffer(body, 'base64');
        fs.writeFile(__dirname + '/client/images/maps/' + mapPath, map, 'base64', function(err) {
          if (err) throw err;
          cb(mapPath);
        });
      });
    });
  };
  cartography();
};
It is possible to rewrite your code as a more compact subroutine:
const fs = require('fs');
const https = require('https');

https.get(url, (response) => { // request itself
  if (response) {
    let imageName = 'image.png'; // for this purpose I usually use crypto
    response.pipe( // pipe response to a write stream (file)
      fs.createWriteStream( // create write stream
        './public/' + imageName // create a file with name image.png
      )
    );
    return imageName; // if public folder is set as default in app.js
  } else {
    return false;
  }
});
You could get the original name and extension from the url, but it is safer to generate a new name with crypto and derive the file extension, as mentioned, from the url or with the read-chunk and file-type modules.
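A minimal sketch of that idea: generate a random name with crypto and only report success once the write stream emits 'finish' (the url variable and the ./public/ folder are placeholders, as in the answer above):
const fs = require('fs');
const https = require('https');
const crypto = require('crypto');

https.get(url, (response) => {
  // random, collision-resistant filename; the extension is assumed from the url
  const imageName = crypto.randomBytes(16).toString('hex') + '.png';
  const dest = fs.createWriteStream('./public/' + imageName);

  response.pipe(dest);
  dest.on('finish', () => {
    console.log('saved ' + imageName); // safe to serve the file from here on
  });
  dest.on('error', (err) => {
    console.error('failed to save image', err);
  });
});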

GridFS put() with Mongoose dies in write concern code?

I have the following code (trimmed, assume all the closing stuff is there), which dies deep down inside GridFS:
var Grid = require('mongodb').Grid;
var mongoose = require('mongoose');
var db = mongoose.connect('mongodb://localhost:27017/ksnap');

router.route('/').post(function(req, res) {
  var post = new Post();
  var busboy = new Busboy({ headers: req.headers });
  req.pipe(busboy);
  busboy.on('file', function(fieldname, file, filename, encoding, mimetype) {
    console.log('File [' + fieldname + ']: filename: ' + filename + ', encoding: ' + encoding + ', mimetype: ' + mimetype);
    if (fieldname != 'img') { return; }
    var bufs = [];
    file.on('data', function(data) {
      console.log('File [' + fieldname + '] got ' + data.length + ' bytes');
      bufs.push(data);
    }); // busboy file on data
    file.on('end', function() {
      console.log('File [' + fieldname + '] Finished');
      var buf = Buffer.concat(bufs);
      var grid = new Grid(db, 'fs');
      grid.put(buf, {metadata: {category: 'image'}, content_type: 'image'}, function(err, result) {
        if (err) { console.log(err); } else { console.log(result); }
      });
Stack trace:
/opt/ksnap-server/node_modules/mongodb/lib/mongodb/gridfs/gridstore.js:1552
} else if(self.safe.w != null || typeof self.safe.j == 'boolean' || typeof s
^
TypeError: Cannot read property 'w' of undefined
at _getWriteConcern (/opt/ksnap-server/node_modules/mongodb/lib/mongodb/gridfs/gridstore.js:1552:22)
at Stream.GridStore (/opt/ksnap-server/node_modules/mongodb/lib/mongodb/gridfs/gridstore.js:100:23)
at Grid.put (/opt/ksnap-server/node_modules/mongodb/lib/mongodb/gridfs/grid.js:52:19)
at FileStream.<anonymous> (/opt/ksnap-server/server.js:83:13)
at FileStream.emit (events.js:117:20)
at _stream_readable.js:943:16
at process._tickCallback (node.js:419:13)
Busboy returns a stream which I put into a buffer, so far so good. This works fine; I've tested it. But when I try to grid.put() the buffer, it dies as above. I've tried to trace it, but I'm having trouble. As far as I can tell, all the options get eaten in grid.js, so by the time they get passed down to gridstore.js it's just an empty object. Mongoose just doesn't set this, I guess.
I was able to get past this error by manually setting db.safe = {w: 1}; after opening the connection, however when I did the grid.put() it just stuck there. Swapping out mongoose for a regular mongodb connection worked, so I guess currently mongoose just doesn't work with GridFS.
I was finally able to get everything (apparently) working by adding the streamifier and gridfs-stream modules, and the following mongo setup:
var streamifier = require('streamifier');
var Grid = require('gridfs-stream');
mongoose.connect('mongodb://localhost:27017/ksnap');
Then later, when I'm ready to save the file to GridFS:
var gfs = new Grid(mongoose.connection.db, mongoose.mongo);
var writestream = gfs.createWriteStream({
  mode: 'w',
  filename: post.id,
  content_type: 'image/jpeg'
});
streamifier.createReadStream(buffer).pipe(writestream);
writestream.on('close', function (file) {
  console.log("saved 300px as " + file.filename);
});
And save the post document itself to MongoDB:
post.save(function(err) {
  if (err) { res.send(err); }
  console.log('saved post ' + post.id);
  res.send(post);
});
This was the combination of options that worked for me. One of the keys was using mongoose.connect(), not mongoose.createConnection(), which would let me save the files, but not the documents.
I know this has been a while, but I saw the same issue: make sure your mongoose session is connected to the DB, i.e. mongoose.connection.once("connected", function () {...}) has been called, and only then load the required files. This ensures the db object in the connection is bound to an existing mongo session. If you find that mongoose.connection.db is null while mongoose.connection is NOT null, then you will have initialized your grid stream with an uninitialized mongodb connection.
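A minimal sketch of that advice, assuming gridfs-stream as in the accepted answer: only construct the grid once the connection has actually opened, so mongoose.connection.db is guaranteed to be set.
var mongoose = require('mongoose');
var Grid = require('gridfs-stream');

mongoose.connect('mongodb://localhost:27017/ksnap');

var gfs;
mongoose.connection.once('connected', function () {
  // at this point mongoose.connection.db is no longer null
  gfs = new Grid(mongoose.connection.db, mongoose.mongo);
  // safe to create read/write streams from here on
});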

Download large file with node.js avoiding high memory consumption

I'm trying to create a file downloader as a background service, but when a large file is scheduled, it's first put in memory and then, at the end of the download, the file is written to disk.
How can I make the file be written gradually to disk, preserving memory, considering that I may have lots of files being downloaded at the same time?
Here's the code I'm using:
var sys = require("sys"),
    http = require("http"),
    url = require("url"),
    path = require("path"),
    fs = require("fs"),
    events = require("events");

var downloadfile = "http://nodejs.org/dist/node-v0.2.6.tar.gz";
var host = url.parse(downloadfile).hostname;
var filename = url.parse(downloadfile).pathname.split("/").pop();
var theurl = http.createClient(80, host);
var requestUrl = downloadfile;

sys.puts("Downloading file: " + filename);
sys.puts("Before download request");
var request = theurl.request('GET', requestUrl, {"host": host});
request.end();

var dlprogress = 0;
setInterval(function () {
  sys.puts("Download progress: " + dlprogress + " bytes");
}, 1000);

request.addListener('response', function (response) {
  response.setEncoding('binary');
  sys.puts("File size: " + response.headers['content-length'] + " bytes.");
  var body = '';
  response.addListener('data', function (chunk) {
    dlprogress += chunk.length;
    body += chunk;
  });
  response.addListener("end", function() {
    fs.writeFileSync(filename, body, 'binary');
    sys.puts("After download finished");
  });
});
I changed the callback to:
request.addListener('response', function (response) {
  var downloadfile = fs.createWriteStream(filename, {'flags': 'a'});
  sys.puts("File size " + filename + ": " + response.headers['content-length'] + " bytes.");
  response.addListener('data', function (chunk) {
    dlprogress += chunk.length;
    downloadfile.write(chunk, 'binary');
  });
  response.addListener("end", function() {
    downloadfile.end();
    sys.puts("Finished downloading " + filename);
  });
});
This worked perfectly.
Does the request package work for your uses?
It lets you do things like this:
request(downloadurl).pipe(fs.createWriteStream(downloadtohere))
Take a look at http-request:
// shorthand syntax, buffered response
http.get('http://localhost/get', function (err, res) {
  if (err) throw err;
  console.log(res.code, res.headers, res.buffer.toString());
});

// save the response to 'myfile.bin' with a progress callback
http.get({
  url: 'http://localhost/get',
  progress: function (current, total) {
    console.log('downloaded %d bytes from %d', current, total);
  }
}, 'myfile.bin', function (err, res) {
  if (err) throw err;
  console.log(res.code, res.headers, res.file);
});
When downloading a large file, please use fs.write and not writeFile, as the latter will override the previous content.
function downloadfile(res) {
  var size = 0;
  var requestserver = http.request(options, function(r) {
    console.log('STATUS: ' + r.statusCode);
    console.log('HEADERS: ' + JSON.stringify(r.headers));
    var fd = fs.openSync('sai.tar.gz', 'w');
    r.on('data', function (chunk) {
      size += chunk.length;
      console.log(size + ' bytes received');
      sendstatus(res, size);
      fs.write(fd, chunk, 0, chunk.length, null, function(er, written) {
      });
    });
    r.on('end', function() {
      console.log('\nended from server');
      fs.closeSync(fd);
      sendendstatus(res);
    });
  });
}
Instead of holding the content in memory in the "data" event listener, you should write to the file in append mode.
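A minimal sketch of that approach (the URL and filename are placeholders): open the write stream in append mode and write each chunk as it arrives, so nothing accumulates in memory.
var https = require('https');
var fs = require('fs');

var out = fs.createWriteStream('download.bin', { flags: 'a' });
https.get('https://example.com/large-file', function (response) {
  response.on('data', function (chunk) {
    out.write(chunk); // each chunk goes straight to disk
  });
  response.on('end', function () {
    out.end();
    console.log('download complete');
  });
});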
Use streams as Carter Cole suggested. Here is a more complete example:
var inspect = require('eyespect').inspector();
var request = require('request');
var filed = require('filed');
var temp = require('temp');

var downloadURL = 'http://upload.wikimedia.org/wikipedia/commons/e/ec/Hazard_Creek_Kayaker.JPG';
var downloadPath = temp.path({prefix: 'singlePageRaw', suffix: '.jpg'});
var downloadFile = filed(downloadPath);
var r = request(downloadURL).pipe(downloadFile);

r.on('data', function(data) {
  inspect('binary data received');
});
downloadFile.on('end', function () {
  inspect(downloadPath, 'file downloaded to path');
});
downloadFile.on('error', function (err) {
  inspect(err, 'error downloading file');
});
You may need to install the modules, which you can do via:
npm install filed request eyespect temp
