Upload a large file using nodejs

I have the following Node.js code, which uploads a file by calling a server-side API (written by me) and passing the file content as a multipart request. The problem is that my code works perfectly with small files but fails with large files (1 MB or above). I'm pretty sure it's a problem in my code, but I'm not able to find out what it is.
// assume file content have been read into post_data array
//Make post
var google = http.createClient(443, host, secure = true);
var filepath = '/v2_0/put_file/';
var GMTdate = (new Date()).toGMTString();
var fileName = encodeURIComponent(destination);
console.log("fileName : " + fileName);
console.log("Path : " + filepath);
var header = {
    'Host': host,
    'Authorization': 'Basic ' + authStr,
    'Content-Type': 'multipart/form-data; boundary=0xLhTaLbOkNdArZ',
    'Last-Modified': GMTdate,
    'Filename': fileName,
    'Last-Access-By': username
};
var request = google.request('POST', filepath, header);
for (var i = 0; i < post_data.length; i++) {
    request.write(post_data[i]);
}
request.end();
request.addListener('response', function(response){
    var noBytest = 0;
    response.setEncoding('utf8');
    console.log('STATUS: ' + response);
    console.log('STATUS: ' + response.statusCode);
    console.log('HEADERS: ' + JSON.stringify(response.headers));
    console.log('File Size: ' + response.headers['content-length'] + " bytes.");
});
From the logs, I see that control reaches request.end(), but I never see the last few logs written inside the request.addListener() block.
I've been pulling my hair out for the last couple of days trying to understand why it works for small files but not for larger ones. I don't see any timeouts, and the code just seems to hang until I kill it.
Can anyone guide me as to what I am doing wrong?
UPDATE:
post_data is an array; here is what I'm doing:
post_data = [];
console.log('ContentType =' + ContentType + "\n\nEncoding Style =" + encodingStyle);
post_data.push(new Buffer(EncodeFilePart(boundary, ContentType, 'theFile', FileNameOnly), 'ascii'));
var file_contents = '';
var file_reader = fs.createReadStream(filename, {
    encoding: encodingStyle
});
file_reader.on('data', function(data){
    console.log('in data');
    file_contents += data;
});
file_reader.on('end', function(){
    post_data.push(new Buffer(file_contents, encodingStyle))
    post_data.push(new Buffer("\r\n--" + boundary + "--\r\n", 'ascii'));
    ...
    var request = google.request('POST', filepath, header);
    for (var i = 0; i < post_data.length; i++) {
        request.write(post_data[i]);
    }
I look forward to your suggestions.

You should be passing either a Buffer or a string to request.write. Is post_data an array of strings, or an array of arrays?
Also, you are posting it as multipart/form-data, so you have to format your data accordingly. Have you done so, or is post_data just the raw data from the file?
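For reference, here is a minimal sketch of how the body could be assembled entirely from Buffers and sent with a Content-Length header, so the binary file data never passes through a string. The host, authStr, endpoint path and field name below are placeholders based on the question, not a tested drop-in; with a hand-rolled multipart body, binary data mangled by string concatenation and a missing Content-Length (Node then falls back to chunked encoding, which some servers do not expect) are both common ways to end up with a hung upload.
// Minimal sketch (placeholders, not a tested drop-in): build the multipart
// body from Buffers only and send an explicit Content-Length.
var https = require('https');
var fs = require('fs');

var host = 'my.server.example';       // placeholder
var authStr = 'BASE64_CREDENTIALS';   // placeholder
var boundary = '0xLhTaLbOkNdArZ';

function uploadFile(filename, callback) {
    fs.readFile(filename, function(err, fileBuffer) { // no encoding => Buffer
        if (err) return callback(err);

        var preamble = new Buffer(
            '--' + boundary + '\r\n' +
            'Content-Disposition: form-data; name="theFile"; filename="' + filename + '"\r\n' +
            'Content-Type: application/octet-stream\r\n\r\n', 'ascii');
        var epilogue = new Buffer('\r\n--' + boundary + '--\r\n', 'ascii');
        var body = Buffer.concat([preamble, fileBuffer, epilogue]);

        var req = https.request({
            host: host,
            path: '/v2_0/put_file/',
            method: 'POST',
            headers: {
                'Authorization': 'Basic ' + authStr,
                'Content-Type': 'multipart/form-data; boundary=' + boundary,
                'Content-Length': body.length // tells the server where the body ends
            }
        }, function(response) {
            console.log('STATUS: ' + response.statusCode);
            callback(null, response);
        });

        req.on('error', callback);
        req.write(body);
        req.end();
    });
}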

Check out node-formidable and this post: http://debuggable.com/posts/parsing-file-uploads-at-500-mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
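If you also control the server side, a minimal node-formidable handler looks roughly like the sketch below (assumes formidable is installed; the port and response text are arbitrary choices, not from the question):
// Rough sketch of a server-side upload endpoint using node-formidable.
// Assumes `npm install formidable`; port 8080 is arbitrary.
var http = require('http');
var formidable = require('formidable');

http.createServer(function(req, res) {
    if (req.method === 'POST') {
        var form = new formidable.IncomingForm();
        form.parse(req, function(err, fields, files) {
            if (err) {
                res.writeHead(500);
                return res.end('Upload failed: ' + err.message);
            }
            // files contains the parsed uploads, keyed by form field name
            res.writeHead(200, { 'Content-Type': 'text/plain' });
            res.end('Received: ' + Object.keys(files).join(', '));
        });
        return;
    }
    res.writeHead(404);
    res.end();
}).listen(8080);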

NodeJS finish writing the file with pipe before continuing with the next iteration

Similar to this question,
I have a script that downloads a file from a given URL via http.get.
How can I make sure the pipe is finished before continuing to the next iteration, using just the http/https module?
//nodejs default libs
var fs = require("fs");
var http = require('https');

function dlFile(fullFilePath, dlUrl, fsize, fname){
    var file = fs.createWriteStream(fullFilePath); //fullFilePath will dictate where we will save the file + filename.
    var rsult ='';
    var downloadedFsize;
    var stats; //stats of the file will be included here

    var request = http.get( dlUrl, function(response) {
        let rsult = response.statusCode;
        //will respond with a 200 if the file is present
        //404 if file is missing
        response.pipe(file);
        /*pipe writes the file...
          how do we stop the iteration while it is not yet finished writing?
        */
        console.log(" \n FILE : " + fname);
        console.log("File analysis finished : statusCode: " + rsult + " || Saved on " + fullFilePath);
        console.log(' \n Downloaded from :' + dlUrl);
        console.log(' \n SQL File size is : ' + fsize);
        //identify filesize
        stats = fs.statSync(fullFilePath);
        downloadedFsize = stats["size"]; //0 because the pipe isn't finished yet...
        console.log(' actual file size is : ' + downloadedFsize);
    }).on('error', function(e) {
        console.error(e);
        //log that an error happened to the file
    }).on('end', function(e){
        //tried putting the above script here but nothing happens
    });
    return rsult;
}
Is there a cleaner approach similar to what I have in mind above, or should I approach this differently? I tried putting the code in the 'end' handler, but nothing happens.
The end event is not triggered on the request; instead, it is triggered on the response (docs):
response.on("end", function() {
    console.log("done");
});
As @Jonas Wilms says, the trigger was indeed on the response. Here is the working version:
//nodejs default libs
var fs = require("fs");
var http = require('https');

function dlFile(fullFilePath, dlUrl, fsize, fname){
    var file = fs.createWriteStream(fullFilePath); //fullFilePath will dictate where we will save the file + filename.
    var rsult ='';
    var downloadedFsize;
    var stats; //stats of the file will be included here

    var request = http.get( dlUrl, function(response) {
        let rsult = response.statusCode;
        //will respond with a 200 if the file is present
        //404 if file is missing
        response.pipe(file).on('finish', function(e){
            console.log(" \n FILE : " + fname);
            console.log("File analysis finished : statusCode: " + rsult + " || Saved on " + fullFilePath);
            console.log(' \n Downloaded from :' + dlUrl);
            console.log(' \n SQL File size is : ' + fsize);
            //identify filesize
            stats = fs.statSync(fullFilePath);
            downloadedFsize = stats["size"];
            console.log(' actual file size is : ' + downloadedFsize);
        });
        /*pipe writes the file above, and output the results once it's done */
    }).on('error', function(e) {
        console.error(e);
        //log that an error happened to the file
    }).on('end', function(e){
        //tried putting the above script here but nothing happens
    });
    return rsult;
}
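If the wider goal is to hold a loop until each file is fully written, one option (a sketch built on the answer above, not part of it) is to wrap the download in a Promise that resolves on the write stream's finish event, and await it before starting the next iteration:
// Sketch: promisified download so a caller can `await` each file before
// moving on. Uses only fs and https; function and variable names are mine.
const fs = require('fs');
const https = require('https');

function downloadFile(fullFilePath, dlUrl) {
    return new Promise(function(resolve, reject) {
        const file = fs.createWriteStream(fullFilePath);
        https.get(dlUrl, function(response) {
            response.pipe(file);
            // 'finish' fires once the write stream has flushed everything to disk
            file.on('finish', function() {
                resolve(fs.statSync(fullFilePath).size);
            });
        }).on('error', function(err) {
            fs.unlink(fullFilePath, function() { reject(err); });
        });
    });
}

// usage: sequential downloads, one finishing before the next starts
async function downloadAll(urls) {
    for (const [i, url] of urls.entries()) {
        const size = await downloadFile('file' + i + '.bin', url);
        console.log(url + ' -> ' + size + ' bytes');
    }
}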

Writing an image to file, received over an HTTP request in Node

I'm certain I'm missing something obvious, but the gist of the problem is I'm receiving a PNG from a Mapbox call with the intent of writing it to the file system and serving it to the client. I've successfully relayed the call, received a response of raw data and written a file. The problem is that my file ends up truncated no matter what path I take, and I've exhausted the answers I've found skirting the subject. I've dumped the raw response to the log, and it's robust, but any file I make tends to be about a chunk's worth of unreadable data.
Here's the code I've got at present for writing the file. I tried this Buffer approach as a last-ditch effort after several failed and comparably fruitless iterations. Any help would be greatly appreciated.
module.exports = function(req, res, cb) {
    var cartography = function() {
        return https.get({
            hostname: 'api.mapbox.com',
            path: '/v4/mapbox.wheatpaste/' + req.body[0] + ',' + req.body[1] + ',6/750x350.png?access_token=' + process.env.MAPBOX_API
        }, function(res) {
            var body = '';
            res.on('data', function(chunk) {
                body += chunk;
            });
            res.on('end', function() {
                var mapPath = 'map' + req.body[0] + req.body[1] + '.png';
                var map = new Buffer(body, 'base64');
                fs.writeFile(__dirname + '/client/images/maps/' + mapPath, map, 'base64', function(err) {
                    if (err) throw err;
                    cb(mapPath);
                })
            })
        });
    };
    cartography();
};
It is possible to rewrite your code as a more compact subroutine:
const fs = require('fs');
const https = require('https');

https.get(url, (response) => { //request itself
    if (response) {
        let imageName = 'image.png'; // for this purpose I usually use crypto
        response.pipe( //pipe response to a write stream (file)
            fs.createWriteStream( //create write stream
                './public/' + imageName //create a file with name image.png
            )
        );
        return imageName; //if public folder is set as default in app.js
    } else {
        return false;
    }
})
You could get the original name and extension from the URL, but it is safer to generate a new name with crypto and get the file extension, as I said, from the URL or with the read-chunk and file-type modules.
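Applied to the handler from the question, the same idea might look like the sketch below. The paths, req.body usage and MAPBOX_API variable are copied from the question; the key change is that the PNG bytes never pass through a string or a base64 round-trip (concatenating binary chunks into a string is the most likely cause of the truncated, unreadable files), and cb only fires once the write stream reports finish.
// Sketch: pipe the Mapbox response straight to disk and call cb when done.
const fs = require('fs');
const https = require('https');

module.exports = function(req, res, cb) {
    const mapPath = 'map' + req.body[0] + req.body[1] + '.png';
    const filePath = __dirname + '/client/images/maps/' + mapPath;

    https.get({
        hostname: 'api.mapbox.com',
        path: '/v4/mapbox.wheatpaste/' + req.body[0] + ',' + req.body[1] +
              ',6/750x350.png?access_token=' + process.env.MAPBOX_API
    }, function(mapRes) {
        mapRes.pipe(fs.createWriteStream(filePath))
            .on('finish', function() {
                cb(mapPath); // file is fully written at this point
            })
            .on('error', function(err) {
                console.error(err);
            });
    }).on('error', console.error);
};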

Google Cloud Storage upload request sometimes works, sometimes doesn't - make resumable?

A bit of a pickle here. I've got a Google Cloud Storage bucket set up, and a webpage that, probably about 60% of the time, DOES upload a file to the bucket. However, sometimes it doesn't, and I don't really know why.
I'm trying this from a mobile handset, so it could be the signal. As such, I guess one thing I need to think about is making the upload resumable (the files themselves aren't big - less than 2 MB - but the service we're getting isn't great).
So, how could I make this into a resumable (rather than multipart) upload? And do you think this would solve the issue?
Code below:
function insertObject(event) {
    try {
        var fileData = event.target.files[0];
    }
    catch(e) {
        return;
    }
    var boundary = '-------314159265358979323846';
    var delimiter = "\r\n--" + boundary + "\r\n";
    var close_delim = "\r\n--" + boundary + "--";
    var reader = new FileReader();
    reader.readAsBinaryString(fileData);
    reader.onload = function(e) {
        var contentType = fileData.type || 'application/octet-stream';
        var metadata = {
            'name': fileData.name,
            'mimeType': contentType
        };
        var base64Data = btoa(reader.result);
        var multipartRequestBody =
            delimiter +
            'Content-Type: application/json\r\n\r\n' +
            JSON.stringify(metadata) +
            delimiter +
            'Content-Type: ' + contentType + '\r\n' +
            'Content-Transfer-Encoding: base64\r\n' +
            '\r\n' +
            base64Data +
            close_delim;
        //Note: gapi.client.storage.objects.insert() can only insert
        //small objects (under 64k) so to support larger file sizes
        //we're using the generic HTTP request method gapi.client.request()
        var request = gapi.client.request({
            'path': '/upload/storage/' + API_VERSION + '/b/' + BUCKET + '/o',
            'method': 'POST',
            'params': {'uploadType': 'multipart'},
            'headers': {
                'Content-Type': 'multipart/mixed; boundary="' + boundary + '"'
            },
            'body': multipartRequestBody});
        //Remove the current API result entry in the main-content div
        //listChildren = document.getElementById('main-content').childNodes;
        //if (listChildren.length > 1) {
        //    listChildren[1].parentNode.removeChild(listChildren[1]);
        //}
        //look at http://stackoverflow.com/questions/30317797/uploading-additional-metadata-as-part-of-file-upload-request-to-google-cloud-sto
        try{
            //Execute the insert object request
            //document.getElementById("authorize-button").click();
            executeRequest(request, 'insertObject');
            //Store the name of the inserted object
            document.getElementById("btnUpload").click();
            object = fileData.name;
        }
        catch(e) {
            alert('An error has occurred: ' + e.message);
        }
    }
}
EDIT - just tried it from my PC and it's uploaded 17 files with no problems - normally it just starts dropping them straight away from the mobile handset. So I think it is a signal issue and making it resumable would do the job.
Well, this is embarrassing.
I decided to check that the code was uploading the file correctly, and got rid of the
document.getElementById("btnUpload").click();
object = fileData.name;
in the final element of the code (the btnUpload does some C# stuff that I do need, but I don't need the object at all). I tried 20 pictures from my mobile handset and, when it would have usually only done about 12, it did all 20 - and a lot more effectively.
So I think the issue is that the btnUpload.click() takes control of the process away from the file transfer before it has finished. I've just got to find a way to incorporate it back into the code without interfering with things.
Github link
And I've just moved the btnUpload.click() from within insertObject to executeRequest (just after where it calls the log). There just seems to be a small problem now, but not with the file upload: the btnUpload stuff gets called where it should, and the rest is simply sorting out the webpage.
Edit - as an aside, in order to cope with the request length being long (for files of 4 MB or more), I added this to the web.config (in system.web):
<httpRuntime maxRequestLength="214748364"/>
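The resumable part of the question never got a direct answer; for completeness, here is a rough, untested outline of the JSON API's resumable flow. API_VERSION, BUCKET and fileData are the names already used above, and the access token is pulled from the gapi auth client the sample page sets up; treat this as a sketch of the two-step protocol rather than production code.
// Rough sketch only: start a resumable session, then send the bytes.
function insertObjectResumable(fileData) {
    var contentType = fileData.type || 'application/octet-stream';
    var metadata = JSON.stringify({ 'name': fileData.name });
    var token = gapi.auth.getToken().access_token;

    // Step 1: initiate the session. The response's Location header is a
    // session URI that will accept the actual bytes.
    var init = new XMLHttpRequest();
    init.open('POST', 'https://www.googleapis.com/upload/storage/' + API_VERSION +
        '/b/' + BUCKET + '/o?uploadType=resumable', true);
    init.setRequestHeader('Authorization', 'Bearer ' + token);
    init.setRequestHeader('Content-Type', 'application/json; charset=UTF-8');
    init.setRequestHeader('X-Upload-Content-Type', contentType);
    init.onload = function() {
        var sessionUri = init.getResponseHeader('Location');

        // Step 2: PUT the file itself to the session URI. If the connection
        // drops, the same URI can be queried and the upload resumed from the
        // last byte the server acknowledged.
        var upload = new XMLHttpRequest();
        upload.open('PUT', sessionUri, true);
        upload.setRequestHeader('Content-Type', contentType);
        upload.onload = function() { console.log('Upload finished: ' + upload.status); };
        upload.send(fileData); // a File/Blob can be sent directly, no base64 needed
    };
    init.send(metadata);
}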

Trying to upload files from a webpage to Google Cloud Storage

I've got a website (in asp.net) through which I want users to upload photos to a Google Cloud Storage bucket. The users doing the uploading CAN be authenticated with Google if necessary (though I would prefer it if they weren't - the site is locked down with usernames/passwords/captchas).
Slightly unrelated: the photos in the bucket, though, have to be visible to everyone, so long as they have the link (some of our clients have IT departments who refuse to allow them to use Google accounts, and we can't change their minds). Ideally, I would want the link returned when the photo is uploaded.
I have the following JavaScript code, which I think should work (basically it's taken from here):
<script type="text/javascript" src="https://apis.google.com/js/client:plusone.js"></script>
<script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/jquery/1/jquery.min.js"></script>
<script type="text/javascript" src="https://apis.google.com/js/client.js"></script>
<script type="text/javascript">
var PROJECT = 'MY_PROJECT';
var clientId = 'MY_CLIENT_ID_ENDING_IN_apps.googleusercontent.com';
var apiKey = 'MY_API_KEY';
var scopes = 'https://www.googleapis.com/auth/devstorage.read_write';
//quick question - I've got a photo in the bucket already, and its
//URL points to a v1_internal folder. Would that mean that the API
//version is v1_internal?
var API_VERSION = 'v1';
var BUCKET = 'MY_BUCKET';
var object = "";
//question - when using a specific group, should this read group-blahblah
//or, for instance, owners-blahblah
//or even group-owners-blahblah?
var GROUP = 'group-MY_LONG_GROUP_ID';
//stuck on these next few ones
var ENTITY = 'group-Owners';
var ROLE = 'OWNER';
var ROLE_OBJECT = 'OWNER';
function insertObject(event) {
    try {
        var fileData = event.target.files[0];
    }
    catch(e) {
        //'Insert Object' selected from the API Commands select list
        //Display insert object button and then exit function
        //filePicker.style.display = 'block';
        return;
    }
    var boundary = '-------314159265358979323846';
    var delimiter = "\r\n--" + boundary + "\r\n";
    var close_delim = "\r\n--" + boundary + "--";
    var reader = new FileReader();
    reader.readAsBinaryString(fileData);
    reader.onload = function(e) {
        var contentType = fileData.type || 'application/octet-stream';
        var metadata = {
            'name': fileData.name,
            'mimeType': contentType
        };
        var base64Data = btoa(reader.result);
        var multipartRequestBody =
            delimiter +
            'Content-Type: application/json\r\n\r\n' +
            JSON.stringify(metadata) +
            delimiter +
            'Content-Type: ' + contentType + '\r\n' +
            'Content-Transfer-Encoding: base64\r\n' +
            '\r\n' +
            base64Data +
            close_delim;
        //Note: gapi.client.storage.objects.insert() can only insert
        //small objects (under 64k) so to support larger file sizes
        //we're using the generic HTTP request method gapi.client.request()
        var request = gapi.client.request({
            'path': '/upload/storage/' + API_VERSION + '/b/' + BUCKET + '/o',
            'method': 'POST',
            'params': {'uploadType': 'multipart'},
            'headers': {
                'Content-Type': 'multipart/mixed; boundary="' + boundary + '"'
            },
            'body': multipartRequestBody});
        //Remove the current API result entry in the main-content div
        //listChildren = document.getElementById('main-content').childNodes;
        //if (listChildren.length > 1) {
        //    listChildren[1].parentNode.removeChild(listChildren[1]);
        //}
        //look at http://stackoverflow.com/questions/30317797/uploading-additional-metadata-as-part-of-file-upload-request-to-google-cloud-sto
        try{
            //Execute the insert object request
            executeRequest(request, 'insertObject');
            //Store the name of the inserted object
            object = fileData.name;
        }
        catch(e) {
            alert('An error has occurred: ' + e.message);
        }
    }
}
</script>
Currently, I can execute the function. The file makes it into the function (I've been able to call an alert on fileData.name). However, it doesn't end up in the bucket, and no error message comes up.
Does this code look okay, or is it a problem with how the storage bucket is set up? And am I using the correct values (or have I formatted them correctly)?
Sorted it. For those who have tried using this code and, like me, didn't bother to read it all: Google is using an "authorize" button. Basically, the example code they provided gives you the ability to more or less "attach" the file, and then another button does the actual uploading. I hadn't seen this myself.
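For anyone else who skipped that part of the sample, the authorize step boils down to something like the sketch below. clientId and scopes are the variables declared above; the authorize-button id matches the commented-out line in the earlier snippet, but your markup may differ.
// Sketch of the authorization step from Google's sample.
function checkAuth() {
    gapi.auth.authorize(
        {'client_id': clientId, 'scope': scopes, 'immediate': true},
        handleAuthResult);
}

function handleAuthResult(authResult) {
    var authorizeButton = document.getElementById('authorize-button');
    if (authResult && !authResult.error) {
        // Authorized: hide the button and allow insertObject to run
        authorizeButton.style.display = 'none';
    } else {
        // Not yet authorized: show the button and prompt on click
        authorizeButton.style.display = 'block';
        authorizeButton.onclick = function() {
            gapi.auth.authorize(
                {'client_id': clientId, 'scope': scopes, 'immediate': false},
                handleAuthResult);
        };
    }
}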

Video streaming in node.js

I want to stream video using node.js. I have followed this article, and it's working fine locally, but I need to stream video from the web.
My requirement is a little different: I have to hide the original URL from the source code. So, instead of showing the original URL <source src="http://goo.gl/KgGx0s" type="video/mp4"/>, I need to show my own URL like <source src="http://localhost:8888" type="video/mp4"/>.
I'm using the following code:
var indexPage, movie_webm, movie_mp4, movie_ogg;

fs.readFile(path.resolve(__dirname, "ANY_LOCAL_VIDEO.mp4"), function (err, data) {
    if (err) {
        throw err;
    }
    console.log(data.length);
    movie_mp4 = data;
});

http.createServer(function (req, res) {
    var reqResource = url.parse(req.url).pathname;
    var total;
    total = movie_mp4.length;
    var range = req.headers.range;
    var positions = range.replace(/bytes=/, "").split("-");
    var start = parseInt(positions[0], 10);
    var end = positions[1] ? parseInt(positions[1], 10) : total - 1;
    var chunksize = (end - start) + 1;
    res.writeHead(206, {
        "Content-Range": "bytes " + start + "-" + end + "/" + total,
        "Accept-Ranges": "bytes",
        "Content-Length": chunksize,
        "Content-Type": "video/mp4"
    });
    res.end(movie_mp4.slice(start, end + 1), "binary");
}).listen(8888);
It works fine for local videos, but if I use fs.readFile("http://goo.gl/KgGx0s", function (err, data) { instead of the code above, it doesn't work. I tried changing fs.readFile to fs.filepath, but it still doesn't work. I am getting this error:
c:\git\personal\streaming-video-html5\server.js:13
throw err;
^
Error: ENOENT, open 'c:\git\personal\streaming-video-html5\http:\goo.gl\KgGx0s'
This may be because the path is changing. What approach should I follow? Is it possible?
What you're looking for is something like the request module.
You can simply fetch your remote video and then pipe it directly to your response.
What I'd do is first make a HEAD request to the remote video in order to confirm that it accepts byte ranges and also to get the content length of the video.
Once you've got that information you can set your response headers, with a HTTP status code of 206, indicating that this is a partial response.
Now that you've written your response headers, you create a request to the remote video, sending along the byte range from the original request, and pipe that directly to your response (res):
var fileUrl = 'https://ia800300.us.archive.org/1/items/night_of_the_living_dead/night_of_the_living_dead_512kb.mp4';

var range = req.headers.range;
var positions, start, end, total, chunksize;

// HEAD request for file metadata
request({
    url: fileUrl,
    method: 'HEAD'
}, function(error, response, body){
    setResponseHeaders(response.headers);
    pipeToResponse();
});

function setResponseHeaders(headers){
    positions = range.replace(/bytes=/, "").split("-");
    start = parseInt(positions[0], 10);
    total = headers['content-length'];
    end = positions[1] ? parseInt(positions[1], 10) : total - 1;
    chunksize = (end - start) + 1;
    res.writeHead(206, {
        "Content-Range": "bytes " + start + "-" + end + "/" + total,
        "Accept-Ranges": "bytes",
        "Content-Length": chunksize,
        "Content-Type": "video/mp4"
    });
}

function pipeToResponse() {
    var options = {
        url: fileUrl,
        headers: {
            range: "bytes=" + start + "-" + end,
            connection: 'keep-alive'
        }
    };
    request(options).pipe(res);
}
You can probably cache the response from the HEAD request so that you don't need to do it each time a byte-range is requested.
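A sketch of that caching idea, using a plain in-memory object keyed by URL (function and variable names are mine; request is the same module used above):
// Sketch: cache the remote file's content-length so the HEAD request only
// happens once per URL.
var request = require('request');
var lengthCache = {}; // url -> content-length

function getContentLength(fileUrl, callback) {
    if (lengthCache[fileUrl] !== undefined) {
        return callback(null, lengthCache[fileUrl]);
    }
    request({ url: fileUrl, method: 'HEAD' }, function(error, response) {
        if (error) return callback(error);
        var total = parseInt(response.headers['content-length'], 10);
        lengthCache[fileUrl] = total; // remembered for subsequent range requests
        callback(null, total);
    });
}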
