I am using the Office Javascript API to write an Add-in for Word using Angular.
I want to retrieve the Word document through the API, then convert it to a file and upload it via POST to a server.
The code I am using is nearly identical to the documentation code that Microsoft provides for this use case: https://dev.office.com/reference/add-ins/shared/document.getfileasync#example---get-a-document-in-office-open-xml-compressed-format
The server endpoint requires uploads to be POSTed as a multipart form, so I create a FormData object, append the file (as a Blob) along with some metadata, and pass it to the $http call.
The file is being transmitted to the server, but when I open it, it has become corrupted and it can no longer be opened by Word.
According to the documentation, the Office.context.document.getFileAsync function returns a byte array. However, the resulting fileContent variable is a string. When I console.log this string, it looks like compressed data, as it should.
My guess is that I need some preprocessing before turning the string into a Blob. But which preprocessing? Base64 decoding through atob doesn't seem to do anything.
let sendFile = ( fileContent ) => {
let blob = new Blob([fileContent], { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }),
fd = new FormData();
blob.lastModifiedDate = new Date();
fd.append('file', blob, 'uploaded_file_test403.docx');
fd.append('case_id', caseIdReducer.data());
$http.post('/file/create', fd, {
transformRequest: angular.identity,
headers: { 'Content-Type': undefined }
})
.success( ( ) => {
console.log('upload succeeded');
})
.error(( ) => {
console.log('upload failed');
});
};
function onGotAllSlices(docdataSlices) {
let docdata = [];
for (let i = 0; i < docdataSlices.length; i++) {
docdata = docdata.concat(docdataSlices[i]);
}
let fileContent = new String();
for (let j = 0; j < docdata.length; j++) {
fileContent += String.fromCharCode(docdata[j]);
}
// Now all the file content is stored in 'fileContent' variable,
// you can do something with it, such as print, fax...
sendFile(fileContent);
}
function getSliceAsync(file, nextSlice, sliceCount, gotAllSlices, docdataSlices, slicesReceived) {
file.getSliceAsync(nextSlice, (sliceResult) => {
if (sliceResult.status === 'succeeded') {
if (!gotAllSlices) { // Failed to get all slices, no need to continue.
return;
}
// Got one slice, store it in a temporary array.
// (Or you can do something else, such as
// send it to a third-party server.)
docdataSlices[sliceResult.value.index] = sliceResult.value.data;
if (++slicesReceived === sliceCount) {
// All slices have been received.
file.closeAsync();
onGotAllSlices(docdataSlices);
} else {
getSliceAsync(file, ++nextSlice, sliceCount, gotAllSlices, docdataSlices, slicesReceived);
}
} else {
gotAllSlices = false;
file.closeAsync();
console.log(`getSliceAsync Error: ${sliceResult.error.message}`);
}
});
}
// User clicks button to start document retrieval from Word and uploading to server process
ctrl.handleClick = ( ) => {
Office.context.document.getFileAsync(Office.FileType.Compressed, { sliceSize: 65536 /*64 KB*/ },
(result) => {
if (result.status === 'succeeded') {
// If the getFileAsync call succeeded, then
// result.value will return a valid File Object.
let myFile = result.value,
sliceCount = myFile.sliceCount,
slicesReceived = 0, gotAllSlices = true, docdataSlices = [];
// Get the file slices.
getSliceAsync(myFile, 0, sliceCount, gotAllSlices, docdataSlices, slicesReceived);
} else {
console.log(`Error: ${result.error.message}`);
}
}
);
};
I ended up doing this with the fileContent string:
let bytes = new Uint8Array(fileContent.length);
for (let i = 0; i < bytes.length; i++) {
bytes[i] = fileContent.charCodeAt(i);
}
I then proceed to build the Blob with these bytes:
let blob = new Blob([bytes], { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' });
If I then send this via a POST request, the file isn't mangled and can be opened correctly by Word.
I still get the feeling this can be achieved with less hassle and fewer steps. If anyone has a better solution, I'd be very interested to learn it.
Thanks for your answer; Uint8Array was the solution. Just a little improvement to avoid creating the string:
let bytes = new Uint8Array(docdata.length);
for (var i = 0; i < docdata.length; i++) {
bytes[i] = docdata[i];
}
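(In fact the copy loop can probably go entirely, since the Uint8Array constructor copies from an array-like directly:)
let bytes = new Uint8Array(docdata);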
Pff! What's wrong with getting an instance of File and not having to use the FileReader API? C'mon, Microsoft!
You should take the byte array and throw it straight into the Blob constructor. Turning a binary blob into a string in JavaScript is a bad idea that can lead to "out of range" errors or incorrect encoding.
Just do something along the lines of this:
var byteArray = new Uint8Array(3)
byteArray[0] = 97
byteArray[1] = 98
byteArray[2] = 99
new Blob([byteArray])
If the chunk is an instance of a typed array or an instance of Blob/File, you can just do:
blob = new Blob([blob, chunk])
And please... don't base64 encode it (roughly a third larger and slower).
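Putting the suggestions in this thread together, a minimal sketch of onGotAllSlices that skips the intermediate string entirely (assuming, as in the question's code, that each slice's data is a plain array of byte values):
function onGotAllSlices(docdataSlices) {
    // Flatten the slices into a single array of byte values.
    let docdata = [];
    for (let i = 0; i < docdataSlices.length; i++) {
        docdata = docdata.concat(docdataSlices[i]);
    }
    // Hand the raw bytes to sendFile; new Blob([bytes], ...) there keeps the
    // binary content intact, unlike the character-by-character string.
    sendFile(new Uint8Array(docdata));
}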
Related
I am trying to upload a PDF from the frontend to my node server. The PDF successfully uploads to the node server, but when I go to open it, I am unable to. Instead, I see a message that says "File can't be opened. Something went wrong." Why is this happening?
Also, please don't suggest third-party upload middleware like multer. I am aware of these libraries, but I just want pure Node. Thank you so much.
Frontend code:
const uploadFile = document.getElementById("uploadFile");
uploadFile.addEventListener("change", (event) => {
readFile(event.target.files[0]);
});
function readFile(file) {
const uploadDesignPDF = `http://localhost:7000/api/upload/design`;
let fileReader = new FileReader();
fileReader.readAsDataURL(file);
fileReader.addEventListener("load", async (event) => {
let pdfStrChunk = event.target.result.replace(
/^data:application\/[a-z]+;base64,/,
""
);
let fileSize = file.size;
const chunk = 85000;
let numOfChunkSet = Math.ceil(fileSize / chunk);
let remainingChunk = fileSize;
let currentChunk = 0;
let chunkSet = [];
let range = {};
let data = {};
for (let i = 0; i < numOfChunkSet; i++) {
remainingChunk -= chunk;
if (remainingChunk < 0) {
remainingChunk += chunk;
chunkSet.push(remainingChunk);
range.start = currentChunk;
range.end = currentChunk + chunk;
currentChunk += remainingChunk;
} else {
chunkSet.push(chunk);
range.start = currentChunk;
range.end = (i + 1) * chunkSet[i];
currentChunk += chunk;
}
const chunkRead = pdfStrChunk.slice(range.start, range.end);
data.dataPDF = chunkRead;
let response = await fetch(uploadDesignPDF, {
method: "POST",
body: JSON.stringify(data),
headers: {
"Content-Type": "application/json",
},
responseType: "arrayBuffer",
responseEncoding: "binary",
});
let results = await response.json();
console.log(results);
}
});
}
Backend route:
const { uploadDesigns } = require("./upload.designs.controller.js");
const router = require("express").Router();
router.post("/upload/design", uploadDesigns);
Backend:
uploadDesigns: async (req, res) => {
try {
fs.writeFileSync(`./designs/testingPDF6.pdf`, req.body.dataPDF, "base64");
res.status(200).json({
message: "done with chunk",
});
} catch (error) {
res.status(500).json({
message: "Something went wrong. Please refresh page.",
});
}
}
You are working with base64 data URLs in vain. It is much more effective to use an ArrayBuffer. The main advantage of an ArrayBuffer is that it works in whole bytes, whereas base64 spends four characters to represent every three bytes.
Instead of sending the file in chunks, I would suggest tracking progress through XMLHttpRequest.upload.onprogress. I would only use chunks if the upload went over a WebSocket.
If the PDF file is the only information sent to the server, I'd prefer to send the file directly without any field names or other FormData information provided. In that case, it would be appropriate to change the POST method to PUT.
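A minimal client-side sketch of that suggestion, reusing the localhost:7000 endpoint from the question and the upload.onprogress handler mentioned above (treat the exact route and content type as illustrative):
function uploadPdf(file) {
    const xhr = new XMLHttpRequest();
    // PUT the File object directly: no FormData, no base64, no chunking.
    xhr.open("PUT", "http://localhost:7000/api/upload/design");
    xhr.setRequestHeader("Content-Type", "application/pdf");
    xhr.upload.onprogress = (e) => {
        if (e.lengthComputable) {
            console.log(`uploaded ${Math.round((e.loaded / e.total) * 100)}%`);
        }
    };
    xhr.onload = () => console.log("upload finished with status", xhr.status);
    xhr.send(file); // the File from <input type="file"> is sent as raw bytes
}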
If you prefer to send the file directly, it would be ideal to use fs.createWriteStream() instead of fs.writeFileSync().
Then this approach will work
const ws = fs.createWriteStream(tmpFilePath);
request.pipe(ws);
To check the integrity of the data, you can add an MD5 or SHA hash to the request headers and, on the server, duplicate the data stream into the object created by crypto.createHash(). In case of a hash mismatch, the file can be uploaded again.
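A matching server-side sketch in plain Node (no third-party middleware); the target path and the x-content-sha256 header name are assumptions made for the example:
const fs = require("fs");
const http = require("http");
const crypto = require("crypto");

http.createServer((req, res) => {
    if (req.method === "PUT" && req.url === "/api/upload/design") {
        const ws = fs.createWriteStream("./designs/upload.pdf");
        const hash = crypto.createHash("sha256");
        req.on("data", (chunk) => hash.update(chunk)); // feed every chunk into the hash...
        req.pipe(ws);                                  // ...while the same stream is written to disk
        ws.on("finish", () => {
            const ok = hash.digest("hex") === req.headers["x-content-sha256"];
            res.statusCode = ok ? 200 : 422;
            res.end(ok ? "stored" : "hash mismatch, please upload again");
        });
    } else {
        res.statusCode = 404;
        res.end();
    }
}).listen(7000);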
I am trying to write JavaScript that takes an image file and converts it into a BLOB (by converting the file into Base64 first and then into a BLOB). My project doesn't have support for toBlob(), so I have found different conversion steps and put them together, and they work up to the point where I have to pass the BLOB from the function where it is made to the MySQL part of the code that handles communicating with the database (which I have fully working). Now I only need to find a way to connect them through a variable that holds the result of the imageforQuery function.
My code so far is this:
let base64String = "";
function imageforQuery(imageid) {
//takes file and converts to Base64
var file = document.getElementById(imageid).files[0];
var reader = new FileReader();
console.log("next");
imgFileFrontBlob = "";
reader.onload = function () {
base64String = reader.result.replace("data:", "")
.replace(/^.+,/, "");
// console.log(base64String);
base64String = 'data:image/jpeg;base64,'+ base64String;
console.log(base64String);
//converts Base64 into BLOB
var binary = atob(base64String.split(',')[1]);
console.log(binary);
var array = [];
for(var i = 0; i < binary.length; i++) {
array.push(binary.charCodeAt(i));
}
var imgFileFrontBlob = new Blob([new Uint8Array(array)], {type: 'image/png'});
console.log(imgFileFrontBlob);
return imgFileFrontBlob
}
reader.readAsDataURL(file);
};
By experimenting with console.log() at different stages and with return, I have found that I can't pass the converted BLOB result out, as the function imageforQuery() only returns what comes after reader.readAsDataURL(file), and I don't know of a way of getting that result out.
––––––––––––––ADDITIONAL PROBLEMS I HAVE ENCOUNTERED––––––––––––––
Okay, so thanks to Emiel Zuurbier (thank you!) I have managed to rewrite my code with the help of his solution. However, as much as it helped with one part of the problem, it didn't help with the JavaScript Blob object, as we found out it is not the same thing as a SQL BLOB.
Now the problem is that trying to send the Blob object in a SQL query resulted in just sending the text "[Blob object]".
But in a different part of my application I am successfully using JavaScript to pull data from a BLOB field in my database and convert it into Base64 images. The code for that is below:
var converterEngine = function (input) {
// fn BLOB => Binary => Base64 ?
var uInt8Array = new Uint8Array(input),
i = uInt8Array.length;
var biStr = []; //new Array(I);
while (i--) { biStr[i] = String.fromCharCode(uInt8Array[i]); }
var base64 = window.btoa(biStr.join(''));
return base64;
};
What I need to do is just reverse this and in theory, it should get me the same data that I receive from the database.
My reversal code is below:
// This is the inner bit of code from the first (now solved) problem. The
// typeOfData variable is passed into imageforQuery() as a second input
// variable (in other words, it is not a concern here).
reader.onload = function () {
let base64String = reader.result.replace("data:", "").replace(/^.+,/, "");
base64String = "data:" + typeOfData + ";base64," + base64String;
var binary = atob(base64String.split(",")[1]);
// console.log(binary);
var array = [];
for (var i = 0; i < binary.length; i++) {
array.push(binary.charCodeAt(i));
}
var ourArray = new Uint8Array(array);
resolve(ourArray);
};
However, as I mentioned, the data that comes out (ourArray) isn't actually identical to the original file from the BLOB in the database, so my code doesn't function correctly and I don't know why. Any ideas where I've made a mistake?
Base64 is simply ASCII text, so MySQL's BLOB or TEXT datatypes would both work. That is, after converting to Base64, don't worry about "converting to a blob"; it is not necessary.
That is, you can probably replace the code from //converts ... through return ... by simply
return base64String;
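In other words, the reader.onload handler from the question could shrink to something like this (a sketch that keeps the question's variable names and assumes the Promise wrapper from the answer below, so that resolve is in scope):
reader.onload = function () {
    // Strip the data-URL prefix and keep the raw base64 text;
    // it can be stored in a TEXT (or BLOB) column as-is.
    const base64String = reader.result.replace(/^data:.+;base64,/, "");
    resolve(base64String);
};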
You can wrap the FileReader instance and its calls inside a Promise and return the Promise immediately. In the reader.onload function, call resolve() to settle the Promise with a value.
function imageforQuery(imageid) {
return new Promise(resolve => {
var file = document.getElementById(imageid).files[0];
var reader = new FileReader();
reader.onload = function () {
let base64String = reader.result.replace("data:", "").replace(/^.+,/, "");
base64String = "data:image/jpeg;base64," + base64String;
var binary = atob(base64String.split(",")[1]);
var array = [];
for (var i = 0; i < binary.length; i++) {
array.push(binary.charCodeAt(i));
}
var imgFileFrontBlob = new Blob([new Uint8Array(array)], {
type: "image/png",
});
resolve(imgFileFrontBlob);
};
reader.readAsDataURL(file);
});
}
This lets you use your function as shown below: imageforQuery is called and returns a Promise. When the Promise settles (meaning resolve is called), the function passed to the then method runs.
imageforQuery(imageId).then(imgFileFrontBlob => {
// Use your blob here.
saveToDB(imgFileFrontBlob); // Example of how you would use it.
});
Or use it with async / await.
(async () => {
function imageforQuery(imageid) {
...
}
// Here we can wait for imageforQuery to finish and save the variable.
const imgFileFrontBlob = await imageforQuery(imageId);
saveToDB(imgFileFrontBlob); // Example of how you would use it.
})()
I have used the JS code below, provided by Microsoft, to save a document as PDF:
Office.context.document.getFileAsync(Office.FileType.Pdf,
function(result) {
if (result.status == "succeeded") {
var myFile = result.value;
var sliceCount = myFile.sliceCount;
var slicesReceived = 0, gotAllSlices = true, docdataSlices = [];
console.log("File size:" + myFile.size + " #Slices: " + sliceCount);
// Now, you can call getSliceAsync to download the files,
// as described in the previous code segment (compressed format).
// Get the file slices.
getSliceAsync(myFile, 0, sliceCount, gotAllSlices, docdataSlices, slicesReceived);
myFile.closeAsync();
}
else {
console.log("Error:", result.error.message);
}
}
);
function getSliceAsync(file, nextSlice, sliceCount, gotAllSlices, docdataSlices, slicesReceived) {
file.getSliceAsync(nextSlice, function (sliceResult) {
if (sliceResult.status == "succeeded") {
if (!gotAllSlices) { // Failed to get all slices, no need to continue.
return;
}
// Got one slice, store it in a temporary array.
// (Or you can do something else, such as
// send it to a third-party server.)
docdataSlices[sliceResult.value.index] = sliceResult.value.data;
if (++slicesReceived == sliceCount) {
// All slices have been received.
file.closeAsync();
onGotAllSlices(docdataSlices);
}
else {
getSliceAsync(file, ++nextSlice, sliceCount, gotAllSlices, docdataSlices, slicesReceived);
}
}
else {
gotAllSlices = false;
file.closeAsync();
console.log("getSliceAsync Error:", sliceResult.error.message);
}
});
}
function onGotAllSlices(docdataSlices) {
var docdata = [];
for (var i = 0; i < docdataSlices.length; i++) {
docdata = docdata.concat(docdataSlices[i]);
}
var fileContent = new String();
for (var j = 0; j < docdata.length; j++) {
fileContent += String.fromCharCode(docdata[j]);
}
console.log('Final PDF content is received and stored in fileContent.');
send_file_content(fileContent);
}
function send_file_content(word_doc) {
var formData = new FormData();
var blob = new Blob([word_doc], { type: "application/pdf"});
formData.append("file", blob);
$.ajax({
type: 'POST',
url: 'My-upload-URL',
data: formData,
processData: false,
contentType: false
}).done(function(data) {
console.log('* Word Document successfully uploaded: ', data.filepath);
});
}
I'm pretty sure that the server side is OK, as I have uploaded zillions of PDF documents and it works as expected, but when I upload the PDF generated by Word via the above JS code, I get a blank page on the server side. If the Word document contains 3 pages, then I get a 3-page blank PDF on the server side.
The Microsoft documentation builds the content string with String.fromCharCode, which ruins the binary data and produces a blank PDF document.
Instead of going through a string, I used a Uint8Array on the byte array directly:
var blob = new Blob([new Uint8Array(myFinalByteArray)], { type: 'application/pdf' });
I then uploaded the Blob to the remote server using FormData. The problem went away with this approach.
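For completeness, this is roughly what onGotAllSlices becomes with that change (a sketch; the existing send_file_content already wraps whatever it receives in a Blob, so it can stay as it is):
function onGotAllSlices(docdataSlices) {
    var docdata = [];
    for (var i = 0; i < docdataSlices.length; i++) {
        docdata = docdata.concat(docdataSlices[i]);
    }
    // Pass the raw bytes through; no String.fromCharCode step.
    send_file_content(new Uint8Array(docdata));
}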
I have this working, but I want to share it to see if I missed anything obvious and to solve a mystery: why does my file chunk size have to be a multiple of 2049? The main requirements are:
Files uploaded from website must be stored in SQL server, not as files
Website must be able to download and display file data as a file (opened in a separate window).
Website is angularjs/javascript SPA, no server side code, no MVC
API is Web API 2 (again not MVC)
I'm just going to focus on the download part here. Basically what I'm doing is:
Read a chunk of data from SQL server varbinary field
Web API 2 api returns file name, mime type and byte data as a base64 string. NOTE - tried returning byte array but Web API just serializes it into base64 string anyway.
concatenate the chunks, convert the chunks to a blob and display
VB library function that returns a dataset with the chunk (I have to use this library, which handles the database connection but doesn't support parameterized queries):
Public Function GetWebApplicationAttachment(ByVal intId As Integer, ByVal intChunkNumber As Integer, ByVal intChunkSize As Integer) As DataSet
' the starting number is NOT 0 based
Dim intStart As Integer = 1
If intChunkNumber > 1 Then intStart = ((intChunkNumber - 1) * intChunkSize) + 1
Dim strQuery As String = ""
strQuery += "SELECT FileName, "
strQuery += "SUBSTRING(ByteData," & intStart.ToString & "," & intChunkSize.ToString & ") AS ByteData "
strQuery += "FROM FileAttachments WHERE Id = " + intId.ToString + " "
Try
Return Query(strQuery)
Catch ex As Exception
...
End Try
End Function
Web API business rules bit that creates the file object from the dataset
...
result.FileName = ds.Tables[0].Rows[0]["FileName"].ToString();
// NOTE: Web API converts a byte array to base 64 string so the result is the same either way
// the result of this is that the returned data will be about 30% bigger than the chunk size requested
result.StringData = Convert.ToBase64String((byte[])ds.Tables[0].Rows[0]["ByteData"]);
//result.ByteData = (byte[])ds.Tables[0].Rows[0]["ByteData"];
... some code to get the mime type
result.MIMEType = ...
Web API controller (simplified - all security and error handling removed)
public IHttpActionResult GetFileAttachment([FromUri] int id, int chunkSize, int chunkNumber) {
brs = new Files(...);
fileResult file = brs.GetFileAttachment(appID, chunkNumber, chunkSize);
return Ok(file);
}
angularjs service that gets the chunks recursively and puts them together
function getFileAttachment2(id, chunkSize, chunkNumber, def, fileData, mimeType) {
var deferred = def || $q.defer();
$http.get(webServicesPath + "api/files/get-file-attachment?id=" + id + "&chunkSize=" + chunkSize + "&chunkNumber=" + chunkNumber).then(
function (response) {
// when completed string data will be empty
if (response.data.StringData === "") {
response.data.MIMEType = mimeType;
response.data.StringData = fileData;
deferred.resolve(response.data);
} else {
if (chunkNumber === 1) {
// only the first chunk computes the mime type
mimeType = response.data.MIMEType;
}
fileData += response.data.StringData;
chunkNumber += 1;
getFileAttachment2(id, chunkSize, chunkNumber, deferred, fileData, mimeType);
}
},
function (response) {
... error stuff
}
);
return deferred.promise;
}
angular controller method that makes the calls.
function viewFile(id) {
sharedInfo.getWebPortalSetting("FileChunkSize").then(function (result) {
// chunk size must be a multiple of 2049 ???
var chunkSize = 0;
if (result !== null) chunkSize = parseInt(result);
fileHelper.getFileAttachment2(id, chunkSize, 1, null, "", "").then(function (result) {
if (result.error === null) {
if (!fileHelper.viewAsFile(result.StringData, result.FileName, result.MIMEType)) {
... error
}
result = {};
} else {
... error;
}
});
});
}
And finally the bit of javascript that displays the file as a download
function viewAsFile(fileData, fileName, fileType) {
try {
fileData = window.atob(fileData);
var ab = new ArrayBuffer(fileData.length);
var ia = new Uint8Array(ab); // ia provides window into array buffer
for (var i = 0; i < fileData.length; i++) {
ia[i] = fileData.charCodeAt(i);
}
var file = new Blob([ab], { type: fileType });
fileData = "";
if (window.navigator.msSaveOrOpenBlob) // IE10+
window.navigator.msSaveOrOpenBlob(file, fileName);
else { // Others
var a = document.createElement("a"),
url = URL.createObjectURL(file);
a.href = url;
a.download = fileName;
document.body.appendChild(a);
a.click();
setTimeout(function () {
document.body.removeChild(a);
window.URL.revokeObjectURL(url);
}, 0);
}
return true;
} catch (e) {
... error stuff
}
}
I can already see that a more RESTful approach would be to use headers to indicate the chunk range and to separate the file metadata from the file chunks. I could also try returning a data stream rather than a Base64-encoded string. If anyone has tips on that, let me know.
Well that was entirely the wrong way to go about that. In case it helps here's what I ended up doing.
Dynamically create the href address of an anchor tag to return a file (security token and parameters in query string)
get byte array from database
web api call return response message (see code below)
This is much faster and more reliable, but provides less in the way of progress monitoring.
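A minimal sketch of the client side of that approach; the route name and query-string parameters are assumptions modelled on the controller below:
function viewFileAttachment(id, token) {
    // The browser streams the response itself, so no base64 or Blob juggling is needed.
    var a = document.createElement("a");
    a.href = webServicesPath + "api/files/view-file-attachment?id=" + id + "&token=" + token;
    a.target = "_blank";
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
}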
business rule method uses...
...
file.ByteData = (byte[])ds.Tables[0].Rows[0]["ByteData"];
...
web api controller
public HttpResponseMessage ViewFileAttachment([FromUri] int id, string token) {
HttpResponseMessage response = new HttpResponseMessage();
... security stuff
fileInfoClass file = ... code to get file info
response.Content = new ByteArrayContent(file.ByteData);
response.Content.Headers.ContentDisposition =
new System.Net.Http.Headers.ContentDispositionHeaderValue("attachment") {
FileName = file.FileName
};
response.Content.Headers.ContentType = new MediaTypeHeaderValue(file.MIMEType);
return response;
}
This could even be improved with streaming.
I have a web app that needs to upload large files to Azure BLOB storage. My solution uses HTML5 File API to slice into chunks which are then put as blob blocks, the IDs of the blocks are stored in an array and then the blocks are committed as a blob.
The solution works fine in IE. On 64 bit Chrome I have successfully uploaded 4Gb files but see very heavy memory usage (2Gb+). On 32 bit Chrome the specific chrome process will get to around 500-550Mb and then crash.
I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array, so obviously there will be some memory creep, but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices in memory.
It's written as an Angular service called from a controller; I think just the service code is pertinent:
(function() {
'use strict';
angular
.module('app.core')
.factory('blobUploadService',
[
'$http', 'stringUtilities',
blobUploadService
]);
function blobUploadService($http, stringUtilities) {
var defaultBlockSize = 1024 * 1024; // Default to 1024KB
var stopWatch = {};
var state = {};
var initializeState = function(config) {
var blockSize = defaultBlockSize;
if (config.blockSize) blockSize = config.blockSize;
var maxBlockSize = blockSize;
var numberOfBlocks = 1;
var file = config.file;
var fileSize = file.size;
if (fileSize < blockSize) {
maxBlockSize = fileSize;
}
if (fileSize % maxBlockSize === 0) {
numberOfBlocks = fileSize / maxBlockSize;
} else {
numberOfBlocks = parseInt(fileSize / maxBlockSize, 10) + 1;
}
return {
maxBlockSize: maxBlockSize,
numberOfBlocks: numberOfBlocks,
totalBytesRemaining: fileSize,
currentFilePointer: 0,
blockIds: new Array(),
blockIdPrefix: 'block-',
bytesUploaded: 0,
submitUri: null,
file: file,
baseUrl: config.baseUrl,
sasToken: config.sasToken,
fileUrl: config.baseUrl + config.sasToken,
progress: config.progress,
complete: config.complete,
error: config.error,
cancelled: false
};
};
/* config: {
baseUrl: // baseUrl for blob file uri (i.e. http://<accountName>.blob.core.windows.net/<container>/<blobname>),
sasToken: // Shared access signature querystring key/value prefixed with ?,
file: // File object using the HTML5 File API,
progress: // progress callback function,
complete: // complete callback function,
error: // error callback function,
blockSize: // Use this to override the defaultBlockSize
} */
var upload = function(config) {
state = initializeState(config);
var reader = new FileReader();
reader.onloadend = function(evt) {
if (evt.target.readyState === FileReader.DONE && !state.cancelled) { // DONE === 2
var uri = state.fileUrl + '&comp=block&blockid=' + state.blockIds[state.blockIds.length - 1];
var requestData = new Uint8Array(evt.target.result);
$http.put(uri,
requestData,
{
headers: {
'x-ms-blob-type': 'BlockBlob',
'Content-Type': state.file.type
},
transformRequest: []
})
.success(function(data, status, headers, config) {
state.bytesUploaded += requestData.length;
var percentComplete = ((parseFloat(state.bytesUploaded) / parseFloat(state.file.size)) * 100
).toFixed(2);
if (state.progress) state.progress(percentComplete, data, status, headers, config);
uploadFileInBlocks(reader, state);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
});
}
};
uploadFileInBlocks(reader, state);
return {
cancel: function() {
state.cancelled = true;
}
};
};
function cancel() {
stopWatch = {};
state.cancelled = true;
return true;
}
function startStopWatch(handle) {
if (stopWatch[handle] === undefined) {
stopWatch[handle] = {};
stopWatch[handle].start = Date.now();
}
}
function stopStopWatch(handle) {
stopWatch[handle].stop = Date.now();
var duration = stopWatch[handle].stop - stopWatch[handle].start;
delete stopWatch[handle];
return duration;
}
var commitBlockList = function(state) {
var uri = state.fileUrl + '&comp=blocklist';
var requestBody = '<?xml version="1.0" encoding="utf-8"?><BlockList>';
for (var i = 0; i < state.blockIds.length; i++) {
requestBody += '<Latest>' + state.blockIds[i] + '</Latest>';
}
requestBody += '</BlockList>';
$http.put(uri,
requestBody,
{
headers: {
'x-ms-blob-content-type': state.file.type
}
})
.success(function(data, status, headers, config) {
if (state.complete) state.complete(data, status, headers, config);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
// called asynchronously if an error occurs
// or server returns response with an error status.
});
};
var uploadFileInBlocks = function(reader, state) {
if (!state.cancelled) {
if (state.totalBytesRemaining > 0) {
var fileContent = state.file.slice(state.currentFilePointer,
state.currentFilePointer + state.maxBlockSize);
var blockId = state.blockIdPrefix + stringUtilities.pad(state.blockIds.length, 6);
state.blockIds.push(btoa(blockId));
reader.readAsArrayBuffer(fileContent);
state.currentFilePointer += state.maxBlockSize;
state.totalBytesRemaining -= state.maxBlockSize;
if (state.totalBytesRemaining < state.maxBlockSize) {
state.maxBlockSize = state.totalBytesRemaining;
}
} else {
commitBlockList(state);
}
}
};
return {
upload: upload,
cancel: cancel,
startStopWatch: startStopWatch,
stopStopWatch: stopStopWatch
};
};
})();
Are there any ways I can adjust the scope of these objects to help with Chrome's GC? I have seen other people mention similar issues, but I understood Chromium had resolved some of them.
I should say my solution is heavily based on Gaurav Mantri's blog post here:
http://gauravmantri.com/2013/02/16/uploading-large-files-in-windows-azure-blob-storage-using-shared-access-signature-html-and-javascript/#comment-47480
I can't see any obvious memory leaks or things I can change to help
garbage collection. I store the block IDs in an array so obviously
there will be some memory creep, but this shouldn't be massive. It's
almost as if the File API is holding the whole file it slices in
memory.
You are correct. The new Blobs created by .slice() are being held in memory.
The solution is to call Blob.prototype.close() on the Blob reference when processing of the Blob or File object is complete.
Note also that the JavaScript in the question creates a new instance of FileReader each time the upload function is called.
4.3.1. The slice method
The slice() method returns a new Blob object with bytes ranging
from the optional start parameter up to but not including the
optional end parameter, and with a type attribute that is the
value of the optional contentType parameter.
Blob instances exist for the life of the document, though a Blob should be garbage collected once it is removed from the Blob URL Store:
9.6. Lifetime of Blob URLs
Note: User agents are free to garbage collect resources removed from
the Blob URL Store.
Each Blob must have an internal snapshot state, which must be
initially set to the state of the underlying storage, if any such
underlying storage exists, and must be preserved through
StructuredClone. Further normative definition of snapshot state can
be found for Files.
4.3.2. The close method
The close() method is said to close a Blob, and must act as
follows:
If the readability state of the context object is CLOSED, terminate this algorithm.
Otherwise, set the readability state of the context object to CLOSED.
If the context object has an entry in the Blob URL Store, remove the entry that corresponds to the context object.
If the Blob object was passed to URL.createObjectURL(), call URL.revokeObjectURL() with the object URL, then call .close().
The revokeObjectURL(url) static method
Revokes the Blob URL provided in the string url by removing the corresponding entry from the Blob URL Store. This method must act
as follows:
1. If the url refers to a Blob that has a readability state of CLOSED OR if the value provided for the url argument is
not a Blob URL, OR if the value provided for the url argument does
not have an entry in the Blob URL Store, this method call does
nothing. User agents may display a message on the error console.
2. Otherwise, user agents must remove the entry from the Blob URL Store for url.
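Putting those two steps together, a minimal sketch (the close() call is guarded because Blob.prototype.close() was a spec proposal that most browsers never shipped):
var url = URL.createObjectURL(blob);
// ... use the object URL, e.g. as an <a> href or <img> src ...
URL.revokeObjectURL(url);               // removes the entry from the Blob URL Store
if (typeof blob.close === "function") {
    blob.close();                       // releases the underlying storage where supported
}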
You can view the result of these calls by opening chrome://blob-internals and reviewing the details before and after the calls that create and close the Blob.
For example, from
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
Type: data
Length: 3
to
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
following the call to .close(). Similarly, an entry such as
blob:http://example.com/c2823f75-de26-46f9-a4e5-95f57b8230bd
Uuid: 29e430a6-f093-40c2-bc70-2b6838a713bc
is removed from the list once URL.revokeObjectURL() has been called for it.
An alternative approach could be to send the file as an ArrayBuffer, or as chunks of array buffers, and then re-assemble the file at the server.
Or you can call the FileReader constructor, FileReader.prototype.readAsArrayBuffer(), and the load event of FileReader each only once.
At the load event of FileReader, pass the ArrayBuffer to a Uint8Array, then use a ReadableStream together with TypedArray.prototype.subarray(), .getReader(), and .read() to pull N chunks of the ArrayBuffer out of the Uint8Array as TypedArrays. When chunks totalling the .byteLength of the ArrayBuffer have been processed, pass the array of Uint8Arrays to the Blob constructor to recombine the file parts into a single file in the browser; then send the Blob to the server.
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<input id="file" type="file">
<br>
<progress value="0"></progress>
<br>
<output for="file"><img alt="preview"></output>
<script type="text/javascript">
const [input, output, img, progress, fr, handleError, CHUNK] = [
document.querySelector("input[type='file']")
, document.querySelector("output[for='file']")
, document.querySelector("output img")
, document.querySelector("progress")
, new FileReader
, (err) => console.log(err)
, 1024 * 1024
];
progress.addEventListener("progress", e => {
progress.value = e.detail.value;
e.detail.promise();
});
let [chunks, NEXT, CURR, url, blob] = [Array(), 0, 0];
input.onchange = () => {
NEXT = CURR = progress.value = progress.max = chunks.length = 0;
if (url) {
URL.revokeObjectURL(url);
if (blob.hasOwnProperty("close")) {
blob.close();
}
}
if (input.files.length) {
console.log(input.files[0]);
progress.max = input.files[0].size;
progress.step = progress.max / CHUNK;
fr.readAsArrayBuffer(input.files[0]);
}
}
fr.onload = () => {
const VIEW = new Uint8Array(fr.result);
const LEN = VIEW.byteLength;
const {type, name:filename} = input.files[0];
const stream = new ReadableStream({
pull(controller) {
if (NEXT < LEN) {
controller
.enqueue(VIEW.subarray(NEXT, !NEXT ? CHUNK : CHUNK + NEXT));
NEXT += CHUNK;
} else {
controller.close();
}
},
cancel(reason) {
console.log(reason);
throw new Error(reason);
}
});
const [reader, processData] = [
stream.getReader()
, ({value, done}) => {
if (done) {
return reader.closed.then(() => chunks);
}
chunks.push(value);
return new Promise(resolve => {
progress.dispatchEvent(
new CustomEvent("progress", {
detail:{
value:CURR += value.byteLength,
promise:resolve
}
})
);
})
.then(() => reader.read().then(data => processData(data)))
.catch(e => reader.cancel(e))
}
];
reader.read()
.then(data => processData(data))
.then(data => {
blob = new Blob(data, {type});
console.log("complete", data, blob);
if (/image/.test(type)) {
url = URL.createObjectURL(blob);
img.onload = () => {
img.title = filename;
input.value = "";
}
img.src = url;
} else {
input.value = "";
}
})
.catch(e => handleError(e))
}
</script>
</body>
</html>
plnkr http://plnkr.co/edit/AEZ7iQce4QaJOKut71jk?p=preview
You can also utilize fetch():
fetch(new Request("/path/to/server/", {method:"PUT", body:blob}))
To transmit body for a request request, run these
steps:
Let body be request’s body.
If body is null, then queue a fetch task on request to process request end-of-body for request and abort these steps.
Let read be the result of reading a chunk from body’s stream.
When read is fulfilled with an object whose done property is false and whose value property is a Uint8Array object, run these
substeps:
Let bytes be the byte sequence represented by the Uint8Array object.
Transmit bytes.
Increase body’s transmitted bytes by bytes’s length.
Run the above step again.
When read is fulfilled with an object whose done property is true, queue a fetch task on request to process request end-of-body
for request.
When read is fulfilled with a value that matches with neither of the above patterns, or read is rejected, terminate the ongoing
fetch with reason fatal.
See also
Progress indicators for fetch?
Fetch with ReadableStream