I have a web app that needs to upload large files to Azure BLOB storage. My solution uses HTML5 File API to slice into chunks which are then put as blob blocks, the IDs of the blocks are stored in an array and then the blocks are committed as a blob.
The solution works fine in IE. On 64 bit Chrome I have successfully uploaded 4Gb files but see very heavy memory usage (2Gb+). On 32 bit Chrome the specific chrome process will get to around 500-550Mb and then crash.
I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array so obviously there will be some memory creeep but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices into memory.
It's written as an Angular service called from a controller, I think just the service code is pertinent:
(function() {
'use strict';
angular
.module('app.core')
.factory('blobUploadService',
[
'$http', 'stringUtilities',
blobUploadService
]);
function blobUploadService($http, stringUtilities) {
var defaultBlockSize = 1024 * 1024; // Default to 1024KB
var stopWatch = {};
var state = {};
var initializeState = function(config) {
var blockSize = defaultBlockSize;
if (config.blockSize) blockSize = config.blockSize;
var maxBlockSize = blockSize;
var numberOfBlocks = 1;
var file = config.file;
var fileSize = file.size;
if (fileSize < blockSize) {
maxBlockSize = fileSize;
}
if (fileSize % maxBlockSize === 0) {
numberOfBlocks = fileSize / maxBlockSize;
} else {
numberOfBlocks = parseInt(fileSize / maxBlockSize, 10) + 1;
}
return {
maxBlockSize: maxBlockSize,
numberOfBlocks: numberOfBlocks,
totalBytesRemaining: fileSize,
currentFilePointer: 0,
blockIds: new Array(),
blockIdPrefix: 'block-',
bytesUploaded: 0,
submitUri: null,
file: file,
baseUrl: config.baseUrl,
sasToken: config.sasToken,
fileUrl: config.baseUrl + config.sasToken,
progress: config.progress,
complete: config.complete,
error: config.error,
cancelled: false
};
};
/* config: {
baseUrl: // baseUrl for blob file uri (i.e. http://<accountName>.blob.core.windows.net/<container>/<blobname>),
sasToken: // Shared access signature querystring key/value prefixed with ?,
file: // File object using the HTML5 File API,
progress: // progress callback function,
complete: // complete callback function,
error: // error callback function,
blockSize: // Use this to override the defaultBlockSize
} */
var upload = function(config) {
state = initializeState(config);
var reader = new FileReader();
reader.onloadend = function(evt) {
if (evt.target.readyState === FileReader.DONE && !state.cancelled) { // DONE === 2
var uri = state.fileUrl + '&comp=block&blockid=' + state.blockIds[state.blockIds.length - 1];
var requestData = new Uint8Array(evt.target.result);
$http.put(uri,
requestData,
{
headers: {
'x-ms-blob-type': 'BlockBlob',
'Content-Type': state.file.type
},
transformRequest: []
})
.success(function(data, status, headers, config) {
state.bytesUploaded += requestData.length;
var percentComplete = ((parseFloat(state.bytesUploaded) / parseFloat(state.file.size)) * 100
).toFixed(2);
if (state.progress) state.progress(percentComplete, data, status, headers, config);
uploadFileInBlocks(reader, state);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
});
}
};
uploadFileInBlocks(reader, state);
return {
cancel: function() {
state.cancelled = true;
}
};
};
function cancel() {
stopWatch = {};
state.cancelled = true;
return true;
}
function startStopWatch(handle) {
if (stopWatch[handle] === undefined) {
stopWatch[handle] = {};
stopWatch[handle].start = Date.now();
}
}
function stopStopWatch(handle) {
stopWatch[handle].stop = Date.now();
var duration = stopWatch[handle].stop - stopWatch[handle].start;
delete stopWatch[handle];
return duration;
}
var commitBlockList = function(state) {
var uri = state.fileUrl + '&comp=blocklist';
var requestBody = '<?xml version="1.0" encoding="utf-8"?><BlockList>';
for (var i = 0; i < state.blockIds.length; i++) {
requestBody += '<Latest>' + state.blockIds[i] + '</Latest>';
}
requestBody += '</BlockList>';
$http.put(uri,
requestBody,
{
headers: {
'x-ms-blob-content-type': state.file.type
}
})
.success(function(data, status, headers, config) {
if (state.complete) state.complete(data, status, headers, config);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
// called asynchronously if an error occurs
// or server returns response with an error status.
});
};
var uploadFileInBlocks = function(reader, state) {
if (!state.cancelled) {
if (state.totalBytesRemaining > 0) {
var fileContent = state.file.slice(state.currentFilePointer,
state.currentFilePointer + state.maxBlockSize);
var blockId = state.blockIdPrefix + stringUtilities.pad(state.blockIds.length, 6);
state.blockIds.push(btoa(blockId));
reader.readAsArrayBuffer(fileContent);
state.currentFilePointer += state.maxBlockSize;
state.totalBytesRemaining -= state.maxBlockSize;
if (state.totalBytesRemaining < state.maxBlockSize) {
state.maxBlockSize = state.totalBytesRemaining;
}
} else {
commitBlockList(state);
}
}
};
return {
upload: upload,
cancel: cancel,
startStopWatch: startStopWatch,
stopStopWatch: stopStopWatch
};
};
})();
Are there any ways I can move the scope of objects to help with Chrome GC? I have seen other people mentioning similar issues but understood Chromium had resolved some.
I should say my solution is heavily based on Gaurav Mantri's blog post here:
http://gauravmantri.com/2013/02/16/uploading-large-files-in-windows-azure-blob-storage-using-shared-access-signature-html-and-javascript/#comment-47480
I can't see any obvious memory leaks or things I can change to help
garbage collection. I store the block IDs in an array so obviously
there will be some memory creeep but this shouldn't be massive. It's
almost as if the File API is holding the whole file it slices into
memory.
You are correct. The new Blobs created by .slice() are being held in memory.
The solution is to call Blob.prototype.close() on the Blob reference when processing Blob or File object is complete.
Note also, at javascript at Question also creates a new instance of FileReader if upload function is called more than once.
4.3.1. The slice method
The slice() method returns a new Blob object with bytes ranging
from the optional start parameter up to but not including the
optional end parameter, and with a type attribute that is the
value of the optional contentType parameter.
Blob instances exist for the life of document. Though Blob should be garbage collected once removed from Blob URL Store
9.6. Lifetime of Blob URLs
Note: User agents are free to garbage collect resources removed from
the Blob URL Store.
Each Blob must have an internal snapshot state, which must be
initially set to the state of the underlying storage, if any such
underlying storage exists, and must be preserved through
StructuredClone. Further normative definition of snapshot state can
be found for Files.
4.3.2. The close method
The close() method is said to close a Blob, and must act as
follows:
If the readability state of the context object is CLOSED, terminate this algorithm.
Otherwise, set the readability state of the context object to CLOSED.
If the context object has an entry in the Blob URL Store, remove the entry that corresponds to the context object.
If Blob object is passed to URL.createObjectURL(), call URL.revokeObjectURL() on Blob or File object, then call .close().
The revokeObjectURL(url) static method
Revokes the Blob URL provided in the string url by removing the corresponding entry from the Blob URL Store. This method must act
as follows:
1. If the url refers to a Blob that has a readability state of CLOSED OR if the value provided for the url argument is
not a Blob URL, OR if the value provided for the url argument does
not have an entry in the Blob URL Store, this method call does
nothing. User agents may display a message on the error console.
2. Otherwise, user agents must remove the entry from the Blob URL Store for url.
You can view the result of these calls by opening
chrome://blob-internals
reviewing details of before and after calls which create Blob and close Blob.
For example, from
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
Type: data
Length: 3
to
xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
following call to .close(). Similarly from
blob:http://example.com/c2823f75-de26-46f9-a4e5-95f57b8230bd
Uuid: 29e430a6-f093-40c2-bc70-2b6838a713bc
An alternative approach could be to send file as an ArrayBuffer or chunks of array buffers. Then re-assemble the file at server.
Or you can call FileReader constructor, FileReader.prototype.readAsArrayBuffer(), and load event of FileReader each once.
At load event of FileReader pass ArrayBuffer to Uint8Array, use ReadableStream, TypedArray.prototype.subarray(), .getReader(), .read() to get N chunks of ArrayBuffer as a TypedArray at pull from Uint8Array. When N chunks equaling .byteLength of ArrayBuffer have been processed, pass array of Uint8Arrays to Blob constructor to recombine file parts into single file at browser; then send Blob to server.
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<input id="file" type="file">
<br>
<progress value="0"></progress>
<br>
<output for="file"><img alt="preview"></output>
<script type="text/javascript">
const [input, output, img, progress, fr, handleError, CHUNK] = [
document.querySelector("input[type='file']")
, document.querySelector("output[for='file']")
, document.querySelector("output img")
, document.querySelector("progress")
, new FileReader
, (err) => console.log(err)
, 1024 * 1024
];
progress.addEventListener("progress", e => {
progress.value = e.detail.value;
e.detail.promise();
});
let [chunks, NEXT, CURR, url, blob] = [Array(), 0, 0];
input.onchange = () => {
NEXT = CURR = progress.value = progress.max = chunks.length = 0;
if (url) {
URL.revokeObjectURL(url);
if (blob.hasOwnProperty("close")) {
blob.close();
}
}
if (input.files.length) {
console.log(input.files[0]);
progress.max = input.files[0].size;
progress.step = progress.max / CHUNK;
fr.readAsArrayBuffer(input.files[0]);
}
}
fr.onload = () => {
const VIEW = new Uint8Array(fr.result);
const LEN = VIEW.byteLength;
const {type, name:filename} = input.files[0];
const stream = new ReadableStream({
pull(controller) {
if (NEXT < LEN) {
controller
.enqueue(VIEW.subarray(NEXT, !NEXT ? CHUNK : CHUNK + NEXT));
NEXT += CHUNK;
} else {
controller.close();
}
},
cancel(reason) {
console.log(reason);
throw new Error(reason);
}
});
const [reader, processData] = [
stream.getReader()
, ({value, done}) => {
if (done) {
return reader.closed.then(() => chunks);
}
chunks.push(value);
return new Promise(resolve => {
progress.dispatchEvent(
new CustomEvent("progress", {
detail:{
value:CURR += value.byteLength,
promise:resolve
}
})
);
})
.then(() => reader.read().then(data => processData(data)))
.catch(e => reader.cancel(e))
}
];
reader.read()
.then(data => processData(data))
.then(data => {
blob = new Blob(data, {type});
console.log("complete", data, blob);
if (/image/.test(type)) {
url = URL.createObjectURL(blob);
img.onload = () => {
img.title = filename;
input.value = "";
}
img.src = url;
} else {
input.value = "";
}
})
.catch(e => handleError(e))
}
</script>
</body>
</html>
plnkr http://plnkr.co/edit/AEZ7iQce4QaJOKut71jk?p=preview
You can also use utilize fetch()
fetch(new Request("/path/to/server/", {method:"PUT", body:blob}))
To transmit body for a request request, run these
steps:
Let body be request’s body.
If body is null, then queue a fetch task on request to process request end-of-body for request and abort these steps.
Let read be the result of reading a chunk from body’s stream.
When read is fulfilled with an object whose done property is false and whose value property is a Uint8Array object, run these
substeps:
Let bytes be the byte sequence represented by the Uint8Array object.
Transmit bytes.
Increase body’s transmitted bytes by bytes’s length.
Run the above step again.
When read is fulfilled with an object whose done property is true, queue a fetch task on request to process request end-of-body
for request.
When read is fulfilled with a value that matches with neither of the above patterns, or read is rejected, terminate the ongoing
fetch with reason fatal.
See also
Progress indicators for fetch?
Fetch with ReadableStream
Related
I've been trying to find a clear answer on this for a while now, since some have only been for big CSV files for example.
The following code works perfectly for smaller files, whenever size exceeds 100 MB, upload process takes a big chunk of memory apparently.
I'm already slicing the files into smaller chunks as well, however apparently the initial reading of the file is causing this issue.
Question: How should I change the following code to prevent the crashes to happen?
axios.post(`/URL_GOES_HERE`, {
name: file.name,
size: file.size,
md5,
extension: file.type.split('/')[1]
})
.then((response) => {
const reader = new FileReader();
reader.onload = (e) => {
const { chunks, type, status } = response.data;
if (status === 'uploading') {
let start = 0;
chunks.forEach(async ({ materialId, size, index }) => {
window.file0 = file;
const buf = reader.result.slice(start, start + size);
const arrayBuf = new Uint8Array(buf);
start = start + size;
const url = `URL_FOR_EACH_CHUNK`;
let xhr = new XMLHttpRequest();
xhr.open('PUT', url, false);
xhr.setRequestHeader(
'Content-Type',
'application/octet-stream'
);
xhr.setRequestHeader(
'Authorization',
`Bearer TOKEN_GOES_HERE`
);
xhr.send(arrayBuf);
});
}
};
reader.readAsArrayBuffer(file);
})
This is a bad way to upload files by slicing and reading the data.
just send the file as it's without reading the content
const res = await fetch('URL_GOES_HERE', {
method: 'post',
body: file
})
await res.text()
if you really need to upload them partially with chunks then just slice the blob and upload that without sending arraybuffer
chunk_to_send = file.slice(start, end)
xhr.send(chunk_to_send)
Browser will be able to pipe the data as a stream and upload them without any memory concern
var profileImage = fileInputInByteArray;
$.ajax({
url: 'abc.com/',
type: 'POST',
dataType: 'json',
data: {
// Other data
ProfileImage: profileimage
// Other data
},
success: {
}
})
// Code in WebAPI
[HttpPost]
public HttpResponseMessage UpdateProfile([FromUri]UpdateProfileModel response) {
//...
return response;
}
public class UpdateProfileModel {
// ...
public byte[] ProfileImage {get ;set; }
// ...
}
<input type="file" id="inputFile" />
I am using ajax call to post byte[] value of a input type = file input to web api which receives in byte[] format. However, I am experiencing difficulty of getting byte array. I am expecting that we can get the byte array through File API.
Note: I need to store the byte array in a variable first before passing through ajax call
[Edit]
As noted in comments above, while still on some UA implementations, readAsBinaryString method didn't made its way to the specs and should not be used in production.
Instead, use readAsArrayBuffer and loop through it's buffer to get back the binary string :
document.querySelector('input').addEventListener('change', function() {
var reader = new FileReader();
reader.onload = function() {
var arrayBuffer = this.result,
array = new Uint8Array(arrayBuffer),
binaryString = String.fromCharCode.apply(null, array);
console.log(binaryString);
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file" />
<div id="result"></div>
For a more robust way to convert your arrayBuffer in binary string, you can refer to this answer.
[old answer] (modified)
Yes, the file API does provide a way to convert your File, in the <input type="file"/> to a binary string, thanks to the FileReader Object and its method readAsBinaryString.
[But don't use it in production !]
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var binaryString = this.result;
document.querySelector('#result').innerHTML = binaryString;
}
reader.readAsBinaryString(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
If you want an array buffer, then you can use the readAsArrayBuffer() method :
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var arrayBuffer = this.result;
console.log(arrayBuffer);
document.querySelector('#result').innerHTML = arrayBuffer + ' '+arrayBuffer.byteLength;
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
$(document).ready(function(){
(function (document) {
var input = document.getElementById("files"),
output = document.getElementById("result"),
fileData; // We need fileData to be visible to getBuffer.
// Eventhandler for file input.
function openfile(evt) {
var files = input.files;
// Pass the file to the blob, not the input[0].
fileData = new Blob([files[0]]);
// Pass getBuffer to promise.
var promise = new Promise(getBuffer);
// Wait for promise to be resolved, or log error.
promise.then(function(data) {
// Here you can pass the bytes to another function.
output.innerHTML = data.toString();
console.log(data);
}).catch(function(err) {
console.log('Error: ',err);
});
}
/*
Create a function which will be passed to the promise
and resolve it when FileReader has finished loading the file.
*/
function getBuffer(resolve) {
var reader = new FileReader();
reader.readAsArrayBuffer(fileData);
reader.onload = function() {
var arrayBuffer = reader.result
var bytes = new Uint8Array(arrayBuffer);
resolve(bytes);
}
}
// Eventlistener for file input.
input.addEventListener('change', openfile, false);
}(document));
});
<!DOCTYPE html>
<html>
<head>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
</head>
<body>
<input type="file" id="files"/>
<div id="result"></div>
</body>
</html>
Modern browsers now have the arrayBuffer method on Blob's:
document.querySelector('input').addEventListener('change', async (event) => {
const buffer = await event.target.files[0].arrayBuffer()
console.log(buffer)
}, false)
🎉 🎉
This is a long post, but I was tired of all these examples that weren't working for me because they used Promise objects or an errant this that has a different meaning when you are using Reactjs. My implementation was using a DropZone with reactjs, and I got the bytes using a framework similar to what is posted at this following site, when nothing else above would work: https://www.mokuji.me/article/drop-upload-tutorial-1 . There were 2 keys, for me:
You have to get the bytes from the event object, using and during a FileReader's onload function.
I tried various combinations, but in the end, what worked was:
const bytes = e.target.result.split('base64,')[1];
Where e is the event. React requires const, you could use var in plain Javascript. But that gave me the base64 encoded byte string.
So I'm just going to include the applicable lines for integrating this as if you were using React, because that's how I was building it, but try to also generalize this, and add comments where necessary, to make it applicable to a vanilla Javascript implementation - caveated that I did not use it like that in such a construct to test it.
These would be your bindings at the top, in your constructor, in a React framework (not relevant to a vanilla Javascript implementation):
this.uploadFile = this.uploadFile.bind(this);
this.processFile = this.processFile.bind(this);
this.errorHandler = this.errorHandler.bind(this);
this.progressHandler = this.progressHandler.bind(this);
And you'd have onDrop={this.uploadFile} in your DropZone element. If you were doing this without React, this is the equivalent of adding the onclick event handler you want to run when you click the "Upload File" button.
<button onclick="uploadFile(event);" value="Upload File" />
Then the function (applicable lines... I'll leave out my resetting my upload progress indicator, etc.):
uploadFile(event){
// This is for React, only
this.setState({
files: event,
});
console.log('File count: ' + this.state.files.length);
// You might check that the "event" has a file & assign it like this
// in vanilla Javascript:
// var files = event.target.files;
// if (!files && files.length > 0)
// files = (event.dataTransfer ? event.dataTransfer.files :
// event.originalEvent.dataTransfer.files);
// You cannot use "files" as a variable in React, however:
const in_files = this.state.files;
// iterate, if files length > 0
if (in_files.length > 0) {
for (let i = 0; i < in_files.length; i++) {
// use this, instead, for vanilla JS:
// for (var i = 0; i < files.length; i++) {
const a = i + 1;
console.log('in loop, pass: ' + a);
const f = in_files[i]; // or just files[i] in vanilla JS
const reader = new FileReader();
reader.onerror = this.errorHandler;
reader.onprogress = this.progressHandler;
reader.onload = this.processFile(f);
reader.readAsDataURL(f);
}
}
}
There was this question on that syntax, for vanilla JS, on how to get that file object:
JavaScript/HTML5/jQuery Drag-And-Drop Upload - "Uncaught TypeError: Cannot read property 'files' of undefined"
Note that React's DropZone will already put the File object into this.state.files for you, as long as you add files: [], to your this.state = { .... } in your constructor. I added syntax from an answer on that post on how to get your File object. It should work, or there are other posts there that can help. But all that Q/A told me was how to get the File object, not the blob data, itself. And even if I did fileData = new Blob([files[0]]); like in sebu's answer, which didn't include var with it for some reason, it didn't tell me how to read that blob's contents, and how to do it without a Promise object. So that's where the FileReader came in, though I actually tried and found I couldn't use their readAsArrayBuffer to any avail.
You will have to have the other functions that go along with this construct - one to handle onerror, one for onprogress (both shown farther below), and then the main one, onload, that actually does the work once a method on reader is invoked in that last line. Basically you are passing your event.dataTransfer.files[0] straight into that onload function, from what I can tell.
So the onload method calls my processFile() function (applicable lines, only):
processFile(theFile) {
return function(e) {
const bytes = e.target.result.split('base64,')[1];
}
}
And bytes should have the base64 bytes.
Additional functions:
errorHandler(e){
switch (e.target.error.code) {
case e.target.error.NOT_FOUND_ERR:
alert('File not found.');
break;
case e.target.error.NOT_READABLE_ERR:
alert('File is not readable.');
break;
case e.target.error.ABORT_ERR:
break; // no operation
default:
alert('An error occurred reading this file.');
break;
}
}
progressHandler(e) {
if (e.lengthComputable){
const loaded = Math.round((e.loaded / e.total) * 100);
let zeros = '';
// Percent loaded in string
if (loaded >= 0 && loaded < 10) {
zeros = '00';
}
else if (loaded < 100) {
zeros = '0';
}
// Display progress in 3-digits and increase bar length
document.getElementById("progress").textContent = zeros + loaded.toString();
document.getElementById("progressBar").style.width = loaded + '%';
}
}
And applicable progress indicator markup:
<table id="tblProgress">
<tbody>
<tr>
<td><b><span id="progress">000</span>%</b> <span className="progressBar"><span id="progressBar" /></span></td>
</tr>
</tbody>
</table>
And CSS:
.progressBar {
background-color: rgba(255, 255, 255, .1);
width: 100%;
height: 26px;
}
#progressBar {
background-color: rgba(87, 184, 208, .5);
content: '';
width: 0;
height: 26px;
}
EPILOGUE:
Inside processFile(), for some reason, I couldn't add bytes to a variable I carved out in this.state. So, instead, I set it directly to the variable, attachments, that was in my JSON object, RequestForm - the same object as my this.state was using. attachments is an array so I could push multiple files. It went like this:
const fileArray = [];
// Collect any existing attachments
if (RequestForm.state.attachments.length > 0) {
for (let i=0; i < RequestForm.state.attachments.length; i++) {
fileArray.push(RequestForm.state.attachments[i]);
}
}
// Add the new one to this.state
fileArray.push(bytes);
// Update the state
RequestForm.setState({
attachments: fileArray,
});
Then, because this.state already contained RequestForm:
this.stores = [
RequestForm,
]
I could reference it as this.state.attachments from there on out. React feature that isn't applicable in vanilla JS. You could build a similar construct in plain JavaScript with a global variable, and push, accordingly, however, much easier:
var fileArray = new Array(); // place at the top, before any functions
// Within your processFile():
var newFileArray = [];
if (fileArray.length > 0) {
for (var i=0; i < fileArray.length; i++) {
newFileArray.push(fileArray[i]);
}
}
// Add the new one
newFileArray.push(bytes);
// Now update the global variable
fileArray = newFileArray;
Then you always just reference fileArray, enumerate it for any file byte strings, e.g. var myBytes = fileArray[0]; for the first file.
This is simple way to convert files to Base64 and avoid "maximum call stack size exceeded at FileReader.reader.onload" with the file has big size.
document.querySelector('#fileInput').addEventListener('change', function () {
var reader = new FileReader();
var selectedFile = this.files[0];
reader.onload = function () {
var comma = this.result.indexOf(',');
var base64 = this.result.substr(comma + 1);
console.log(base64);
}
reader.readAsDataURL(selectedFile);
}, false);
<input id="fileInput" type="file" />
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var arrayBuffer = this.result,
array = new Uint8Array(arrayBuffer),
binaryString = String.fromCharCode.apply(null, array);
console.log(binaryString);
console.log(arrayBuffer);
document.querySelector('#result').innerHTML = arrayBuffer + ' '+arrayBuffer.byteLength;
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
Here is one answer to get the actual final byte array , just using FileReader and ArrayBuffer :
const test_function = async () => {
... ... ...
const get_file_array = (file) => {
return new Promise((acc, err) => {
const reader = new FileReader();
reader.onload = (event) => { acc(event.target.result) };
reader.onerror = (err) => { err(err) };
reader.readAsArrayBuffer(file);
});
}
const temp = await get_file_array(files[0])
console.log('here we finally ve the file as a ArrayBuffer : ',temp);
const fileb = new Uint8Array(fileb)
... ... ...
}
where file is directly the File object u want to read , this has to be done in a async function...
I have an AngularJS code to perform upload of files. At first I thought it works OK but when I've tried to upload bigger files (e.g. 5 files, 10 MB each) then I saw that this code has very poor performance regarding memory handling.
For example: when I attach 5 files (10 MB each = 50 MB in total) then peak browser process memory demand reaches 2 GB!
I've tested it in Firefox, Chrome and IE. Additionally, in Chrome when browser process reaches 3 GB of memory committed then the page crashes.
In Firefox I can see an error in console when trying to upload more than 80 MB at once:
out of memory
I've tried to modify the parts where the actual request is made and removed data: JSON.stringify(formattedData) which was the first bottleneck. And it turned out that stringifying is redundant there.
But the second bottleneck still exists which is the function binArrayToJson which takes the ArrayBuffer and reads the data into the byte array.
var binArrayToJson = function (binArray) {
var str = [binArray.length];
for (var i = 0, binLength = binArray.length; i < binLength; i++) {
str[i] = binArray[i];
}
return str;
}
var applyAttachments = function (data1) {
angular.forEach(data1,
function (value, key) {
if (key === "Attachments") {
for (var i = 0; i < value.length; i++) {
if (value[i].file) { //already saved item don't contain 'file' property
var reader = new FileReader();
reader.onloadend = (function (f) {
return function (result) {
var arrayBuffer = result.target.result;
value[f].fileContent = binArrayToJson(new Uint8Array(arrayBuffer));
value[f].isUploaded = true;
};
})(i);
reader.readAsArrayBuffer(value[i].file);
}
}
}
});
};
var createMVCModel = function (output) {
var defaultStringValue = "";
return {
id: output.id,
Name: output.name || defaultStringValue,
Status: output.status || "Draft",
Date: output.date || null
Attachments: output.attachments || []
};
};
var saveModel = function (data, url) {
var formattedData = createMVCModel(data);
var deferred = $q.defer();
applyAttachments(formattedData);
var check = function () {
if (allNewFilesUploaded(formattedData) === true) {
$http({
url: url,
method: "POST",
data: formattedData,
headers: { 'Content-Type': undefined }
})
.then(function (result) {
deferred.resolve(result);
},
function (result) {
deferred.reject(result);
});
} else {
setTimeout(check, 1000);
}
}
check();
return deferred.promise;
};
I omitted the parts where the following requirements are being met:
checking number of files (limit is 10)
checking size of each file (limit is 10 MB each)
checking permitted file extensions
sending rest of the (text) data along with files to the server (ASP.NET MVC method)
Doing Multiple $http.post Requests Directly from a FileList
To avoid memory problems, send the files directly:
$scope.upload = function(url, fileList) {
var config = { headers: { 'Content-Type': undefined },
transformResponse: angular.identity
};
var promises = fileList.map(function(file,index) {
config.params = {'n': index, 'name': file.name};
return $http.post(url, file, config);
});
return $q.all(promises);
};
When sending a POST with a File object, it is important to set 'Content-Type': undefined. The XHR send method will then detect the File object and automatically set the content type.
I am using the Office Javascript API to write an Add-in for Word using Angular.
I want to retrieve the Word document through the API, then convert it to a file and upload it via POST to a server.
The code I am using is nearly identical to the documentation code that Microsoft provides for this use case: https://dev.office.com/reference/add-ins/shared/document.getfileasync#example---get-a-document-in-office-open-xml-compressed-format
The server endpoint requires uploads to be POSTed through a multipart form, so I create a FormData object on which I append the file (a blob) as well as some metadata, when creating the $http call.
The file is being transmitted to the server, but when I open it, it has become corrupted and it can no longer be opened by Word.
According to the documentation, the Office.context.document.getFileAsync function returns a byte array. However, the resulting fileContent variable is a string. When I console.log this string it seems to be compressed data, like it should be.
My guess is I need to do some preprocessing before turning the string into a Blob. But which preprocessing? Base64 encoding through atob doesn't seem to be doing anything.
let sendFile = ( fileContent ) => {
let blob = new Blob([fileContent], { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' }),
fd = new FormData();
blob.lastModifiedDate = new Date();
fd.append('file', blob, 'uploaded_file_test403.docx');
fd.append('case_id', caseIdReducer.data());
$http.post('/file/create', fd, {
transformRequest: angular.identity,
headers: { 'Content-Type': undefined }
})
.success( ( ) => {
console.log('upload succeeded');
})
.error(( ) => {
console.log('upload failed');
});
};
function onGotAllSlices(docdataSlices) {
let docdata = [];
for (let i = 0; i < docdataSlices.length; i++) {
docdata = docdata.concat(docdataSlices[i]);
}
let fileContent = new String();
for (let j = 0; j < docdata.length; j++) {
fileContent += String.fromCharCode(docdata[j]);
}
// Now all the file content is stored in 'fileContent' variable,
// you can do something with it, such as print, fax...
sendFile(fileContent);
}
function getSliceAsync(file, nextSlice, sliceCount, gotAllSlices, docdataSlices, slicesReceived) {
file.getSliceAsync(nextSlice, (sliceResult) => {
if (sliceResult.status === 'succeeded') {
if (!gotAllSlices) { // Failed to get all slices, no need to continue.
return;
}
// Got one slice, store it in a temporary array.
// (Or you can do something else, such as
// send it to a third-party server.)
docdataSlices[sliceResult.value.index] = sliceResult.value.data;
if (++slicesReceived === sliceCount) {
// All slices have been received.
file.closeAsync();
onGotAllSlices(docdataSlices);
} else {
getSliceAsync(file, ++nextSlice, sliceCount, gotAllSlices, docdataSlices, slicesReceived);
}
} else {
gotAllSlices = false;
file.closeAsync();
console.log(`getSliceAsync Error: ${sliceResult.error.message}`);
}
});
}
// User clicks button to start document retrieval from Word and uploading to server process
ctrl.handleClick = ( ) => {
Office.context.document.getFileAsync(Office.FileType.Compressed, { sliceSize: 65536 /*64 KB*/ },
(result) => {
if (result.status === 'succeeded') {
// If the getFileAsync call succeeded, then
// result.value will return a valid File Object.
let myFile = result.value,
sliceCount = myFile.sliceCount,
slicesReceived = 0, gotAllSlices = true, docdataSlices = [];
// Get the file slices.
getSliceAsync(myFile, 0, sliceCount, gotAllSlices, docdataSlices, slicesReceived);
} else {
console.log(`Error: ${result.error.message}`);
}
}
);
};
I ended up doing this with the fileContent string:
let bytes = new Uint8Array(fileContent.length);
for (let i = 0; i < bytes.length; i++) {
bytes[i] = fileContent.charCodeAt(i);
}
I then proceed to build the Blob with these bytes:
let blob = new Blob([bytes], { type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' });
If I then send this via a POST request, the file isn't mangled and can be opened correctly by Word.
I still get the feeling this can be achieved with less hassle / less steps. If anyone has a better solution, I'd be very interested to learn.
thx for your answer, Uint8Array was the solution. Just a little improvement, to avoid creating the string:
let bytes = new Uint8Array(docdata.length);
for (var i = 0; i < docdata.length; i++) {
bytes[i] = docdata[i];
}
Pff! what is wrong with a getting a instance of File and not using FileReader api? c'mon Microsoft!
You should take the byte array and throw it into the blob constructor, turning a binary blob to string in javascript is a bad idea that can lead to "out of range" error or incorrect encoding
just do something along with this
var byteArray = new Uint8Array(3)
byteArray[0] = 97
byteArray[1] = 98
byteArray[2] = 99
new Blob([byteArray])
if the chunk is an instance of a typed arrays or a instance of blob/file. in that case you can just do:
blob = new Blob([blob, chunk])
And please... don't base64 encode it (~3x larger + slower)
var profileImage = fileInputInByteArray;
$.ajax({
url: 'abc.com/',
type: 'POST',
dataType: 'json',
data: {
// Other data
ProfileImage: profileimage
// Other data
},
success: {
}
})
// Code in WebAPI
[HttpPost]
public HttpResponseMessage UpdateProfile([FromUri]UpdateProfileModel response) {
//...
return response;
}
public class UpdateProfileModel {
// ...
public byte[] ProfileImage {get ;set; }
// ...
}
<input type="file" id="inputFile" />
I am using ajax call to post byte[] value of a input type = file input to web api which receives in byte[] format. However, I am experiencing difficulty of getting byte array. I am expecting that we can get the byte array through File API.
Note: I need to store the byte array in a variable first before passing through ajax call
[Edit]
As noted in comments above, while still on some UA implementations, readAsBinaryString method didn't made its way to the specs and should not be used in production.
Instead, use readAsArrayBuffer and loop through it's buffer to get back the binary string :
document.querySelector('input').addEventListener('change', function() {
var reader = new FileReader();
reader.onload = function() {
var arrayBuffer = this.result,
array = new Uint8Array(arrayBuffer),
binaryString = String.fromCharCode.apply(null, array);
console.log(binaryString);
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file" />
<div id="result"></div>
For a more robust way to convert your arrayBuffer in binary string, you can refer to this answer.
[old answer] (modified)
Yes, the file API does provide a way to convert your File, in the <input type="file"/> to a binary string, thanks to the FileReader Object and its method readAsBinaryString.
[But don't use it in production !]
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var binaryString = this.result;
document.querySelector('#result').innerHTML = binaryString;
}
reader.readAsBinaryString(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
If you want an array buffer, then you can use the readAsArrayBuffer() method :
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var arrayBuffer = this.result;
console.log(arrayBuffer);
document.querySelector('#result').innerHTML = arrayBuffer + ' '+arrayBuffer.byteLength;
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
$(document).ready(function(){
(function (document) {
var input = document.getElementById("files"),
output = document.getElementById("result"),
fileData; // We need fileData to be visible to getBuffer.
// Eventhandler for file input.
function openfile(evt) {
var files = input.files;
// Pass the file to the blob, not the input[0].
fileData = new Blob([files[0]]);
// Pass getBuffer to promise.
var promise = new Promise(getBuffer);
// Wait for promise to be resolved, or log error.
promise.then(function(data) {
// Here you can pass the bytes to another function.
output.innerHTML = data.toString();
console.log(data);
}).catch(function(err) {
console.log('Error: ',err);
});
}
/*
Create a function which will be passed to the promise
and resolve it when FileReader has finished loading the file.
*/
function getBuffer(resolve) {
var reader = new FileReader();
reader.readAsArrayBuffer(fileData);
reader.onload = function() {
var arrayBuffer = reader.result
var bytes = new Uint8Array(arrayBuffer);
resolve(bytes);
}
}
// Eventlistener for file input.
input.addEventListener('change', openfile, false);
}(document));
});
<!DOCTYPE html>
<html>
<head>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
</head>
<body>
<input type="file" id="files"/>
<div id="result"></div>
</body>
</html>
Modern browsers now have the arrayBuffer method on Blob's:
document.querySelector('input').addEventListener('change', async (event) => {
const buffer = await event.target.files[0].arrayBuffer()
console.log(buffer)
}, false)
🎉 🎉
This is a long post, but I was tired of all these examples that weren't working for me because they used Promise objects or an errant this that has a different meaning when you are using Reactjs. My implementation was using a DropZone with reactjs, and I got the bytes using a framework similar to what is posted at this following site, when nothing else above would work: https://www.mokuji.me/article/drop-upload-tutorial-1 . There were 2 keys, for me:
You have to get the bytes from the event object, using and during a FileReader's onload function.
I tried various combinations, but in the end, what worked was:
const bytes = e.target.result.split('base64,')[1];
Where e is the event. React requires const, you could use var in plain Javascript. But that gave me the base64 encoded byte string.
So I'm just going to include the applicable lines for integrating this as if you were using React, because that's how I was building it, but try to also generalize this, and add comments where necessary, to make it applicable to a vanilla Javascript implementation - caveated that I did not use it like that in such a construct to test it.
These would be your bindings at the top, in your constructor, in a React framework (not relevant to a vanilla Javascript implementation):
this.uploadFile = this.uploadFile.bind(this);
this.processFile = this.processFile.bind(this);
this.errorHandler = this.errorHandler.bind(this);
this.progressHandler = this.progressHandler.bind(this);
And you'd have onDrop={this.uploadFile} in your DropZone element. If you were doing this without React, this is the equivalent of adding the onclick event handler you want to run when you click the "Upload File" button.
<button onclick="uploadFile(event);" value="Upload File" />
Then the function (applicable lines... I'll leave out my resetting my upload progress indicator, etc.):
uploadFile(event){
// This is for React, only
this.setState({
files: event,
});
console.log('File count: ' + this.state.files.length);
// You might check that the "event" has a file & assign it like this
// in vanilla Javascript:
// var files = event.target.files;
// if (!files && files.length > 0)
// files = (event.dataTransfer ? event.dataTransfer.files :
// event.originalEvent.dataTransfer.files);
// You cannot use "files" as a variable in React, however:
const in_files = this.state.files;
// iterate, if files length > 0
if (in_files.length > 0) {
for (let i = 0; i < in_files.length; i++) {
// use this, instead, for vanilla JS:
// for (var i = 0; i < files.length; i++) {
const a = i + 1;
console.log('in loop, pass: ' + a);
const f = in_files[i]; // or just files[i] in vanilla JS
const reader = new FileReader();
reader.onerror = this.errorHandler;
reader.onprogress = this.progressHandler;
reader.onload = this.processFile(f);
reader.readAsDataURL(f);
}
}
}
There was this question on that syntax, for vanilla JS, on how to get that file object:
JavaScript/HTML5/jQuery Drag-And-Drop Upload - "Uncaught TypeError: Cannot read property 'files' of undefined"
Note that React's DropZone will already put the File object into this.state.files for you, as long as you add files: [], to your this.state = { .... } in your constructor. I added syntax from an answer on that post on how to get your File object. It should work, or there are other posts there that can help. But all that Q/A told me was how to get the File object, not the blob data, itself. And even if I did fileData = new Blob([files[0]]); like in sebu's answer, which didn't include var with it for some reason, it didn't tell me how to read that blob's contents, and how to do it without a Promise object. So that's where the FileReader came in, though I actually tried and found I couldn't use their readAsArrayBuffer to any avail.
You will have to have the other functions that go along with this construct - one to handle onerror, one for onprogress (both shown farther below), and then the main one, onload, that actually does the work once a method on reader is invoked in that last line. Basically you are passing your event.dataTransfer.files[0] straight into that onload function, from what I can tell.
So the onload method calls my processFile() function (applicable lines, only):
processFile(theFile) {
return function(e) {
const bytes = e.target.result.split('base64,')[1];
}
}
And bytes should have the base64 bytes.
Additional functions:
errorHandler(e){
switch (e.target.error.code) {
case e.target.error.NOT_FOUND_ERR:
alert('File not found.');
break;
case e.target.error.NOT_READABLE_ERR:
alert('File is not readable.');
break;
case e.target.error.ABORT_ERR:
break; // no operation
default:
alert('An error occurred reading this file.');
break;
}
}
progressHandler(e) {
if (e.lengthComputable){
const loaded = Math.round((e.loaded / e.total) * 100);
let zeros = '';
// Percent loaded in string
if (loaded >= 0 && loaded < 10) {
zeros = '00';
}
else if (loaded < 100) {
zeros = '0';
}
// Display progress in 3-digits and increase bar length
document.getElementById("progress").textContent = zeros + loaded.toString();
document.getElementById("progressBar").style.width = loaded + '%';
}
}
And applicable progress indicator markup:
<table id="tblProgress">
<tbody>
<tr>
<td><b><span id="progress">000</span>%</b> <span className="progressBar"><span id="progressBar" /></span></td>
</tr>
</tbody>
</table>
And CSS:
.progressBar {
background-color: rgba(255, 255, 255, .1);
width: 100%;
height: 26px;
}
#progressBar {
background-color: rgba(87, 184, 208, .5);
content: '';
width: 0;
height: 26px;
}
EPILOGUE:
Inside processFile(), for some reason, I couldn't add bytes to a variable I carved out in this.state. So, instead, I set it directly to the variable, attachments, that was in my JSON object, RequestForm - the same object as my this.state was using. attachments is an array so I could push multiple files. It went like this:
const fileArray = [];
// Collect any existing attachments
if (RequestForm.state.attachments.length > 0) {
for (let i=0; i < RequestForm.state.attachments.length; i++) {
fileArray.push(RequestForm.state.attachments[i]);
}
}
// Add the new one to this.state
fileArray.push(bytes);
// Update the state
RequestForm.setState({
attachments: fileArray,
});
Then, because this.state already contained RequestForm:
this.stores = [
RequestForm,
]
I could reference it as this.state.attachments from there on out. React feature that isn't applicable in vanilla JS. You could build a similar construct in plain JavaScript with a global variable, and push, accordingly, however, much easier:
var fileArray = new Array(); // place at the top, before any functions
// Within your processFile():
var newFileArray = [];
if (fileArray.length > 0) {
for (var i=0; i < fileArray.length; i++) {
newFileArray.push(fileArray[i]);
}
}
// Add the new one
newFileArray.push(bytes);
// Now update the global variable
fileArray = newFileArray;
Then you always just reference fileArray, enumerate it for any file byte strings, e.g. var myBytes = fileArray[0]; for the first file.
This is simple way to convert files to Base64 and avoid "maximum call stack size exceeded at FileReader.reader.onload" with the file has big size.
document.querySelector('#fileInput').addEventListener('change', function () {
var reader = new FileReader();
var selectedFile = this.files[0];
reader.onload = function () {
var comma = this.result.indexOf(',');
var base64 = this.result.substr(comma + 1);
console.log(base64);
}
reader.readAsDataURL(selectedFile);
}, false);
<input id="fileInput" type="file" />
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var arrayBuffer = this.result,
array = new Uint8Array(arrayBuffer),
binaryString = String.fromCharCode.apply(null, array);
console.log(binaryString);
console.log(arrayBuffer);
document.querySelector('#result').innerHTML = arrayBuffer + ' '+arrayBuffer.byteLength;
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
Here is one answer to get the actual final byte array , just using FileReader and ArrayBuffer :
const test_function = async () => {
... ... ...
const get_file_array = (file) => {
return new Promise((acc, err) => {
const reader = new FileReader();
reader.onload = (event) => { acc(event.target.result) };
reader.onerror = (err) => { err(err) };
reader.readAsArrayBuffer(file);
});
}
const temp = await get_file_array(files[0])
console.log('here we finally ve the file as a ArrayBuffer : ',temp);
const fileb = new Uint8Array(fileb)
... ... ...
}
where file is directly the File object u want to read , this has to be done in a async function...