file.slice fails second time - javascript

I'm trying to make a (front-end) Javascript that would be able to copy very large files (i.e. read them from a file input element and 'download' them using StreamSaver.js).
This is the actual code:
<html>
<header>
<title>File copying</title>
</header>
<body>
<script src="https://cdn.jsdelivr.net/npm/web-streams-polyfill#2.0.2/dist/ponyfill.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/streamsaver#2.0.3/StreamSaver.min.js"></script>
<script type="text/javascript">
const streamSaver = window.streamSaver;
async function copyFile() {
const fileInput = document.getElementById("fileInput");
const file = fileInput.files[0];
if (!file) {
alert('select a (large) file');
return;
}
const newName = file.name + " - Copy";
let remaining = file.size;
let written = 0;
const chunkSize = 1048576; // 1MB
const writeStream = streamSaver.createWriteStream(newName);
const writer = writeStream.getWriter();
while (remaining > 0) {
let readSize = chunkSize > remaining ? remaining : chunkSize;
let blob = file.slice(written, readSize);
let aBuff = await blob.arrayBuffer();
await writer.write(new Uint8Array(aBuff));
written += readSize;
remaining -= readSize;
}
await writer.close();
}
</script>
<input type="file" id="fileInput"/>
<button onclick="copyFile()">Copy file</button>
</body>
</html>
It seems that during the second loop in the while the aBuff variable value (the blob.arrayBuffer) is an empty ArrayBuffer.
Am I reading the file the wrong way? My intent is to read a (potentially huge) file, chunk by chunk and do something with each chunk (in this case just output it to the downloading file by StreamSaver.js). What better approach is available in today's browsers?

I would go with something like blob.stream() or new Response(blob).body to read all chunks of the file and potentially a TransformStream if needed. But if you need a custom slice size or better browser support than you can create your own blob -> readableStream utility
// Taken from https://www.npmjs.com/package/screw-filereader
function stream (blob) {
var position = 0
var blob = this
return new ReadableStream({
pull (controller) {
var chunk = blob.slice(position, position + 1048576)
return chunk.arrayBuffer().then(buffer => {
position += buffer.byteLength
var uint8array = new Uint8Array(buffer)
controller.enqueue(uint8array)
if (position == blob.size)
controller.close()
})
}
})
}
stream(blob).pipeTo(writeStream)
This way you can just pipe it to streamsaver instead of writing each chunk manually

Related

Chrome FileReader returns empty string for big files (>= 300MB)

Goal:
In the browser, read a file from the users file system as base64 string
These files are up to 1.5GB
Issue:
The followig script works perfectly fine on Firefox. Regardless of the filesize.
On Chrome, the script works fine for smaller files (I've tested files of ~ 5MB size)
If you pick a bigger file (e.g. 400MB) the FileReader completes without an error or exception, but returns an empty string instead of the base64 string
Questions:
Is this a chrome bug?
Why is there neither an error nor an exception?
How can I fix or work around this issue?
Important:
Please note, that chunking is not an option for me, since I need to send the full base64 string via 'POST' to an API that does not support chunks.
Code:
'use strict';
var filePickerElement = document.getElementById('filepicker');
filePickerElement.onchange = (event) => {
const selectedFile = event.target.files[0];
console.log('selectedFile', selectedFile);
readFile(selectedFile);
};
function readFile(selectedFile) {
console.log('START READING FILE');
const reader = new FileReader();
reader.onload = (e) => {
const fileBase64 = reader.result.toString();
console.log('ONLOAD','base64', fileBase64);
if (fileBase64 === '') {
alert('Result string is EMPTY :(');
} else {
alert('It worked as expected :)');
}
};
reader.onprogress = (e) => {
console.log('Progress', ~~((e.loaded / e.total) * 100 ), '%');
};
reader.onerror = (err) => {
console.error('Error reading the file.', err);
};
reader.readAsDataURL(selectedFile);
}
<!doctype html>
<html lang="en">
<head>
<!-- Required meta tags -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- Bootstrap CSS -->
<link href="https://cdn.jsdelivr.net/npm/bootstrap#5.0.0/dist/css/bootstrap.min.css" rel="stylesheet"
integrity="sha384-wEmeIV1mKuiNpC+IOBjI7aAzPcEZeedi5yW5f2yOq55WWLwNGmvvx4Um1vskeMj0" crossorigin="anonymous">
<title>FileReader issue example</title>
</head>
<body>
<div class="container">
<h1>FileReader issue example</h1>
<div class="card">
<div class="card-header">
Select File:
</div>
<div class="card-body">
<input type="file" id="filepicker" />
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/bootstrap#5.0.0/dist/js/bootstrap.bundle.min.js"
integrity="sha384-p34f1UUtsS3wqzfto5wAAmdvj+osOnFyQFpp4Ua3gs/ZVWx6oOypYoCJhGGScy+8"
crossorigin="anonymous"></script>
<script src="main.js"></script>
</body>
</html>
Is this a chrome bug?
As I said in my answer to Chrome, FileReader API, event.target.result === "", this a V8 (Chrome's but also node-js's and others' JavaScript JS engine) limitation.
It is intentional and thus can't really qualify as "a bug".
The technicalities are that what actually fails here is to build a String of more than 512MB (less the header) on 64bit systems because in V8 all heap objects must fit in a Smi (Small Integer), (cf this commit).
Why is there neither an error nor an exception?
That, might be a bug... As I also show in my linked answer, we get a RangeError when creating such a string directly:
const header = 24;
const bytes = new Uint8Array( (512 * 1024 * 1024) - header );
let txt = new TextDecoder().decode( bytes );
console.log( txt.length ); // 536870888
txt += "f"; // RangeError
And in the step 3 of FileReader::readOperation, UAs have to
If package data threw an exception error:
Set fr’s error to error.
Fire a progress event called error at fr.
But here, we don't have that error.
const bytes = Uint32Array.from( { length: 600 * 1024 * 1024 / 4 }, (_) => Math.random() * 0xFFFFFFFF );
const blob = new Blob( [ bytes ] );
const fr = new FileReader();
fr.onerror = console.error;
fr.onload = (evt) => console.log( "success", fr.result.length, fr.error );
fr.readAsDataURL( blob );
I will open an issue about this, since you should be able to handle that error from the FileReader.
How can I fix or work around this issue?
The best is definitely to make your API end-point accepts binary resources directly instead of data:// URLs, which should always be avoided anyway.
If this is not doable, a solution "for the future", will be to POST a ReadableStream to your end-point, and do the data:// URL conversion yourself, on a stream from the Blob.
class base64StreamEncoder {
constructor( header ) {
if( header ) {
this.header = new TextEncoder().encode( header );
}
this.tail = [];
}
transform( chunk, controller ) {
const encoded = this.encode( chunk );
if( this.header ) {
controller.enqueue( this.header );
this.header = null;
}
controller.enqueue( encoded );
}
encode( bytes ) {
let binary = Array.from( this.tail )
.reduce( (bin, byte) => bin + String.fromCharCode( byte ), "" );
const tail_length = bytes.length % 3;
const last_index = bytes.length - tail_length;
this.tail = bytes.subarray( last_index );
for( let i = 0; i<last_index; i++ ) {
binary += String.fromCharCode( bytes[ i ] );
}
const b64String = window.btoa( binary );
return new TextEncoder().encode( b64String );
}
flush( controller ) {
// force the encoding of the tail
controller.enqueue( this.encode( new Uint8Array() ) );
}
}
Live example: https://base64streamencoder.glitch.me/
For now, you'd have to store chunks of the base64 representation in a Blob as demonstrated by Endless's answer.
However beware that since this is a V8 limitation, even the server-side can face issues with strings this big, so anyway, you should contact your API's maintainer.
Here is a partial solution that transform a blob in chunks into base64 blobs... concatenates everything into one json blob with a pre/suffix part of the json and the base64 chunks inbetween
Keeping it as a blob allows browser to optimize the memory allocation and offload it to the disk if needed.
you could try to change the chunkSize to something larger, browser likes to keep smaller blob chunks in memory (one bucket)
// get some dummy gradient file (blob)
var a=document.createElement("canvas"),b=a.getContext("2d"),c=b.createLinearGradient(0,0,3000,3000);a.width=a.height=3000;c.addColorStop(0,"red");c.addColorStop(1,"blue");b.fillStyle=c;b.fillRect(0,0,a.width,a.height);a.toBlob(main);
async function main (blob) {
var fr = new FileReader()
// Best to add 2 so it strips == from all chunks
// except from the last chunk
var chunkSize = (1 << 16) + 2
var pos = 0
var b64chunks = []
while (pos < blob.size) {
await new Promise(rs => {
fr.readAsDataURL(blob.slice(pos, pos + chunkSize))
fr.onload = () => {
const b64 = fr.result.split(',')[1]
// Keeping it as a blob allaws browser to offload memory to disk
b64chunks.push(new Blob([b64]))
rs()
}
pos += chunkSize
})
}
// How you concatinate all chunks to json is now up to you.
// this solution/answer is more of a guideline of what you need to do
// There are some ways to do it more automatically but here is the most
// simpliest form
// (fyi: this new blob won't create so much data in memory, it will only keep references points to other blobs locations)
const jsonBlob = new Blob([
'{"data": "', ...b64chunks, '"}'
], { type: 'application/json' })
/*
// strongly advice you to tell the api developers
// to add support for binary/file upload (multipart-formdata)
// base64 is roughly ~33% larger and streaming
// this data on the server to the disk is almost impossible
fetch('./upload-files-to-bad-json-only-api', {
method: 'POST',
body: jsonBlob
})
*/
// Just a test that it still works
//
// new Response(jsonBlob).json().then(console.log)
fetch('data:image/png;base64,' + await new Blob(b64chunks).text()).then(r => r.blob()).then(b => console.log(URL.createObjectURL(b)))
}
I have avoided to make base64 += fr.result.split(',')[1] and JSON.stringify since GiB of data is a lot and json shouldn't handle binary data anyway

How get a buffer object from input type=file in html5 like fs readFile in nodejs [duplicate]

var profileImage = fileInputInByteArray;
$.ajax({
url: 'abc.com/',
type: 'POST',
dataType: 'json',
data: {
// Other data
ProfileImage: profileimage
// Other data
},
success: {
}
})
// Code in WebAPI
[HttpPost]
public HttpResponseMessage UpdateProfile([FromUri]UpdateProfileModel response) {
//...
return response;
}
public class UpdateProfileModel {
// ...
public byte[] ProfileImage {get ;set; }
// ...
}
<input type="file" id="inputFile" />
I am using ajax call to post byte[] value of a input type = file input to web api which receives in byte[] format. However, I am experiencing difficulty of getting byte array. I am expecting that we can get the byte array through File API.
Note: I need to store the byte array in a variable first before passing through ajax call
[Edit]
As noted in comments above, while still on some UA implementations, readAsBinaryString method didn't made its way to the specs and should not be used in production.
Instead, use readAsArrayBuffer and loop through it's buffer to get back the binary string :
document.querySelector('input').addEventListener('change', function() {
var reader = new FileReader();
reader.onload = function() {
var arrayBuffer = this.result,
array = new Uint8Array(arrayBuffer),
binaryString = String.fromCharCode.apply(null, array);
console.log(binaryString);
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file" />
<div id="result"></div>
For a more robust way to convert your arrayBuffer in binary string, you can refer to this answer.
[old answer] (modified)
Yes, the file API does provide a way to convert your File, in the <input type="file"/> to a binary string, thanks to the FileReader Object and its method readAsBinaryString.
[But don't use it in production !]
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var binaryString = this.result;
document.querySelector('#result').innerHTML = binaryString;
}
reader.readAsBinaryString(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
If you want an array buffer, then you can use the readAsArrayBuffer() method :
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var arrayBuffer = this.result;
console.log(arrayBuffer);
document.querySelector('#result').innerHTML = arrayBuffer + ' '+arrayBuffer.byteLength;
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
$(document).ready(function(){
(function (document) {
var input = document.getElementById("files"),
output = document.getElementById("result"),
fileData; // We need fileData to be visible to getBuffer.
// Eventhandler for file input.
function openfile(evt) {
var files = input.files;
// Pass the file to the blob, not the input[0].
fileData = new Blob([files[0]]);
// Pass getBuffer to promise.
var promise = new Promise(getBuffer);
// Wait for promise to be resolved, or log error.
promise.then(function(data) {
// Here you can pass the bytes to another function.
output.innerHTML = data.toString();
console.log(data);
}).catch(function(err) {
console.log('Error: ',err);
});
}
/*
Create a function which will be passed to the promise
and resolve it when FileReader has finished loading the file.
*/
function getBuffer(resolve) {
var reader = new FileReader();
reader.readAsArrayBuffer(fileData);
reader.onload = function() {
var arrayBuffer = reader.result
var bytes = new Uint8Array(arrayBuffer);
resolve(bytes);
}
}
// Eventlistener for file input.
input.addEventListener('change', openfile, false);
}(document));
});
<!DOCTYPE html>
<html>
<head>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
</head>
<body>
<input type="file" id="files"/>
<div id="result"></div>
</body>
</html>
Modern browsers now have the arrayBuffer method on Blob's:
document.querySelector('input').addEventListener('change', async (event) => {
const buffer = await event.target.files[0].arrayBuffer()
console.log(buffer)
}, false)
🎉 🎉
This is a long post, but I was tired of all these examples that weren't working for me because they used Promise objects or an errant this that has a different meaning when you are using Reactjs. My implementation was using a DropZone with reactjs, and I got the bytes using a framework similar to what is posted at this following site, when nothing else above would work: https://www.mokuji.me/article/drop-upload-tutorial-1 . There were 2 keys, for me:
You have to get the bytes from the event object, using and during a FileReader's onload function.
I tried various combinations, but in the end, what worked was:
const bytes = e.target.result.split('base64,')[1];
Where e is the event. React requires const, you could use var in plain Javascript. But that gave me the base64 encoded byte string.
So I'm just going to include the applicable lines for integrating this as if you were using React, because that's how I was building it, but try to also generalize this, and add comments where necessary, to make it applicable to a vanilla Javascript implementation - caveated that I did not use it like that in such a construct to test it.
These would be your bindings at the top, in your constructor, in a React framework (not relevant to a vanilla Javascript implementation):
this.uploadFile = this.uploadFile.bind(this);
this.processFile = this.processFile.bind(this);
this.errorHandler = this.errorHandler.bind(this);
this.progressHandler = this.progressHandler.bind(this);
And you'd have onDrop={this.uploadFile} in your DropZone element. If you were doing this without React, this is the equivalent of adding the onclick event handler you want to run when you click the "Upload File" button.
<button onclick="uploadFile(event);" value="Upload File" />
Then the function (applicable lines... I'll leave out my resetting my upload progress indicator, etc.):
uploadFile(event){
// This is for React, only
this.setState({
files: event,
});
console.log('File count: ' + this.state.files.length);
// You might check that the "event" has a file & assign it like this
// in vanilla Javascript:
// var files = event.target.files;
// if (!files && files.length > 0)
// files = (event.dataTransfer ? event.dataTransfer.files :
// event.originalEvent.dataTransfer.files);
// You cannot use "files" as a variable in React, however:
const in_files = this.state.files;
// iterate, if files length > 0
if (in_files.length > 0) {
for (let i = 0; i < in_files.length; i++) {
// use this, instead, for vanilla JS:
// for (var i = 0; i < files.length; i++) {
const a = i + 1;
console.log('in loop, pass: ' + a);
const f = in_files[i]; // or just files[i] in vanilla JS
const reader = new FileReader();
reader.onerror = this.errorHandler;
reader.onprogress = this.progressHandler;
reader.onload = this.processFile(f);
reader.readAsDataURL(f);
}
}
}
There was this question on that syntax, for vanilla JS, on how to get that file object:
JavaScript/HTML5/jQuery Drag-And-Drop Upload - "Uncaught TypeError: Cannot read property 'files' of undefined"
Note that React's DropZone will already put the File object into this.state.files for you, as long as you add files: [], to your this.state = { .... } in your constructor. I added syntax from an answer on that post on how to get your File object. It should work, or there are other posts there that can help. But all that Q/A told me was how to get the File object, not the blob data, itself. And even if I did fileData = new Blob([files[0]]); like in sebu's answer, which didn't include var with it for some reason, it didn't tell me how to read that blob's contents, and how to do it without a Promise object. So that's where the FileReader came in, though I actually tried and found I couldn't use their readAsArrayBuffer to any avail.
You will have to have the other functions that go along with this construct - one to handle onerror, one for onprogress (both shown farther below), and then the main one, onload, that actually does the work once a method on reader is invoked in that last line. Basically you are passing your event.dataTransfer.files[0] straight into that onload function, from what I can tell.
So the onload method calls my processFile() function (applicable lines, only):
processFile(theFile) {
return function(e) {
const bytes = e.target.result.split('base64,')[1];
}
}
And bytes should have the base64 bytes.
Additional functions:
errorHandler(e){
switch (e.target.error.code) {
case e.target.error.NOT_FOUND_ERR:
alert('File not found.');
break;
case e.target.error.NOT_READABLE_ERR:
alert('File is not readable.');
break;
case e.target.error.ABORT_ERR:
break; // no operation
default:
alert('An error occurred reading this file.');
break;
}
}
progressHandler(e) {
if (e.lengthComputable){
const loaded = Math.round((e.loaded / e.total) * 100);
let zeros = '';
// Percent loaded in string
if (loaded >= 0 && loaded < 10) {
zeros = '00';
}
else if (loaded < 100) {
zeros = '0';
}
// Display progress in 3-digits and increase bar length
document.getElementById("progress").textContent = zeros + loaded.toString();
document.getElementById("progressBar").style.width = loaded + '%';
}
}
And applicable progress indicator markup:
<table id="tblProgress">
<tbody>
<tr>
<td><b><span id="progress">000</span>%</b> <span className="progressBar"><span id="progressBar" /></span></td>
</tr>
</tbody>
</table>
And CSS:
.progressBar {
background-color: rgba(255, 255, 255, .1);
width: 100%;
height: 26px;
}
#progressBar {
background-color: rgba(87, 184, 208, .5);
content: '';
width: 0;
height: 26px;
}
EPILOGUE:
Inside processFile(), for some reason, I couldn't add bytes to a variable I carved out in this.state. So, instead, I set it directly to the variable, attachments, that was in my JSON object, RequestForm - the same object as my this.state was using. attachments is an array so I could push multiple files. It went like this:
const fileArray = [];
// Collect any existing attachments
if (RequestForm.state.attachments.length > 0) {
for (let i=0; i < RequestForm.state.attachments.length; i++) {
fileArray.push(RequestForm.state.attachments[i]);
}
}
// Add the new one to this.state
fileArray.push(bytes);
// Update the state
RequestForm.setState({
attachments: fileArray,
});
Then, because this.state already contained RequestForm:
this.stores = [
RequestForm,
]
I could reference it as this.state.attachments from there on out. React feature that isn't applicable in vanilla JS. You could build a similar construct in plain JavaScript with a global variable, and push, accordingly, however, much easier:
var fileArray = new Array(); // place at the top, before any functions
// Within your processFile():
var newFileArray = [];
if (fileArray.length > 0) {
for (var i=0; i < fileArray.length; i++) {
newFileArray.push(fileArray[i]);
}
}
// Add the new one
newFileArray.push(bytes);
// Now update the global variable
fileArray = newFileArray;
Then you always just reference fileArray, enumerate it for any file byte strings, e.g. var myBytes = fileArray[0]; for the first file.
This is simple way to convert files to Base64 and avoid "maximum call stack size exceeded at FileReader.reader.onload" with the file has big size.
document.querySelector('#fileInput').addEventListener('change', function () {
var reader = new FileReader();
var selectedFile = this.files[0];
reader.onload = function () {
var comma = this.result.indexOf(',');
var base64 = this.result.substr(comma + 1);
console.log(base64);
}
reader.readAsDataURL(selectedFile);
}, false);
<input id="fileInput" type="file" />
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var arrayBuffer = this.result,
array = new Uint8Array(arrayBuffer),
binaryString = String.fromCharCode.apply(null, array);
console.log(binaryString);
console.log(arrayBuffer);
document.querySelector('#result').innerHTML = arrayBuffer + ' '+arrayBuffer.byteLength;
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
Here is one answer to get the actual final byte array , just using FileReader and ArrayBuffer :
const test_function = async () => {
... ... ...
const get_file_array = (file) => {
return new Promise((acc, err) => {
const reader = new FileReader();
reader.onload = (event) => { acc(event.target.result) };
reader.onerror = (err) => { err(err) };
reader.readAsArrayBuffer(file);
});
}
const temp = await get_file_array(files[0])
console.log('here we finally ve the file as a ArrayBuffer : ',temp);
const fileb = new Uint8Array(fileb)
... ... ...
}
where file is directly the File object u want to read , this has to be done in a async function...

Read only the first chunk of a file in Javascript

We have an browser-based interface where a user selects a file to upload.
When a file is selected, it is loaded into a FileReader object, and the contents is loaded into memory and validated by calling FileReader.readAsText().
This works fine on 50MB files (takes 2-3 seconds).
However when you go up to 200MB in size, and even after 20 mins, the file never finished reading and a processor remains pegged at 100% utilization.
I believe this is due to memory exhaustion, because the entirety of the file is being loaded into a single variable.
Is there any way to load the contents of the file in chunks, rather than loading the entire file into one enormous, memory sucking monster?
All we need is the first 1KB or so of the file to validate, so all we'd need to do is read the first chunk, valid, and be done.
Proof of concept, to test for yourself:
<form>
<input type="file" /> File
</form>
<script type="text/javascript">
var input = document.getElementsByTagName('input')[0];
input.onchange = function(e) {
var file = input.files[0];
var reader = new FileReader();
reader.onload = function() {
var file_contents = reader.result;
alert('File Read. Size:' + file_contents.length);
};
reader.readAsText(file);
};
</script>
I would leverage the Blob interface that File implements, whose .stream() method returns a ReadableStream.
async function validateFile(file) {
const stream = file.stream();
const reader = stream.getReader();
let bytesRead = 0;
let exhausted = false;
while (bytesRead < 1e3 && !exhausted) {
const {value, done} = await reader.read();
// value is a Uint8Array
bytesRead += value ? value.length : 0;
exhausted = done;
}
}

Getting byte array through input type = file

var profileImage = fileInputInByteArray;
$.ajax({
url: 'abc.com/',
type: 'POST',
dataType: 'json',
data: {
// Other data
ProfileImage: profileimage
// Other data
},
success: {
}
})
// Code in WebAPI
[HttpPost]
public HttpResponseMessage UpdateProfile([FromUri]UpdateProfileModel response) {
//...
return response;
}
public class UpdateProfileModel {
// ...
public byte[] ProfileImage {get ;set; }
// ...
}
<input type="file" id="inputFile" />
I am using ajax call to post byte[] value of a input type = file input to web api which receives in byte[] format. However, I am experiencing difficulty of getting byte array. I am expecting that we can get the byte array through File API.
Note: I need to store the byte array in a variable first before passing through ajax call
[Edit]
As noted in comments above, while still on some UA implementations, readAsBinaryString method didn't made its way to the specs and should not be used in production.
Instead, use readAsArrayBuffer and loop through it's buffer to get back the binary string :
document.querySelector('input').addEventListener('change', function() {
var reader = new FileReader();
reader.onload = function() {
var arrayBuffer = this.result,
array = new Uint8Array(arrayBuffer),
binaryString = String.fromCharCode.apply(null, array);
console.log(binaryString);
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file" />
<div id="result"></div>
For a more robust way to convert your arrayBuffer in binary string, you can refer to this answer.
[old answer] (modified)
Yes, the file API does provide a way to convert your File, in the <input type="file"/> to a binary string, thanks to the FileReader Object and its method readAsBinaryString.
[But don't use it in production !]
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var binaryString = this.result;
document.querySelector('#result').innerHTML = binaryString;
}
reader.readAsBinaryString(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
If you want an array buffer, then you can use the readAsArrayBuffer() method :
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var arrayBuffer = this.result;
console.log(arrayBuffer);
document.querySelector('#result').innerHTML = arrayBuffer + ' '+arrayBuffer.byteLength;
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
$(document).ready(function(){
(function (document) {
var input = document.getElementById("files"),
output = document.getElementById("result"),
fileData; // We need fileData to be visible to getBuffer.
// Eventhandler for file input.
function openfile(evt) {
var files = input.files;
// Pass the file to the blob, not the input[0].
fileData = new Blob([files[0]]);
// Pass getBuffer to promise.
var promise = new Promise(getBuffer);
// Wait for promise to be resolved, or log error.
promise.then(function(data) {
// Here you can pass the bytes to another function.
output.innerHTML = data.toString();
console.log(data);
}).catch(function(err) {
console.log('Error: ',err);
});
}
/*
Create a function which will be passed to the promise
and resolve it when FileReader has finished loading the file.
*/
function getBuffer(resolve) {
var reader = new FileReader();
reader.readAsArrayBuffer(fileData);
reader.onload = function() {
var arrayBuffer = reader.result
var bytes = new Uint8Array(arrayBuffer);
resolve(bytes);
}
}
// Eventlistener for file input.
input.addEventListener('change', openfile, false);
}(document));
});
<!DOCTYPE html>
<html>
<head>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
</head>
<body>
<input type="file" id="files"/>
<div id="result"></div>
</body>
</html>
Modern browsers now have the arrayBuffer method on Blob's:
document.querySelector('input').addEventListener('change', async (event) => {
const buffer = await event.target.files[0].arrayBuffer()
console.log(buffer)
}, false)
🎉 🎉
This is a long post, but I was tired of all these examples that weren't working for me because they used Promise objects or an errant this that has a different meaning when you are using Reactjs. My implementation was using a DropZone with reactjs, and I got the bytes using a framework similar to what is posted at this following site, when nothing else above would work: https://www.mokuji.me/article/drop-upload-tutorial-1 . There were 2 keys, for me:
You have to get the bytes from the event object, using and during a FileReader's onload function.
I tried various combinations, but in the end, what worked was:
const bytes = e.target.result.split('base64,')[1];
Where e is the event. React requires const, you could use var in plain Javascript. But that gave me the base64 encoded byte string.
So I'm just going to include the applicable lines for integrating this as if you were using React, because that's how I was building it, but try to also generalize this, and add comments where necessary, to make it applicable to a vanilla Javascript implementation - caveated that I did not use it like that in such a construct to test it.
These would be your bindings at the top, in your constructor, in a React framework (not relevant to a vanilla Javascript implementation):
this.uploadFile = this.uploadFile.bind(this);
this.processFile = this.processFile.bind(this);
this.errorHandler = this.errorHandler.bind(this);
this.progressHandler = this.progressHandler.bind(this);
And you'd have onDrop={this.uploadFile} in your DropZone element. If you were doing this without React, this is the equivalent of adding the onclick event handler you want to run when you click the "Upload File" button.
<button onclick="uploadFile(event);" value="Upload File" />
Then the function (applicable lines... I'll leave out my resetting my upload progress indicator, etc.):
uploadFile(event){
// This is for React, only
this.setState({
files: event,
});
console.log('File count: ' + this.state.files.length);
// You might check that the "event" has a file & assign it like this
// in vanilla Javascript:
// var files = event.target.files;
// if (!files && files.length > 0)
// files = (event.dataTransfer ? event.dataTransfer.files :
// event.originalEvent.dataTransfer.files);
// You cannot use "files" as a variable in React, however:
const in_files = this.state.files;
// iterate, if files length > 0
if (in_files.length > 0) {
for (let i = 0; i < in_files.length; i++) {
// use this, instead, for vanilla JS:
// for (var i = 0; i < files.length; i++) {
const a = i + 1;
console.log('in loop, pass: ' + a);
const f = in_files[i]; // or just files[i] in vanilla JS
const reader = new FileReader();
reader.onerror = this.errorHandler;
reader.onprogress = this.progressHandler;
reader.onload = this.processFile(f);
reader.readAsDataURL(f);
}
}
}
There was this question on that syntax, for vanilla JS, on how to get that file object:
JavaScript/HTML5/jQuery Drag-And-Drop Upload - "Uncaught TypeError: Cannot read property 'files' of undefined"
Note that React's DropZone will already put the File object into this.state.files for you, as long as you add files: [], to your this.state = { .... } in your constructor. I added syntax from an answer on that post on how to get your File object. It should work, or there are other posts there that can help. But all that Q/A told me was how to get the File object, not the blob data, itself. And even if I did fileData = new Blob([files[0]]); like in sebu's answer, which didn't include var with it for some reason, it didn't tell me how to read that blob's contents, and how to do it without a Promise object. So that's where the FileReader came in, though I actually tried and found I couldn't use their readAsArrayBuffer to any avail.
You will have to have the other functions that go along with this construct - one to handle onerror, one for onprogress (both shown farther below), and then the main one, onload, that actually does the work once a method on reader is invoked in that last line. Basically you are passing your event.dataTransfer.files[0] straight into that onload function, from what I can tell.
So the onload method calls my processFile() function (applicable lines, only):
processFile(theFile) {
return function(e) {
const bytes = e.target.result.split('base64,')[1];
}
}
And bytes should have the base64 bytes.
Additional functions:
errorHandler(e){
switch (e.target.error.code) {
case e.target.error.NOT_FOUND_ERR:
alert('File not found.');
break;
case e.target.error.NOT_READABLE_ERR:
alert('File is not readable.');
break;
case e.target.error.ABORT_ERR:
break; // no operation
default:
alert('An error occurred reading this file.');
break;
}
}
progressHandler(e) {
if (e.lengthComputable){
const loaded = Math.round((e.loaded / e.total) * 100);
let zeros = '';
// Percent loaded in string
if (loaded >= 0 && loaded < 10) {
zeros = '00';
}
else if (loaded < 100) {
zeros = '0';
}
// Display progress in 3-digits and increase bar length
document.getElementById("progress").textContent = zeros + loaded.toString();
document.getElementById("progressBar").style.width = loaded + '%';
}
}
And applicable progress indicator markup:
<table id="tblProgress">
<tbody>
<tr>
<td><b><span id="progress">000</span>%</b> <span className="progressBar"><span id="progressBar" /></span></td>
</tr>
</tbody>
</table>
And CSS:
.progressBar {
background-color: rgba(255, 255, 255, .1);
width: 100%;
height: 26px;
}
#progressBar {
background-color: rgba(87, 184, 208, .5);
content: '';
width: 0;
height: 26px;
}
EPILOGUE:
Inside processFile(), for some reason, I couldn't add bytes to a variable I carved out in this.state. So, instead, I set it directly to the variable, attachments, that was in my JSON object, RequestForm - the same object as my this.state was using. attachments is an array so I could push multiple files. It went like this:
const fileArray = [];
// Collect any existing attachments
if (RequestForm.state.attachments.length > 0) {
for (let i=0; i < RequestForm.state.attachments.length; i++) {
fileArray.push(RequestForm.state.attachments[i]);
}
}
// Add the new one to this.state
fileArray.push(bytes);
// Update the state
RequestForm.setState({
attachments: fileArray,
});
Then, because this.state already contained RequestForm:
this.stores = [
RequestForm,
]
I could reference it as this.state.attachments from there on out. React feature that isn't applicable in vanilla JS. You could build a similar construct in plain JavaScript with a global variable, and push, accordingly, however, much easier:
var fileArray = new Array(); // place at the top, before any functions
// Within your processFile():
var newFileArray = [];
if (fileArray.length > 0) {
for (var i=0; i < fileArray.length; i++) {
newFileArray.push(fileArray[i]);
}
}
// Add the new one
newFileArray.push(bytes);
// Now update the global variable
fileArray = newFileArray;
Then you always just reference fileArray, enumerate it for any file byte strings, e.g. var myBytes = fileArray[0]; for the first file.
This is simple way to convert files to Base64 and avoid "maximum call stack size exceeded at FileReader.reader.onload" with the file has big size.
document.querySelector('#fileInput').addEventListener('change', function () {
var reader = new FileReader();
var selectedFile = this.files[0];
reader.onload = function () {
var comma = this.result.indexOf(',');
var base64 = this.result.substr(comma + 1);
console.log(base64);
}
reader.readAsDataURL(selectedFile);
}, false);
<input id="fileInput" type="file" />
document.querySelector('input').addEventListener('change', function(){
var reader = new FileReader();
reader.onload = function(){
var arrayBuffer = this.result,
array = new Uint8Array(arrayBuffer),
binaryString = String.fromCharCode.apply(null, array);
console.log(binaryString);
console.log(arrayBuffer);
document.querySelector('#result').innerHTML = arrayBuffer + ' '+arrayBuffer.byteLength;
}
reader.readAsArrayBuffer(this.files[0]);
}, false);
<input type="file"/>
<div id="result"></div>
Here is one answer to get the actual final byte array , just using FileReader and ArrayBuffer :
const test_function = async () => {
... ... ...
const get_file_array = (file) => {
return new Promise((acc, err) => {
const reader = new FileReader();
reader.onload = (event) => { acc(event.target.result) };
reader.onerror = (err) => { err(err) };
reader.readAsArrayBuffer(file);
});
}
const temp = await get_file_array(files[0])
console.log('here we finally ve the file as a ArrayBuffer : ',temp);
const fileb = new Uint8Array(fileb)
... ... ...
}
where file is directly the File object u want to read , this has to be done in a async function...

Get docx file contents using javascript/jquery

I want to open / read docx file using client side technologies (HTML/JS).
I have found a Javascript library named docx.js but personally cannot seem to locate any documentation for it.
(http://blog.innovatejs.com/?p=184)
The goal is to make a browser based search tool for docx files and txt files.
With docxtemplater, you can easily get the full text of a word (works with docx only) by using the doc.getFullText() method.
HTML code:
<body>
<button onclick="gettext()">Get document text</button>
</body>
<script src="https://cdnjs.cloudflare.com/ajax/libs/docxtemplater/3.26.2/docxtemplater.js"></script>
<script src="https://unpkg.com/pizzip#3.1.1/dist/pizzip.js"></script>
<script src="https://unpkg.com/pizzip#3.1.1/dist/pizzip-utils.js"></script>
<script>
function loadFile(url, callback) {
PizZipUtils.getBinaryContent(url, callback);
}
function gettext() {
loadFile(
"https://docxtemplater.com/tag-example.docx",
function (error, content) {
if (error) {
throw error;
}
var zip = new PizZip(content);
var doc = new window.docxtemplater(zip);
var text = doc.getFullText();
console.log(text);
alert("Text is " + text);
}
);
}
</script>
I know this is an old post, but doctemplater has moved on and the accepted answer no longer works. This worked for me:
function loadDocx(filename) {
// Read document.xml from docx document
const AdmZip = require("adm-zip");
const zip = new AdmZip(filename);
const xml = zip.readAsText("word/document.xml");
// Load xml DOM
const cheerio = require('cheerio');
$ = cheerio.load(xml, {
normalizeWhitespace: true,
xmlMode: true
})
// Extract text
let out = new Array()
$('w\\:t').each((i, el) => {
out.push($(el).text())
})
return out
}
You can try docxyz.
let {Document} = require('docxyz');
let fileName = 'yourfile.docx';
let document = new Document(fileName);
let text = document.text;
console.log(text);
No tables.
let {Document} = require('docxyz');
let fileName = 'yourfile.docx';
let document = new Document(fileName);
let a = [];
for(let paragraph of document.paragraphs){
a.push(paragraph.text);
}
let text = a.join('\n');
console.log(text);
This solution will give you an array of strings, one element for each paragraph in the docx :
const PizZip = require("pizzip");
const { DOMParser, XMLSerializer } = require("#xmldom/xmldom");
const fs = require("fs");
const path = require("path");
function str2xml(str) {
if (str.charCodeAt(0) === 65279) {
// BOM sequence
str = str.substr(1);
}
return new DOMParser().parseFromString(str, "text/xml");
}
function getParagraphs(content) {
const zip = new PizZip(content);
const xml = str2xml(zip.files["word/document.xml"].asText());
const paragraphsXml = xml.getElementsByTagName("w:p");
const paragraphs = [];
for (let i = 0, len = paragraphsXml.length; i < len; i++) {
let fullText = "";
const textsXml =
paragraphsXml[i].getElementsByTagName("w:t");
for (let j = 0, len2 = textsXml.length; j < len2; j++) {
const textXml = textsXml[j];
if (textXml.childNodes) {
fullText += textXml.childNodes[0].nodeValue;
}
}
paragraphs.push(fullText);
}
return paragraphs;
}
// Load the docx file as binary content
const content = fs.readFileSync(
path.resolve(__dirname, "examples/cond-image.docx"),
"binary"
);
// Will print ['Hello John', 'how are you ?'] if the document has two paragraphs.
console.log(getParagraphs(content));
Source : https://docxtemplater.com/faq/#how-can-i-retrieve-the-docx-content-as-text
If you want to be able to display the docx files in a web browser, you might be interested in Native Documents' recently released commercial Word File Editor; try it at https://nativedocuments.com/test_drive.html
You'll get much better layout fidelity if you do it this way, than if you try to convert to (X)HTML and view it that way.
It is designed specifically for embedding in a webapp, so there is an API for loading documents, and it will sit happily within the security context of your webapp.
Disclosure: I have a commercial interest in Native Documents

Categories

Resources