SHA256 hashing of large files in Angular 6 using FileReader crashes the browser - javascript

I have a problem with SHA256 hashing: if the file size is more than 250 MB, the browser hangs and crashes.
Below is the hashing code; please help.
let fileReader = new FileReader();
fileReader.readAsArrayBuffer(fileToSend);
fileReader.onload = (e) => {
  const hash = CrypTo.SHA256(this.arrayBufferToWordArray(fileReader.result)).toString();
  this.hashCode = hash;
  this.fileHistory.MediaHash = hash;
  this.fileHistory.FileName = fileToSend.name;
  //Insert to file history
  this.fileHistoryService.postFiles(this.fileHistory).subscribe(
    data => {
      this.hashCode = data["MediaHash"];
      this.alertService.success('HASHFILE.FileUploadSuccessMessage', true);
      this.hideGenerateHashCodeButton = true;
    },
    error => {
      this.alertService.error('COMMONERRORMESSAGE.SomethingWentWrongErrorMessage');
    });
}

arrayBufferToWordArray(fileResult) {
  var i8a = new Uint8Array(fileResult);
  var byteArray = [];
  for (var i = 0; i < i8a.length; i += 4) {
    byteArray.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
  }
  return CrypTo.lib.WordArray.create(byteArray, i8a.length);
}

I tested the code below with all the big files, and it fixed the problem for me.
var hashdata = CrypTo.algo.SHA256.create();
var file = <FiletoHash>; // placeholder: the File object to hash
if (file) {
  var reader = new FileReader();
  var size = file.size;
  var chunk_size = Math.pow(2, 22); // 4 MB per chunk
  var chunks = [];
  var offset = 0;
  var bytes = 0;
  reader.onloadend = (e) => {
    if (reader.readyState == FileReader.DONE) {
      // Update the hash with every chunk that is read
      hashdata.update(this.arrayBufferToWordArray(reader.result));
      let chunk: any = reader.result;
      bytes += chunk.byteLength;
      chunks.push(chunk);
      if (offset < size) {
        offset += chunk_size;
        var blob = file.slice(offset, offset + chunk_size);
        reader.readAsArrayBuffer(blob);
      } else {
        // Finally generate the hash; use this value as the result
        var hash = hashdata.finalize().toString();
      }
    }
  };
  // Read the first chunk
  var blob = file.slice(offset, offset + chunk_size);
  reader.readAsArrayBuffer(blob);
}

arrayBufferToWordArray(fileResult) {
  var i8a = new Uint8Array(fileResult);
  return CrypTo.lib.WordArray.create(i8a, i8a.length);
}

You should definitely use streams, or something like them, to avoid loading the whole file into memory.
Specifically with CryptoJS, I have seen that it is possible to perform progressive hashing.
var sha256 = CryptoJS.algo.SHA256.create();
sha256.update("Message Part 1");
sha256.update("Message Part 2");
sha256.update("Message Part 3");

var hash = sha256.finalize();
So, use FileReader to read the file in parts, and every time you read a part, update the sha256 hash until there is nothing left to read.
See:
filereader api on big files
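A minimal sketch of that loop, assuming CryptoJS with its typed-array support loaded (as in the accepted answer above) and a File object named file:
var sha256 = CryptoJS.algo.SHA256.create();
var reader = new FileReader();
var chunkSize = 4 * 1024 * 1024; // 4 MB per read - an arbitrary choice
var offset = 0;
reader.onload = function (e) {
  // e.target.result is the ArrayBuffer of the current chunk
  sha256.update(CryptoJS.lib.WordArray.create(new Uint8Array(e.target.result)));
  offset += chunkSize;
  if (offset < file.size) {
    reader.readAsArrayBuffer(file.slice(offset, offset + chunkSize));
  } else {
    console.log(sha256.finalize().toString()); // final hex digest
  }
};
reader.readAsArrayBuffer(file.slice(offset, offset + chunkSize));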

Related

How can I merge multiple WAV audio blobs into one?

My problem:
I'm trying to merge multiple audio blobs into a single blob and download it from the page.
What I tried:
I tried to concatenate the audio blobs in the following ways:
Method - 1:
const url = window.URL.createObjectURL(new Blob(fullBlobArray), {
type: 'audio/*'
});
const a = document.createElement("a");
document.body.appendChild(a);
a.style = "display: none";
a.href = url;
a.download = "testing.wav";
a.click();
URL.revokeObjectURL(url);
a.remove();
Method - 2 (using the ConcatenateBlobs.js plugin - ConcatenateJS)
ConcatenateBlobs(fullBlobArray, 'audio/wav', function (fullBlob) {
const url = window.URL.createObjectURL(fullBlob);
const a = document.createElement("a");
document.body.appendChild(a);
a.style = "display: none";
a.href = url;
a.download = "testing.wav";
a.click();
URL.revokeObjectURL(url);
a.remove();
//Close the window once it has downloaded.
window.close();
});
Output is explained below:
If you have the following audio blobs:
[audio1, audio2, audio3]
Then, after downloading with the above code, only the audio from the first file (i.e. audio1) is played, yet the file size of the full blob is the total size of audio1 + audio2 + audio3.
I couldn't figure out where I went wrong. Please help me resolve this.
Finally, found a solution!!!
Thanks to this StackOverflow article; the effort put into it is highly appreciated.
Thanks also to @Bergi, @Zac and @Peter Krebs for pointing out in the comments that the blob needs to be formatted according to the WAV format.
Below is the code for merging multiple WAV files into a single file:
wav_merger.js
var _index;
function readFileAsync(blob) {
return new Promise((resolve, reject) => {
let reader = new FileReader();
reader.addEventListener("loadend", function () {
resolve(reader.result);
});
reader.onerror = reject;
reader.readAsArrayBuffer(blob);
})
}
function getBufferFromBlobs(blobArray) {
return new Promise((resolve, reject) => {
var _arrBytes = [];
var _promises = [];
if (blobArray.length > 0) {
$.each(blobArray, function (index, blob) {
_index = index;
var dfd = $.Deferred();
readFileAsync(blob).then(function (byteArray) {
_arrBytes.push(byteArray);
dfd.resolve();
});
_promises.push(dfd);
});
$.when.apply($, _promises).done(function () {
var _blob = combineWavsBuffers(_arrBytes);
resolve(_blob);
});
}
});
}
function loadWav(blobArray) {
return getBufferFromBlobs(blobArray);
// .then(function (bufferArray) {
// return combineWavsBuffers(bufferArray); //Combine original wav buffers and play
//});
}
function combineWavsBuffers(bufferArray) {
if (bufferArray.length > 0) {
var _bufferLengths = bufferArray.map(buffer => buffer.byteLength);
// Getting sum of numbers
var _totalBufferLength = _bufferLengths.reduce(function (a, b) {
return a + b;
}, 0);
var tmp = new Uint8Array(_totalBufferLength);
//Get buffer1 audio data to create the new combined wav
var audioData = getAudioData.WavHeader.readHeader(new DataView(bufferArray[0]));
var _bufferLength = 0;
$.each(bufferArray, function (index, buffer) {
//Combine array bytes of original wavs buffers.
tmp.set(new Uint8Array(buffer), _bufferLength);
_bufferLength+= buffer.byteLength;
});
//Send combined buffer and send audio data to create the audio data of combined
var arrBytesFinal = getWavBytes(tmp, {
isFloat: false, // floating point or 16-bit integer
numChannels: audioData.channels,
sampleRate: audioData.sampleRate,
});
//Create a Blob as Base64 Raw data with audio/wav type
return new Blob([arrBytesFinal], { type: 'audio/wav; codecs=MS_PCM' });
}
return null;
}
//Combine two audio .wav buffers.
function combineWavsBuffers1(buffer1, buffer2) {
//Combine array bytes of original wavs buffers
var tmp = new Uint8Array(buffer1.byteLength + buffer2.byteLength);
tmp.set(new Uint8Array(buffer1), 0);
tmp.set(new Uint8Array(buffer2), buffer1.byteLength);
//Get buffer1 audio data to create the new combined wav
var audioData = getAudioData.WavHeader.readHeader(new DataView(buffer1));
console.log('Audio Data: ', audioData);
//Send combined buffer and send audio data to create the audio data of combined
var arrBytesFinal = getWavBytes(tmp, {
isFloat: false, // floating point or 16-bit integer
numChannels: audioData.channels,
sampleRate: audioData.sampleRate,
});
//Create a Blob as Base64 Raw data with audio/wav type
return new Blob([arrBytesFinal], { type: 'audio/wav; codecs=MS_PCM' });
}
//Other functions //////////////////////////////////////////////////////////////
// Returns Uint8Array of WAV bytes
function getWavBytes(buffer, options) {
const type = options.isFloat ? Float32Array : Uint16Array
const numFrames = buffer.byteLength / type.BYTES_PER_ELEMENT
const headerBytes = getWavHeader(Object.assign({}, options, { numFrames }))
const wavBytes = new Uint8Array(headerBytes.length + buffer.byteLength);
// prepend header, then add pcmBytes
wavBytes.set(headerBytes, 0)
wavBytes.set(new Uint8Array(buffer), headerBytes.length)
return wavBytes
}
// adapted from https://gist.github.com/also/900023
// returns Uint8Array of WAV header bytes
function getWavHeader(options) {
const numFrames = options.numFrames
const numChannels = options.numChannels || 2
const sampleRate = options.sampleRate || 44100
const bytesPerSample = options.isFloat ? 4 : 2
const format = options.isFloat ? 3 : 1
const blockAlign = numChannels * bytesPerSample
const byteRate = sampleRate * blockAlign
const dataSize = numFrames * blockAlign
const buffer = new ArrayBuffer(44)
const dv = new DataView(buffer)
let p = 0
function writeString(s) {
for (let i = 0; i < s.length; i++) {
dv.setUint8(p + i, s.charCodeAt(i))
}
p += s.length
}
function writeUint32(d) {
dv.setUint32(p, d, true)
p += 4
}
function writeUint16(d) {
dv.setUint16(p, d, true)
p += 2
}
writeString('RIFF') // ChunkID
writeUint32(dataSize + 36) // ChunkSize
writeString('WAVE') // Format
writeString('fmt ') // Subchunk1ID
writeUint32(16) // Subchunk1Size
writeUint16(format) // AudioFormat
writeUint16(numChannels) // NumChannels
writeUint32(sampleRate) // SampleRate
writeUint32(byteRate) // ByteRate
writeUint16(blockAlign) // BlockAlign
writeUint16(bytesPerSample * 8) // BitsPerSample
writeString('data') // Subchunk2ID
writeUint32(dataSize) // Subchunk2Size
return new Uint8Array(buffer)
}
function getAudioData() {
function WavHeader() {
this.dataOffset = 0;
this.dataLen = 0;
this.channels = 0;
this.sampleRate = 0;
}
function fourccToInt(fourcc) {
return fourcc.charCodeAt(0) << 24 | fourcc.charCodeAt(1) << 16 | fourcc.charCodeAt(2) << 8 | fourcc.charCodeAt(3);
}
WavHeader.RIFF = fourccToInt("RIFF");
WavHeader.WAVE = fourccToInt("WAVE");
WavHeader.fmt_ = fourccToInt("fmt ");
WavHeader.data = fourccToInt("data");
WavHeader.readHeader = function (dataView) {
var w = new WavHeader();
var header = dataView.getUint32(0, false);
if (WavHeader.RIFF != header) {
return;
}
var fileLen = dataView.getUint32(4, true);
if (WavHeader.WAVE != dataView.getUint32(8, false)) {
return;
}
if (WavHeader.fmt_ != dataView.getUint32(12, false)) {
return;
}
var fmtLen = dataView.getUint32(16, true);
var pos = 16 + 4;
switch (fmtLen) {
case 16:
case 18:
w.channels = dataView.getUint16(pos + 2, true);
w.sampleRate = dataView.getUint32(pos + 4, true);
break;
default:
throw 'extended fmt chunk not implemented';
}
pos += fmtLen;
var data = WavHeader.data;
var len = 0;
while (data != header) {
header = dataView.getUint32(pos, false);
len = dataView.getUint32(pos + 4, true);
if (data == header) {
break;
}
pos += (len + 8);
}
w.dataLen = len;
w.dataOffset = pos + 8;
return w;
};
getAudioData.WavHeader = WavHeader;
}
getAudioData();
custom_script.js
getBufferFromBlobs(fullBlobArray).then(function (singleBlob) {
const url = window.URL.createObjectURL(singleBlob);
const a = document.createElement("a");
document.body.appendChild(a);
a.style = "display: none";
a.href = url;
a.download = "testing.wav";
a.click();
URL.revokeObjectURL(url);
a.remove();
});
I had the same problem; thanks @Vikash for bringing it up here. I was using ConcatenateBlobs.js to concatenate wav blobs and it seemed to work only in Chrome. Your solution is great, but the source is a bit long, so I tried to fix ConcatenateBlobs.js based on the fact that the file length in the header needs to be corrected. Luckily, it works:
function ConcatenateBlobs(blobs, type, callback) {
var buffers = [];
var index = 0;
function readAsArrayBuffer() {
if (!blobs[index]) {
return concatenateBuffers();
}
var reader = new FileReader();
reader.onload = function(event) {
buffers.push(event.target.result);
index++;
readAsArrayBuffer();
};
reader.readAsArrayBuffer(blobs[index]);
}
readAsArrayBuffer();
function audioLengthTo32Bit(n) {
n = Math.floor(n);
var b1 = n & 255;
var b2 = (n >> 8) & 255;
var b3 = (n >> 16) & 255;
var b4 = (n >> 24) & 255;
return [b1, b2, b3, b4];
}
function concatenateBuffers() {
var byteLength = 0;
buffers.forEach(function(buffer) {
byteLength += buffer.byteLength;
});
var tmp = new Uint8Array(byteLength);
var lastOffset = 0;
var newData;
buffers.forEach(function(buffer) {
if (type=='audio/wav' && lastOffset > 0) newData = new Uint8Array(buffer, 44);
else newData = new Uint8Array(buffer);
tmp.set(newData, lastOffset);
lastOffset += newData.length;
});
if (type=='audio/wav') {
tmp.set(audioLengthTo32Bit(lastOffset - 8), 4);
tmp.set(audioLengthTo32Bit(lastOffset - 44), 40); // update audio length in the header
}
var blob = new Blob([tmp.buffer], {
type: type
});
callback(blob);
}
}
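The call site stays the same as Method 2 in the question; with the corrected header lengths the resulting blob should play all parts:
ConcatenateBlobs(fullBlobArray, 'audio/wav', function (fullBlob) {
  const url = window.URL.createObjectURL(fullBlob);
  const a = document.createElement("a");
  document.body.appendChild(a);
  a.style = "display: none";
  a.href = url;
  a.download = "testing.wav";
  a.click();
  URL.revokeObjectURL(url);
  a.remove();
});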

Azure Speech javascript SDK: Output audio in mp3

I use the SDK Connection methods to capture audio from the speech-to-text recognizer. It produces PCM audio that I want to convert to MP3.
This is how the connection is initialised:
const con = SpeechSDK.Connection.fromRecognizer(this.recognizer);
con.messageSent = args => {
// Only record outbound audio messages that have data in them.
if (
args.message.path === "audio" &&
args.message.isBinaryMessage &&
args.message.binaryMessage !== null
) {
this.wavFragments[this.wavFragmentCount++] =
args.message.binaryMessage;
}
};
and this is how the WAV file is built:
let byteCount = 0;
for (let i = 0; i < this.wavFragmentCount; i++) {
byteCount += this.wavFragments[i].byteLength;
}
// Output array.
const sentAudio = new Uint8Array(byteCount);
byteCount = 0;
for (let i = 0; i < this.wavFragmentCount; i++) {
sentAudio.set(new Uint8Array(this.wavFragments[i]), byteCount);
byteCount += this.wavFragments[i].byteLength;
} // Write the audio back to disk.
// Set the file size in the wave header:
const view = new DataView(sentAudio.buffer);
view.setUint32(4, byteCount, true);
view.setUint32(40, byteCount, true);
I tried using lamejs to convert 'sentAudio' into MP3.
import {lamejs} from "../../modules/lame.min.js";
const wavBlob = new Blob([sentAudio]);
const reader = new FileReader();
reader.onload = evt => {
const audioData = evt.target.result;
const wav = lamejs.WavHeader.readHeader(new DataView(audioData));
const mp3enc = new lamejs.Mp3Encoder(1, wav.sampleRate, 128);
const samples = new Int8Array(audioData, wav.dataOffset, wav.dataLen / 2);
let mp3Tmp = mp3enc.encodeBuffer(samples); // encode mp3
// Push encode buffer to mp3Data variable
const mp3Data = [];
mp3Data.push(mp3Tmp);
// Get end part of mp3
mp3Tmp = mp3enc.flush();
// Write last data to the output data, too
// mp3Data contains now the complete mp3Data
mp3Data.push(mp3Tmp);
const blob = new Blob(mp3Data, { type: "audio/mp3" });
this.createDownloadLink(blob, "mp3");
};
reader.readAsArrayBuffer(wavBlob);
The MP3 blob is empty or contains inaudible sounds.
I have also tried the 'encodeMP3' method described in this example, but it gives the same output.
Are there any existing solutions that support this MP3 conversion?
Regarding the issue, please refer to the following code.
let byteCount = 0;
for (let i= 0; i < wavFragmentCount; i++) {
byteCount += wavFragments[i].byteLength;
}
// Output array.
const sentAudio: Uint8Array = new Uint8Array(byteCount);
byteCount = 0;
for (let i: number = 0; i < wavFragmentCount; i++) {
sentAudio.set(new Uint8Array(wavFragments[i]), byteCount);
byteCount += wavFragments[i].byteLength;
}
// create wav file blob
const view = new DataView(sentAudio.buffer);
view.setUint32(4, byteCount, true);
view.setUint32(40, byteCount, true);
let wav = new Blob([view], { type: 'audio/wav' });
// read wave file as base64
var reader = new FileReader();
reader.readAsDataURL(wav);
reader.onload = () => {
var base64String = reader.result.toString();
base64String = base64String.split(',')[1];
// convert to buffer
var binary_string = window.atob(base64String);
var len = binary_string.length;
var bytes = new Uint8Array(len);
for (var i = 0; i < len; i++) {
bytes[i] = binary_string.charCodeAt(i);
}
// convert to mp3 with lamejs
var wavHdr = lamejs.WavHeader.readHeader(
new DataView(bytes.buffer)
);
console.log(wavHdr);
var wavSamples = new Int16Array(
bytes.buffer,
0,
wavHdr.dataLen / 2
);
let mp3 = wavToMp3(
wavHdr.channels,
wavHdr.sampleRate,
wavSamples
);
reader.readAsDataURL(mp3);
reader.onload = () => {
var base64String = reader.result;
console.log(base64String);
};
};
function wavToMp3(channels, sampleRate, samples) {
console.log(channels);
console.log(sampleRate);
var buffer = [];
var mp3enc = new lamejs.Mp3Encoder(channels, sampleRate, 128);
var remaining = samples.length;
var maxSamples = 1152;
for (var i = 0; remaining >= maxSamples; i += maxSamples) {
var mono = samples.subarray(i, i + maxSamples);
var mp3buf = mp3enc.encodeBuffer(mono);
if (mp3buf.length > 0) {
buffer.push(new Int8Array(mp3buf));
}
remaining -= maxSamples;
}
var d = mp3enc.flush();
if (d.length > 0) {
buffer.push(new Int8Array(d));
}
console.log('done encoding, size=', buffer.length);
var blob = new Blob(buffer, { type: 'audio/mp3' });
var bUrl = window.URL.createObjectURL(blob);
console.log('Blob created, URL:', bUrl);
return blob;
}
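If you want to download the resulting MP3 instead of logging its base64 string, the same anchor-element pattern used in the WAV answers above works; mp3Blob below stands for the Blob returned by wavToMp3 and the file name is arbitrary:
// mp3Blob is the Blob returned by wavToMp3(wavHdr.channels, wavHdr.sampleRate, wavSamples)
var url = window.URL.createObjectURL(mp3Blob);
var a = document.createElement('a');
document.body.appendChild(a);
a.style = 'display: none';
a.href = url;
a.download = 'output.mp3'; // arbitrary file name
a.click();
URL.revokeObjectURL(url);
a.remove();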

Trying to get the hash (SHA-512) of a very large file, more than 2.5 GB, in javascript

I am trying to get the SHA-512 of a large file, 2.5 GB and possibly larger.
My approach is to build one ArrayBuffer to be digested by the crypto.subtle.digest API.
The problem is that I always get:
Array buffer allocation failed
Is it my chunk size? Is there a limit on the ArrayBuffer? I have run out of ideas.
Or maybe there is a better way to get the hash digest instead of using one full ArrayBuffer?
// receives a File object
function CalculateHash(file)
{
var obj = { File : file };
var reader = new FileReader();
var hash = {};
var chunkSize = 10485760;
const chunksQuantity = Math.ceil(obj.File.size / chunkSize);
const chunksQueue = new Array(chunksQuantity).fill().map((_, index) => index).reverse();
var buffer = null;
reader.onload = async function (evt) {
if (buffer == null) {
buffer = evt.currentTarget.result;
} else {
var tmp = new Uint8Array(buffer.byteLength + evt.currentTarget.result.byteLength);
tmp.set(new Uint8Array(buffer), 0);
tmp.set(new Uint8Array(evt.currentTarget.result), buffer.byteLength);
buffer = tmp;
}
readNext();
}
var readNext = async function () {
if (chunksQueue.length > 0) {
const chunkId = chunksQueue.pop();
const sentSize = chunkId * chunkSize;
const chunk = obj.File.slice(sentSize, sentSize + chunkSize);
reader.readAsArrayBuffer(chunk);
} else {
var x = await digestMessage(buffer);
hash.SHA512 = x.toUpperCase();
buffer = null;
}
}
readNext();
}
async function digestMessage(file) {
const hashBuffer = await crypto.subtle.digest('SHA-512', file); // hash the message
const hashArray = Array.from(new Uint8Array(hashBuffer)); // convert buffer to byte array
const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join(''); // convert bytes to hex string
return hashHex;
}
Based on @ArtjomB.'s answer, the fix was progressive hashing; the limitation was the ArrayBuffer size and the browser.
This is the final worker code. It mixes both approaches with the native digest, which is much faster than the CryptoJS library: if the file is larger than 1 GB we use the CryptoJS library, otherwise we use the native browser digest. Any suggestions are welcome!
var window = self;
var document = {};
self.importScripts("/Crypto.min.js");
onmessage = async function (args) {
var obj = args.data;
var reader = new FileReader();
var hash = {};
var chunkSize = 10485760;
var largeFileTrigger = 1048576000;
const chunksQuantity = Math.ceil(obj.File.size / chunkSize);
const chunksQueue = new Array(chunksQuantity).fill().map((_, index) => index).reverse();
var isLargeFile = obj.File.size > largeFileTrigger;
var buffer = null;
var progressiveArray = [];
reader.onload = async function (evt) {
if (isLargeFile) {
progressiveArray.push(evt.currentTarget.result);
} else {
if (buffer == null) {
buffer = evt.currentTarget.result;
} else {
var tmp = new Uint8Array(buffer.byteLength + evt.currentTarget.result.byteLength);
tmp.set(new Uint8Array(buffer), 0);
tmp.set(new Uint8Array(evt.currentTarget.result), buffer.byteLength);
buffer = tmp;
}
}
readNext();
}
var readNext = async function () {
if (chunksQueue.length > 0) {
const chunkId = chunksQueue.pop();
const sentSize = chunkId * chunkSize;
const chunk = obj.File.slice(sentSize, sentSize + chunkSize);
reader.readAsArrayBuffer(chunk);
} else {
var hexHash = null;
if (isLargeFile) {
var sha = CryptoJS.algo.SHA512.create();
for (var i = 0; i < progressiveArray.length; i++) {
sha.update(arrayBufferToWordArray(progressiveArray[i]));
}
hexHash = sha.finalize().toString();
} else {
hexHash = await digestMessage(buffer);
}
SHA512 = hexHash.toUpperCase();
buffer = null;
progressiveArray = null;
postMessage({ Hash: SHA512 });
}
}
readNext();
}
async function digestMessage(file) {
const hashBuffer = await crypto.subtle.digest('SHA-512', file); // hash the message
const hashArray = Array.from(new Uint8Array(hashBuffer)); // convert buffer to byte array
const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join(''); // convert bytes to hex string
return hashHex;
}
function arrayBufferToWordArray(ab) {
var i8a = new Uint8Array(ab);
var a = [];
for (var i = 0; i < i8a.length; i += 4) {
a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
}
return CryptoJS.lib.WordArray.create(a, i8a.length);
}
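For completeness, the worker above expects a message whose data contains a File property and posts back { Hash: ... }. A minimal usage sketch from the main thread; the worker file name and the input element id are assumptions:
var worker = new Worker('/hash-worker.js'); // adjust to wherever the worker script is served
worker.onmessage = function (e) {
  console.log('SHA-512:', e.data.Hash);
};
document.getElementById('fileInput').addEventListener('change', function () {
  worker.postMessage({ File: this.files[0] }); // the worker reads args.data.File
});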

How to convert an audio file into a byte array with javascript

I'm trying to convert a .wav file to a byte-array string. I need to do this on the back end, and targeting the file is becoming an issue.
files.forEach(file => {
  let index = files.indexOf(file);
  let reader = new FileReader();
  reader.readAsArrayBuffer(file);
  console.log(reader.result);
  reader.onload = function (event) {
    let byteArray = new Uint8Array(reader.result);
    let FileName = file.name;
    let dataAsByteArrayString = byteArray.toString();
    var listHtml = $list.html();
  };
});
The above code uses npm's filereader package, which says to target a file. I'm having difficulty doing this since this is not a front-end drag and drop of the file.
My generated file is called "response.wav". How would I convert this file using JavaScript and Node extensions? Thanks!
I don't know if this will help, but in the last project I worked on we parsed a .wav file using the Node Buffer API and wrote it using the Node file API.
If you have more questions about the code, I can direct you to the person who worked on this file the most. I hope it helps somewhat.
https://github.com/IntelliSound/intelliSound-Server/blob/development/lib/sound-data-parser.js
'use strict';
function ParsedWave(buffer) {
const RIFF_HEADER_OFFSET = 0;
const FILE_SIZE_OFFSET = 4;
const RIFF_FORMAT_OFFSET = 8;
const SUBCHUNK1_ID_OFFSET = 12;
const AUDIO_FORMAT_OFFSET = 20;
const NUMBER_OF_CHANNELS_OFFSET = 22;
const SAMPLE_RATE_OFFSET = 24;
const BITS_PER_SAMPLE_OFFSET = 34;
const SUBCHUNK2_ID_OFFSET = 36;
const SUBCHUNK2_SIZE_OFFSET = 40;
const DATA_OFFSET = 44;
this.buffer = buffer;
this.riff = buffer.slice(RIFF_HEADER_OFFSET, RIFF_HEADER_OFFSET + 4).toString('utf8');
this.fileSize = buffer.readUInt32LE(FILE_SIZE_OFFSET);
this.riffType = buffer.slice(RIFF_FORMAT_OFFSET, RIFF_FORMAT_OFFSET + 4).toString('utf8');
this.subChunk1Id = buffer.slice(SUBCHUNK1_ID_OFFSET, SUBCHUNK1_ID_OFFSET + 4).toString('utf8');
this.audioFormat = buffer.readUInt16LE(AUDIO_FORMAT_OFFSET);
this.numberOfChannels = buffer.readUInt16LE(NUMBER_OF_CHANNELS_OFFSET);
this.sampleRate = buffer.readUInt32LE(SAMPLE_RATE_OFFSET);
this.bitsPerSample = buffer.readUInt16LE(BITS_PER_SAMPLE_OFFSET);
this.subChunk2Id = buffer.slice(SUBCHUNK2_ID_OFFSET, SUBCHUNK2_ID_OFFSET + 4).toString('utf8');
this.subChunk2Size = buffer.readUInt32LE(SUBCHUNK2_SIZE_OFFSET);
this.data = buffer.slice(DATA_OFFSET, this.subChunk2Size + DATA_OFFSET);
}
// Andrew - The bufferMapper function is going to accept a parsed wave-file and output
// an array of values corresponding to the data subchunk in a format which can
// be accepted as input to the neural network.
const bufferMapper = parsedWave => {
const SIXTEEN_BIT_ZERO = 32768;
const SIXTEEN_BIT_MAX = 65535;
parsedWave.neuralArray = [];
for (let i = 0; i < parsedWave.data.length; i += 2) {
const sample = parsedWave.data.readInt16LE(i);
const unsignedSample = sample + SIXTEEN_BIT_ZERO;
const sigmoidSample = unsignedSample / SIXTEEN_BIT_MAX;
parsedWave.neuralArray.push(sigmoidSample);
}
return parsedWave;
};
module.exports = data => {
const parsedWaveFile = new ParsedWave(data);
if (parsedWaveFile.riff !== 'RIFF') {
throw new TypeError('incorrect file type, must be RIFF format');
}
if (parsedWaveFile.fileSize > 10000000) {
throw new TypeError('file too large, please limit file size to less than 10MB');
}
if (parsedWaveFile.riffType !== 'WAVE') {
throw new TypeError('file must be a WAVE');
}
if (parsedWaveFile.subChunk1Id !== 'fmt ') {
throw new TypeError('the first subchunk must be fmt');
}
if (parsedWaveFile.audioFormat !== 1) {
throw new TypeError('wave file must be uncompressed linear PCM');
}
if (parsedWaveFile.numberOfChannels > 2) {
throw new TypeError('wave file must have 2 or less channels');
}
if (parsedWaveFile.sampleRate > 48000) {
throw new TypeError('wave file must have sample rate of less than 48k');
}
if (parsedWaveFile.bitsPerSample !== 16) {
throw new TypeError(`file's bit depth must be 16`);
}
if (parsedWaveFile.subChunk2Id !== 'data') {
throw new TypeError('subchunk 2 must be data');
}
const neuralMappedWaveFile = bufferMapper(parsedWaveFile);
return neuralMappedWaveFile;
};
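If you want to try that parser directly, a minimal usage sketch might look like this; the require path and the 'response.wav' file name are assumptions taken from the repo link and the question:
const fs = require('fs');
const parseWave = require('./lib/sound-data-parser'); // path assumed from the linked repo

fs.readFile('./response.wav', (err, data) => {
  if (err) throw err;
  const parsed = parseWave(data); // data is a Node Buffer
  console.log(parsed.sampleRate, parsed.numberOfChannels, parsed.neuralArray.length);
});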
Using the included fs module, you can read in your wav file like so:
const fs = require('fs');
const path = './path/to/my.wav';
fs.readFile(path, (err, data) => {
// Data is a Buffer object
});
For documentation on working with a Node.js Buffer, see here. Now, if you are more interested in the file-conversion portion, there are a couple of libraries out there. If you just need the conversion functionality and don't want to implement it yourself, node-fluent-ffmpeg may work for you. If you want to implement it yourself, this node-wav file may be a good reference (too much to paste here).
If you need to go from a Buffer to an ArrayBuffer, this SO answer shows some options.
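For the "byte array string" the question asks for, a minimal sketch on top of fs.readFile could look like this; the 'response.wav' path comes from the question:
const fs = require('fs');

fs.readFile('./response.wav', (err, data) => {
  if (err) throw err;
  // data is a Buffer; wrap it in a Uint8Array view without copying
  const byteArray = new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
  const dataAsByteArrayString = byteArray.toString(); // e.g. "82,73,70,70,..."
  console.log(dataAsByteArrayString.slice(0, 64), '...');
});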

Read File byte for byte and parse to int

I have to read data from a file. The data was written into the file byte-wise by a server. The file has a fixed structure, and now I want to read the information in it with JS.
I found http://www.html5rocks.com/en/tutorials/file/dndfiles/ and copied it into a fiddle: http://jsfiddle.net/egLof4ph/
function readBlob(opt_startByte, opt_stopByte) {
var files = document.getElementById('files').files;
if (!files.length) {
alert('Please select a file!');
return;
}
var file = files[0];
var start = parseInt(opt_startByte) || 0;
var stop = parseInt(opt_stopByte) || file.size - 1;
var reader = new FileReader();
// If we use onloadend, we need to check the readyState.
reader.onloadend = function(evt) {
if (evt.target.readyState == FileReader.DONE) { // DONE == 2
document.getElementById('byte_content').textContent = evt.target.result;
document.getElementById('byte_range').textContent = ['Read bytes: ', start + 1, ' - ', stop + 1,
' of ', file.size, ' byte file'].join('');
}
};
var blob = file.slice(start, stop);
var a = reader.readAsBinaryString(blob);
}
document.querySelector('.readBytesButtons').addEventListener('click', function(evt) {
if (evt.target.tagName.toLowerCase() == 'button') {
var startByte = evt.target.getAttribute('data-startbyte');
var endByte = evt.target.getAttribute('data-endbyte');
readBlob(startByte, endByte);
}
}, false);
I know that the first 7 bytes are junk and can be thrown away. The next 68 bytes belong together, and every value is 4 bytes big. After those 68 bytes come another 68 usable bytes (each 68-byte block is a "timeslot").
My Question:
When I use that code I get many characters (A, Q, &&&, special chars, ...), but the data are in reality longs. How can I parse them into numbers? According to the FileReader API, readAsBinaryString() returns raw binary data. And how do I correctly parse the whole file?
So, the original file looks like this:
<7B>Metadata</7B><4B>long value</4B>....17 times for each timeslot <4B>long value</4B>....17 times again.... and this until the end of the file.
When I use the above code I get output like: �&�&WK��
Furthermore I found https://developer.mozilla.org/en-US/docs/Web/JavaScript/Typed_arrays (since FileReader provides a method which returns an ArrayBuffer), so I guess I should use readAsArrayBuffer(), but how do I use it to get to my data?
Do you really need binary?
Note that the readAsBinaryString method is now deprecated, as per the 12 July 2012 Working Draft from the W3C.
function readBlob(opt_startByte, opt_stopByte) {
var files = document.getElementById('files').files;
if (!files.length) {
alert('Please select a file!');
return;
}
var file = files[0];
var start = parseInt(opt_startByte) || 0;
var stop = parseInt(opt_stopByte) || file.size - 1;
var reader = new FileReader();
reader.onloadend = function (evt) {
if (evt.target.readyState == FileReader.DONE) { // DONE == 2
var a = new Uint8Array(evt.target.result)
var binary = ""
for (var i = 0; i < a.length; i++) {
binary += a[i].toString(2).padStart(8, "0")
}
document.getElementById('byte_content').textContent = binary;
document.getElementById('byte_range').textContent = ['Read bytes: ', start + 1, ' - ', stop + 1,
' of ', file.size, ' byte file'].join('');
}
};
var blob = file.slice(start, stop);
var a = reader.readAsArrayBuffer(blob)
}
document.querySelector('.readBytesButtons').addEventListener('click', function (evt) {
if (evt.target.tagName.toLowerCase() == 'button') {
var startByte = evt.target.getAttribute('data-startbyte');
var endByte = evt.target.getAttribute('data-endbyte');
readBlob(startByte, endByte);
}
}, false);
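To turn those bytes into numbers, a DataView over the ArrayBuffer is usually easier than building a binary string. A minimal sketch, assuming the layout described in the question (7 metadata bytes, then consecutive 4-byte values) and that file is the selected File; the byte order is an assumption, so flip the littleEndian flag if the values look wrong:
var reader = new FileReader();
reader.onloadend = function (evt) {
  if (evt.target.readyState !== FileReader.DONE) return;
  var view = new DataView(evt.target.result);
  var littleEndian = false; // assumed byte order - depends on how the server wrote the file
  var values = [];
  for (var offset = 7; offset + 4 <= view.byteLength; offset += 4) {
    values.push(view.getInt32(offset, littleEndian)); // use getUint32 for unsigned values
  }
  console.log(values);
};
reader.readAsArrayBuffer(file);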
