Convert base64 string to ArrayBuffer - javascript

I need to convert a base64 encode string into an ArrayBuffer.
The base64 strings are user input, they will be copy and pasted from an email, so they're not there when the page is loaded.
I would like to do this in javascript without making an ajax call to the server if possible.
I found those links interesting, but they didt'n help me:
ArrayBuffer to base64 encoded string
this is about the opposite conversion, from ArrayBuffer to base64, not the other way round
http://jsperf.com/json-vs-base64/2
this looks good but i can't figure out how to use the code.
Is there an easy (maybe native) way to do the conversion? thanks

Try this:
function _base64ToArrayBuffer(base64) {
var binary_string = window.atob(base64);
var len = binary_string.length;
var bytes = new Uint8Array(len);
for (var i = 0; i < len; i++) {
bytes[i] = binary_string.charCodeAt(i);
}
return bytes.buffer;
}

Using TypedArray.from:
Uint8Array.from(atob(base64_string), c => c.charCodeAt(0))
Performance to be compared with the for loop version of Goran.it answer.

For Node.js users:
const myBuffer = Buffer.from(someBase64String, 'base64');
myBuffer will be of type Buffer which is a subclass of Uint8Array. Unfortunately, Uint8Array is NOT an ArrayBuffer as the OP was asking for. But when manipulating an ArrayBuffer I almost always wrap it with Uint8Array or something similar, so it should be close to what's being asked for.

Goran.it's answer does not work because of unicode problem in javascript - https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding.
I ended up using the function given on Daniel Guerrero's blog: http://blog.danguer.com/2011/10/24/base64-binary-decoding-in-javascript/
Function is listed on github link: https://github.com/danguer/blog-examples/blob/master/js/base64-binary.js
Use these lines
var uintArray = Base64Binary.decode(base64_string);
var byteArray = Base64Binary.decodeArrayBuffer(base64_string);

Async solution, it's better when the data is big:
// base64 to buffer
function base64ToBufferAsync(base64) {
var dataUrl = "data:application/octet-binary;base64," + base64;
fetch(dataUrl)
.then(res => res.arrayBuffer())
.then(buffer => {
console.log("base64 to buffer: " + new Uint8Array(buffer));
})
}
// buffer to base64
function bufferToBase64Async( buffer ) {
var blob = new Blob([buffer], {type:'application/octet-binary'});
console.log("buffer to blob:" + blob)
var fileReader = new FileReader();
fileReader.onload = function() {
var dataUrl = fileReader.result;
console.log("blob to dataUrl: " + dataUrl);
var base64 = dataUrl.substr(dataUrl.indexOf(',')+1)
console.log("dataUrl to base64: " + base64);
};
fileReader.readAsDataURL(blob);
}

Javascript is a fine development environment so it seems odd than it doesn't provide a solution to this small problem. The solutions offered elsewhere on this page are potentially slow. Here is my solution. It employs the inbuilt functionality that decodes base64 image and sound data urls.
var req = new XMLHttpRequest;
req.open('GET', "data:application/octet;base64," + base64Data);
req.responseType = 'arraybuffer';
req.onload = function fileLoaded(e)
{
var byteArray = new Uint8Array(e.target.response);
// var shortArray = new Int16Array(e.target.response);
// var unsignedShortArray = new Int16Array(e.target.response);
// etc.
}
req.send();
The send request fails if the base 64 string is badly formed.
The mime type (application/octet) is probably unnecessary.
Tested in chrome. Should work in other browsers.

Pure JS - no string middlestep (no atob)
I write following function which convert base64 in direct way (without conversion to string at the middlestep). IDEA
get 4 base64 characters chunk
find index of each character in base64 alphabet
convert index to 6-bit number (binary string)
join four 6 bit numbers which gives 24-bit numer (stored as binary string)
split 24-bit string to three 8-bit and covert each to number and store them in output array
corner case: if input base64 string ends with one/two = char, remove one/two numbers from output array
Below solution allows to process large input base64 strings. Similar function for convert bytes to base64 without btoa is HERE
function base64ToBytesArr(str) {
const abc = [..."ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"]; // base64 alphabet
let result = [];
for(let i=0; i<str.length/4; i++) {
let chunk = [...str.slice(4*i,4*i+4)]
let bin = chunk.map(x=> abc.indexOf(x).toString(2).padStart(6,0)).join('');
let bytes = bin.match(/.{1,8}/g).map(x=> +('0b'+x));
result.push(...bytes.slice(0,3 - (str[4*i+2]=="=") - (str[4*i+3]=="=")));
}
return result;
}
// --------
// TEST
// --------
let test = "Alice's Adventure in Wonderland.";
console.log('test string:', test.length, test);
let b64_btoa = btoa(test);
console.log('encoded string:', b64_btoa);
let decodedBytes = base64ToBytesArr(b64_btoa); // decode base64 to array of bytes
console.log('decoded bytes:', JSON.stringify(decodedBytes));
let decodedTest = decodedBytes.map(b => String.fromCharCode(b) ).join``;
console.log('Uint8Array', JSON.stringify(new Uint8Array(decodedBytes)));
console.log('decoded string:', decodedTest.length, decodedTest);
Caution!
If you want to decode base64 to STRING (not bytes array) and you know that result contains utf8 characters then atob will fail in general e.g. for character 💩 the atob("8J+SqQ==") will give wrong result . In this case you can use above solution and convert result bytes array to string in proper way e.g. :
function base64ToBytesArr(str) {
const abc = [..."ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"]; // base64 alphabet
let result = [];
for(let i=0; i<str.length/4; i++) {
let chunk = [...str.slice(4*i,4*i+4)]
let bin = chunk.map(x=> abc.indexOf(x).toString(2).padStart(6,0)).join('');
let bytes = bin.match(/.{1,8}/g).map(x=> +('0b'+x));
result.push(...bytes.slice(0,3 - (str[4*i+2]=="=") - (str[4*i+3]=="=")));
}
return result;
}
// --------
// TEST
// --------
let testB64 = "8J+SqQ=="; // for string: "💩";
console.log('input base64 :', testB64);
let decodedBytes = base64ToBytesArr(testB64); // decode base64 to array of bytes
console.log('decoded bytes :', JSON.stringify(decodedBytes));
let result = new TextDecoder("utf-8").decode(new Uint8Array(decodedBytes));
console.log('properly decoded string :', result);
let result_atob = atob(testB64);
console.log('decoded by atob :', result_atob);
Snippets tested 2022-08-04 on: chrome 103.0.5060.134 (arm64), safari 15.2, firefox 103.0.1 (64 bit), edge 103.0.1264.77 (arm64), and node-js v12.16.1

I would strongly suggest using an npm package implementing correctly the base64 specification.
The best one I know is rfc4648
The problem is that btoa and atob use binary strings instead of Uint8Array and trying to convert to and from it is cumbersome. Also there is a lot of bad packages in npm for that. I lose a lot of time before finding that one.
The creators of that specific package did a simple thing: they took the specification of Base64 (which is here by the way) and implemented it correctly from the beginning to the end. (Including other formats in the specification that are also useful like Base64-url, Base32, etc ...) That doesn't seem a lot but apparently that was too much to ask to the bunch of other libraries.
So yeah, I know I'm doing a bit of proselytism but if you want to avoid losing your time too just use rfc4648.

I used the accepted answer to this question to create base64Url string <-> arrayBuffer conversions in the realm of base64Url data transmitted via ASCII-cookie [atob, btoa are base64[with +/]<->js binary string], so I decided to post the code.
Many of us may want both conversions and client-server communication may use the base64Url version (though a cookie may contain +/ as well as -_ characters if I understand well, only ",;\ characters and some wicked characters from the 128 ASCII are disallowed). But a url cannot contain / character, hence the wider use of b64 url version which of course not what atob-btoa supports...
Seeing other comments, I would like to stress that my use case here is base64Url data transmission via url/cookie and trying to use this crypto data with the js crypto api (2017) hence the need for ArrayBuffer representation and b64u <-> arrBuff conversions... if array buffers represent other than base64 (part of ascii) this conversion wont work since atob, btoa is limited to ascii(128). Check out an appropriate converter like below:
The buff -> b64u version is from a tweet from Mathias Bynens, thanks for that one (too)! He also wrote a base64 encoder/decoder:
https://github.com/mathiasbynens/base64
Coming from java, it may help when trying to understand the code that java byte[] is practically js Int8Array (signed int) but we use here the unsigned version Uint8Array since js conversions work with them. They are both 256bit, so we call it byte[] in js now...
The code is from a module class, that is why static.
//utility
/**
* Array buffer to base64Url string
* - arrBuff->byte[]->biStr->b64->b64u
* #param arrayBuffer
* #returns {string}
* #private
*/
static _arrayBufferToBase64Url(arrayBuffer) {
console.log('base64Url from array buffer:', arrayBuffer);
let base64Url = window.btoa(String.fromCodePoint(...new Uint8Array(arrayBuffer)));
base64Url = base64Url.replaceAll('+', '-');
base64Url = base64Url.replaceAll('/', '_');
console.log('base64Url:', base64Url);
return base64Url;
}
/**
* Base64Url string to array buffer
* - b64u->b64->biStr->byte[]->arrBuff
* #param base64Url
* #returns {ArrayBufferLike}
* #private
*/
static _base64UrlToArrayBuffer(base64Url) {
console.log('array buffer from base64Url:', base64Url);
let base64 = base64Url.replaceAll('-', '+');
base64 = base64.replaceAll('_', '/');
const binaryString = window.atob(base64);
const length = binaryString.length;
const bytes = new Uint8Array(length);
for (let i = 0; i < length; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
console.log('array buffer:', bytes.buffer);
return bytes.buffer;
}

made a ArrayBuffer from a base64:
function base64ToArrayBuffer(base64) {
var binary_string = window.atob(base64);
var len = binary_string.length;
var bytes = new Uint8Array(len);
for (var i = 0; i < len; i++) {
bytes[i] = binary_string.charCodeAt(i);
}
return bytes.buffer;
}
I was trying to use above code and It's working fine.

The result of atob is a string that is separated with some comma
,
A simpler way is to convert this string to a json array string and after that parse it to a byteArray
below code can simply be used to convert base64 to an array of number
let byteArray = JSON.parse('['+atob(base64)+']');
let buffer = new Uint8Array(byteArray);

Solution without atob
I've seen many people complaining about using atob and btoa in the replies. There are some issues to take into account when using them.
There's a solution without using them in the MDN page about Base64. Below you can find the code to convert a base64 string into a Uint8Array copied from the docs.
Note that the function below returns a Uint8Array. To get the ArrayBuffer version you just need to do uintArray.buffer.
function b64ToUint6(nChr) {
return nChr > 64 && nChr < 91
? nChr - 65
: nChr > 96 && nChr < 123
? nChr - 71
: nChr > 47 && nChr < 58
? nChr + 4
: nChr === 43
? 62
: nChr === 47
? 63
: 0;
}
function base64DecToArr(sBase64, nBlocksSize) {
const sB64Enc = sBase64.replace(/[^A-Za-z0-9+/]/g, "");
const nInLen = sB64Enc.length;
const nOutLen = nBlocksSize
? Math.ceil(((nInLen * 3 + 1) >> 2) / nBlocksSize) * nBlocksSize
: (nInLen * 3 + 1) >> 2;
const taBytes = new Uint8Array(nOutLen);
let nMod3;
let nMod4;
let nUint24 = 0;
let nOutIdx = 0;
for (let nInIdx = 0; nInIdx < nInLen; nInIdx++) {
nMod4 = nInIdx & 3;
nUint24 |= b64ToUint6(sB64Enc.charCodeAt(nInIdx)) << (6 * (3 - nMod4));
if (nMod4 === 3 || nInLen - nInIdx === 1) {
nMod3 = 0;
while (nMod3 < 3 && nOutIdx < nOutLen) {
taBytes[nOutIdx] = (nUint24 >>> ((16 >>> nMod3) & 24)) & 255;
nMod3++;
nOutIdx++;
}
nUint24 = 0;
}
}
return taBytes;
}
If you're interested in the reverse operation, ArrayBuffer to base64, you can find how to do it in the same link.

Related

Having Trouble Parsing a Base64 Encoded Array into Geometry Data [duplicate]

I need to convert a base64 encode string into an ArrayBuffer.
The base64 strings are user input, they will be copy and pasted from an email, so they're not there when the page is loaded.
I would like to do this in javascript without making an ajax call to the server if possible.
I found those links interesting, but they didt'n help me:
ArrayBuffer to base64 encoded string
this is about the opposite conversion, from ArrayBuffer to base64, not the other way round
http://jsperf.com/json-vs-base64/2
this looks good but i can't figure out how to use the code.
Is there an easy (maybe native) way to do the conversion? thanks
Try this:
function _base64ToArrayBuffer(base64) {
var binary_string = window.atob(base64);
var len = binary_string.length;
var bytes = new Uint8Array(len);
for (var i = 0; i < len; i++) {
bytes[i] = binary_string.charCodeAt(i);
}
return bytes.buffer;
}
Using TypedArray.from:
Uint8Array.from(atob(base64_string), c => c.charCodeAt(0))
Performance to be compared with the for loop version of Goran.it answer.
For Node.js users:
const myBuffer = Buffer.from(someBase64String, 'base64');
myBuffer will be of type Buffer which is a subclass of Uint8Array. Unfortunately, Uint8Array is NOT an ArrayBuffer as the OP was asking for. But when manipulating an ArrayBuffer I almost always wrap it with Uint8Array or something similar, so it should be close to what's being asked for.
Goran.it's answer does not work because of unicode problem in javascript - https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding.
I ended up using the function given on Daniel Guerrero's blog: http://blog.danguer.com/2011/10/24/base64-binary-decoding-in-javascript/
Function is listed on github link: https://github.com/danguer/blog-examples/blob/master/js/base64-binary.js
Use these lines
var uintArray = Base64Binary.decode(base64_string);
var byteArray = Base64Binary.decodeArrayBuffer(base64_string);
Async solution, it's better when the data is big:
// base64 to buffer
function base64ToBufferAsync(base64) {
var dataUrl = "data:application/octet-binary;base64," + base64;
fetch(dataUrl)
.then(res => res.arrayBuffer())
.then(buffer => {
console.log("base64 to buffer: " + new Uint8Array(buffer));
})
}
// buffer to base64
function bufferToBase64Async( buffer ) {
var blob = new Blob([buffer], {type:'application/octet-binary'});
console.log("buffer to blob:" + blob)
var fileReader = new FileReader();
fileReader.onload = function() {
var dataUrl = fileReader.result;
console.log("blob to dataUrl: " + dataUrl);
var base64 = dataUrl.substr(dataUrl.indexOf(',')+1)
console.log("dataUrl to base64: " + base64);
};
fileReader.readAsDataURL(blob);
}
Javascript is a fine development environment so it seems odd than it doesn't provide a solution to this small problem. The solutions offered elsewhere on this page are potentially slow. Here is my solution. It employs the inbuilt functionality that decodes base64 image and sound data urls.
var req = new XMLHttpRequest;
req.open('GET', "data:application/octet;base64," + base64Data);
req.responseType = 'arraybuffer';
req.onload = function fileLoaded(e)
{
var byteArray = new Uint8Array(e.target.response);
// var shortArray = new Int16Array(e.target.response);
// var unsignedShortArray = new Int16Array(e.target.response);
// etc.
}
req.send();
The send request fails if the base 64 string is badly formed.
The mime type (application/octet) is probably unnecessary.
Tested in chrome. Should work in other browsers.
Pure JS - no string middlestep (no atob)
I write following function which convert base64 in direct way (without conversion to string at the middlestep). IDEA
get 4 base64 characters chunk
find index of each character in base64 alphabet
convert index to 6-bit number (binary string)
join four 6 bit numbers which gives 24-bit numer (stored as binary string)
split 24-bit string to three 8-bit and covert each to number and store them in output array
corner case: if input base64 string ends with one/two = char, remove one/two numbers from output array
Below solution allows to process large input base64 strings. Similar function for convert bytes to base64 without btoa is HERE
function base64ToBytesArr(str) {
const abc = [..."ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"]; // base64 alphabet
let result = [];
for(let i=0; i<str.length/4; i++) {
let chunk = [...str.slice(4*i,4*i+4)]
let bin = chunk.map(x=> abc.indexOf(x).toString(2).padStart(6,0)).join('');
let bytes = bin.match(/.{1,8}/g).map(x=> +('0b'+x));
result.push(...bytes.slice(0,3 - (str[4*i+2]=="=") - (str[4*i+3]=="=")));
}
return result;
}
// --------
// TEST
// --------
let test = "Alice's Adventure in Wonderland.";
console.log('test string:', test.length, test);
let b64_btoa = btoa(test);
console.log('encoded string:', b64_btoa);
let decodedBytes = base64ToBytesArr(b64_btoa); // decode base64 to array of bytes
console.log('decoded bytes:', JSON.stringify(decodedBytes));
let decodedTest = decodedBytes.map(b => String.fromCharCode(b) ).join``;
console.log('Uint8Array', JSON.stringify(new Uint8Array(decodedBytes)));
console.log('decoded string:', decodedTest.length, decodedTest);
Caution!
If you want to decode base64 to STRING (not bytes array) and you know that result contains utf8 characters then atob will fail in general e.g. for character 💩 the atob("8J+SqQ==") will give wrong result . In this case you can use above solution and convert result bytes array to string in proper way e.g. :
function base64ToBytesArr(str) {
const abc = [..."ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"]; // base64 alphabet
let result = [];
for(let i=0; i<str.length/4; i++) {
let chunk = [...str.slice(4*i,4*i+4)]
let bin = chunk.map(x=> abc.indexOf(x).toString(2).padStart(6,0)).join('');
let bytes = bin.match(/.{1,8}/g).map(x=> +('0b'+x));
result.push(...bytes.slice(0,3 - (str[4*i+2]=="=") - (str[4*i+3]=="=")));
}
return result;
}
// --------
// TEST
// --------
let testB64 = "8J+SqQ=="; // for string: "💩";
console.log('input base64 :', testB64);
let decodedBytes = base64ToBytesArr(testB64); // decode base64 to array of bytes
console.log('decoded bytes :', JSON.stringify(decodedBytes));
let result = new TextDecoder("utf-8").decode(new Uint8Array(decodedBytes));
console.log('properly decoded string :', result);
let result_atob = atob(testB64);
console.log('decoded by atob :', result_atob);
Snippets tested 2022-08-04 on: chrome 103.0.5060.134 (arm64), safari 15.2, firefox 103.0.1 (64 bit), edge 103.0.1264.77 (arm64), and node-js v12.16.1
I would strongly suggest using an npm package implementing correctly the base64 specification.
The best one I know is rfc4648
The problem is that btoa and atob use binary strings instead of Uint8Array and trying to convert to and from it is cumbersome. Also there is a lot of bad packages in npm for that. I lose a lot of time before finding that one.
The creators of that specific package did a simple thing: they took the specification of Base64 (which is here by the way) and implemented it correctly from the beginning to the end. (Including other formats in the specification that are also useful like Base64-url, Base32, etc ...) That doesn't seem a lot but apparently that was too much to ask to the bunch of other libraries.
So yeah, I know I'm doing a bit of proselytism but if you want to avoid losing your time too just use rfc4648.
I used the accepted answer to this question to create base64Url string <-> arrayBuffer conversions in the realm of base64Url data transmitted via ASCII-cookie [atob, btoa are base64[with +/]<->js binary string], so I decided to post the code.
Many of us may want both conversions and client-server communication may use the base64Url version (though a cookie may contain +/ as well as -_ characters if I understand well, only ",;\ characters and some wicked characters from the 128 ASCII are disallowed). But a url cannot contain / character, hence the wider use of b64 url version which of course not what atob-btoa supports...
Seeing other comments, I would like to stress that my use case here is base64Url data transmission via url/cookie and trying to use this crypto data with the js crypto api (2017) hence the need for ArrayBuffer representation and b64u <-> arrBuff conversions... if array buffers represent other than base64 (part of ascii) this conversion wont work since atob, btoa is limited to ascii(128). Check out an appropriate converter like below:
The buff -> b64u version is from a tweet from Mathias Bynens, thanks for that one (too)! He also wrote a base64 encoder/decoder:
https://github.com/mathiasbynens/base64
Coming from java, it may help when trying to understand the code that java byte[] is practically js Int8Array (signed int) but we use here the unsigned version Uint8Array since js conversions work with them. They are both 256bit, so we call it byte[] in js now...
The code is from a module class, that is why static.
//utility
/**
* Array buffer to base64Url string
* - arrBuff->byte[]->biStr->b64->b64u
* #param arrayBuffer
* #returns {string}
* #private
*/
static _arrayBufferToBase64Url(arrayBuffer) {
console.log('base64Url from array buffer:', arrayBuffer);
let base64Url = window.btoa(String.fromCodePoint(...new Uint8Array(arrayBuffer)));
base64Url = base64Url.replaceAll('+', '-');
base64Url = base64Url.replaceAll('/', '_');
console.log('base64Url:', base64Url);
return base64Url;
}
/**
* Base64Url string to array buffer
* - b64u->b64->biStr->byte[]->arrBuff
* #param base64Url
* #returns {ArrayBufferLike}
* #private
*/
static _base64UrlToArrayBuffer(base64Url) {
console.log('array buffer from base64Url:', base64Url);
let base64 = base64Url.replaceAll('-', '+');
base64 = base64.replaceAll('_', '/');
const binaryString = window.atob(base64);
const length = binaryString.length;
const bytes = new Uint8Array(length);
for (let i = 0; i < length; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
console.log('array buffer:', bytes.buffer);
return bytes.buffer;
}
made a ArrayBuffer from a base64:
function base64ToArrayBuffer(base64) {
var binary_string = window.atob(base64);
var len = binary_string.length;
var bytes = new Uint8Array(len);
for (var i = 0; i < len; i++) {
bytes[i] = binary_string.charCodeAt(i);
}
return bytes.buffer;
}
I was trying to use above code and It's working fine.
The result of atob is a string that is separated with some comma
,
A simpler way is to convert this string to a json array string and after that parse it to a byteArray
below code can simply be used to convert base64 to an array of number
let byteArray = JSON.parse('['+atob(base64)+']');
let buffer = new Uint8Array(byteArray);
Solution without atob
I've seen many people complaining about using atob and btoa in the replies. There are some issues to take into account when using them.
There's a solution without using them in the MDN page about Base64. Below you can find the code to convert a base64 string into a Uint8Array copied from the docs.
Note that the function below returns a Uint8Array. To get the ArrayBuffer version you just need to do uintArray.buffer.
function b64ToUint6(nChr) {
return nChr > 64 && nChr < 91
? nChr - 65
: nChr > 96 && nChr < 123
? nChr - 71
: nChr > 47 && nChr < 58
? nChr + 4
: nChr === 43
? 62
: nChr === 47
? 63
: 0;
}
function base64DecToArr(sBase64, nBlocksSize) {
const sB64Enc = sBase64.replace(/[^A-Za-z0-9+/]/g, "");
const nInLen = sB64Enc.length;
const nOutLen = nBlocksSize
? Math.ceil(((nInLen * 3 + 1) >> 2) / nBlocksSize) * nBlocksSize
: (nInLen * 3 + 1) >> 2;
const taBytes = new Uint8Array(nOutLen);
let nMod3;
let nMod4;
let nUint24 = 0;
let nOutIdx = 0;
for (let nInIdx = 0; nInIdx < nInLen; nInIdx++) {
nMod4 = nInIdx & 3;
nUint24 |= b64ToUint6(sB64Enc.charCodeAt(nInIdx)) << (6 * (3 - nMod4));
if (nMod4 === 3 || nInLen - nInIdx === 1) {
nMod3 = 0;
while (nMod3 < 3 && nOutIdx < nOutLen) {
taBytes[nOutIdx] = (nUint24 >>> ((16 >>> nMod3) & 24)) & 255;
nMod3++;
nOutIdx++;
}
nUint24 = 0;
}
}
return taBytes;
}
If you're interested in the reverse operation, ArrayBuffer to base64, you can find how to do it in the same link.

How do I get the actual size in bytes for a number and a string in JavaScript in a browser environment?

I am trying to get the actual size (in bytes) of a number and a string in browsers e.g. chrome.
I learned that in JavaScript numbers are represented in double precision takes up 64 bits and strings are UTF-16 code unit so it takes either 2 bytes or 4 bytes.
I first tried to use new Blob but it encodes string component characters as UTF-8 not UTF-16. And I know there is a Buffer.from API in Node but it is not available in a browser environment.
My question is how I can get the actual size of a number and a string in bytes from a browser, e.g. chrome?
You can do that natively with the help of TextEncoder
let str1 = 'Beta'; // 'Beta' text in English
let str2 = '贝塔'; // 'Beta' text in Chinese
const encoder = new TextEncoder();
const len1 = encoder.encode(str1).length;
const len2 = encoder.encode(str2).length;
console.log(len1); // 4
console.log(len2); // 6
First of all it is important to realize that the spec doesn't mandate any representation. Just behavior.
Strings are stored in UTF-16 but fortunately for your purpose each index represents 16 bits.
For example
console.log('😠'.length); // Should log 2 because emoji takes 2 16 bit parts
For numbers it depends. V8 represents small integer numbers as actual 32 bit ints.
With https://github.com/substack/node-browserify you can work with buffers in the Browser by using: https://github.com/toots/buffer-browserify.
//in your browsify js file:
require('buffer')
Buffer.byteLength(String.fromCharCode(55555), 'utf16')
Buffer.byteLength(String.fromCharCode(55555, 57000), 'utf16')
iconv-lite: Pure JS character encoding conversion
var iconv = require('iconv-lite');
var buf =iconv.encode("Hello World", 'utf16');
console.log(buf);
console.log(buf.length); // returns 24
Here is my answer to your problem :
function getBytesFromVar(theVar) {
if(theVar !== null && theVar !== undefined) {
switch (typeof theVar) {
case 'string' : {
var encoder = new TextEncoder();
encoder['encoding'] = "utf-16";
return encoder['encode'](theVar).length * 2;
}
case 'number' : {
return 8;
}
case 'boolean' : {
return 4;
}
case 'object' : {
if ( theVar instanceof String) {
var encoder = new TextEncoder();
encoder['encoding'] = "utf-16";
return encoder['encode'](theVar.toString()).length * 2;
} else {
return 0;
}
}
}
}
else {
return 0;
}
}
The getBytesFromVar function take a var and return the number of byte used.
Function use TextEncoder to get the string length and then calculate the bytes.
In case of a string created with:
let str = new String('Alain♥');
function will work with String objects.
ATTENTION: this can't be used to calculate memory footprint in browser as other mechanism of memory management can increase/decrease this values.
Vars can be allocated in different memory segment. For example, String object are created on the heap and string vars are created on string constant pool.
Also vars are manipulated through pointer.
For example, 2 strings vars that contain the same string are created on the string constant pool. First will be allocated, but the second one will be a pointer to the first one. So the size in memory byte will not be simply twice the size of the string.
Good post about that: https://levelup.gitconnected.com/bytefish-vs-new-string-bytefish-what-is-the-difference-a795f6a7a08b
Use case:
var myString='Alain♥';
var myNumber = 120;
var objString = new String('Alain♥');
var myFloat = 105.456;
console.log('%o is %o bytes', myString, getBytesFromVar(myString));
console.log('%o is %o bytes', myNumber, getBytesFromVar(myNumber));
console.log('%o is %o bytes', objString, getBytesFromVar(objString));
console.log('%o is %o bytes', myFloat, getBytesFromVar(myFloat));
I have used the npm module object-sizeof for this. You can use it to get the size of integer or string variables in bytes. This is a sample usage,
var sizeof = require('object-sizeof');
console.log(sizeof(123)); //prints 8
You can do that natively with the help of TextEncoder
let str1 = 'Beta'; // 'Beta' text in English
let str2 = '贝塔'; // 'Beta' text in Chinese
const encoder = new TextEncoder();
const len1 = encoder.encode(str1).length;
const len2 = encoder.encode(str2).length;
console.log(len1); // 4
console.log(len2); // 6

How to use crypto-js to save the binary data, of an encryption-output, to a file?

I'm trying to assist a mobile developer with encrypting an image and uploading it to Azure blob storage. The mobile app uses expo and crypto js. I'm not super familiar with mobile dev or expo but it looks like expo gives you access to a base64 encoded version of the image.
My goal is to encrypt that image data, using crypto js, and upload it to Azure blob storage.
The specifics of expo or Azure aren't really that important to my question, but I figure they're worth mentioning. What is important, I think, is that I'm using crypto js to AES encrypt that image data.
I'm starting with a base64 string of image data and so I use crypto js to parse that like follows ...
const words = CryptoES.enc.Base64.parse(data);
This gives me a WordArray representing the image data, I think (from the base64 string which the mobile API gives me).
Next I can encrypt that image data like so ...
const encrypted = CryptoES.AES.encrypt(words, AES_KEY, { iv: AES_IV });
Now that I have the encrypted data, I would like to write it out to a file in just a binary hex format or whatever. I don't want base64 text in the file and I don't want a hex-string in the file - I'd like it to contain the literal encrypted data byte for byte.
I'm not sure how to get this data.
I guess it's just "toString" but when I do that it says invalid utf8. This is a JPG file that is being dealt with.
How can I get just the actual byte data and write that to a file with crypto js?
CryptoJS.AES.encrypt() returns a CipherParams object that encapsulates, among others, the ciphertext as WordArray. One possibility is to convert this WordArray to a Uint8Array with a custom method. In the following code this conversion is done by convertWordArrayToUint8Array():
function convertWordArrayToUint8Array(wordArray) {
var arrayOfWords = wordArray.hasOwnProperty("words") ? wordArray.words : [];
var length = wordArray.hasOwnProperty("sigBytes") ? wordArray.sigBytes : arrayOfWords.length * 4;
var uInt8Array = new Uint8Array(length), index=0, word, i;
for (i=0; i<length; i++) {
word = arrayOfWords[i];
uInt8Array[index++] = word >> 24;
uInt8Array[index++] = (word >> 16) & 0xff;
uInt8Array[index++] = (word >> 8) & 0xff;
uInt8Array[index++] = word & 0xff;
}
return uInt8Array;
}
var AES_KEY = CryptoJS.enc.Utf8.parse('0123456789012345');
var AES_IV = CryptoJS.enc.Utf8.parse('5432109876543210');
var plaintext = 'The quick brown fox jumps over the lazy dog';
var ciphertextCP = CryptoJS.AES.encrypt(plaintext, AES_KEY, { iv: AES_IV }); // CipherParams object
var ciphertextWA = ciphertextCP.ciphertext; // WordArray
var ciphertextArr = convertWordArrayToUint8Array(ciphertextWA); // Uint8Array
This Uint8Array can now be stored in a file, e.g. with:
var fileName = "encdata.bin";
saveByteArray([ciphertextArr], fileName);
using saveByteArray() from here.
Another approach is to convert the WordArray to a binary string using the Latin1 encoder:
var ciphertextBinStr = ciphertextWA.toString(CryptoJS.enc.Latin1);
which can then easily be converted to a Uint8Array, e.g. with:
function str2Uint8Array(str) {
const arr = new Uint8Array(new ArrayBuffer(str.length));
for (let i = 0, strLen = str.length; i < strLen; i++)
arr[i] = str.charCodeAt(i);
return arr;
}
var ciphertextArr = str2Uint8Array(ciphertextBinStr);

C# SHA256 ComputeHash result different with CryptoJS SHA256 function

I have a C# function as below:
string stringvalue = "530500480530490480530480480520570480520510500490";
var encodedvalue= Encoding.Unicode.GetBytes(stringvalue);
using (HashAlgorithm ssp = System.Security.Cryptography.HashAlgorithm.Create("SHA256"))
{
var digest = ssp.ComputeHash(encodedvalue);
return BitConverter.ToString(digest);
}
I need to create a javascript function that match the code above so that the end result for both C# and JS is the same.
Currently in my JS code, I'm using this:
var hash = CryptoJS.SHA256("530500480530490480530480480520570480520510500490");
var hexhash = hash.toString(CryptoJS.enc.hex);
This is the result of my hexhash:
d956678c8f12c65299daf35019a9a1eb3e6eb9855fd850aeb5aafe46057d179e
But in my C# code, this line of var digest = ssp.ComputeHash(bPass); return the following array:
I don't know much about encoding. Please tell me what type of result is being populated in the c# code above? If I'm not mistaken, the ComputeHash is returning bytes but I need lots of reading to confirm that which is another long hour of studying
I tried many different ways of converting the JS Sha256 code but no luck. I'm stuck at this particular line for almost a day.
Please help. Thanks
EDIT:
Sorry for the code error. I had updated the C# code. ComputeHash accept an array
In my example I am using System.Security.Cryptography.SHA256Managed to get SHA256 in C#.
The method SHA256Managed.ComputeHash takes a byte array as a parameter and return another byte array. Now we need to convert back your byte array to a string.
The following code return the same result a Javascript SHA-256.
byte[] bytes = Encoding.UTF8.GetBytes("530500480530490480530480480520570480520510500490");
SHA256Managed hashstring = new SHA256Managed();
byte[] hash = hashstring.ComputeHash(bytes);
string hashString = string.Empty;
foreach (byte x in hash)
{
hashString += String.Format("{0:x2}", x);
}
return(hashString);
Just to explain : String.Format("{0:x2}", x)
X means Hexadecimal format.
2 means 2 characters.
I finally found the answer after uncountable hours of trial and error.
The C# code var digest = ssp.ComputeHash(encodedvalue) is returning byte array from the result of var encodedvalue= Encoding.Unicode.GetBytes(stringvalue); as Jean replied. In order to create the function in Javascript, I need to ensure that the encodedvalue is producing the correct encoding format and size just like the one in C#.
Using only CryptoJS, I manage to get the matching result from below
function GetHexFromString() {
var stringVal = '8563A578-7402-4567-A6CE-4DE4E0825B021234';
// Convert the string to UTF 16 little-endian
// Result: 560530540510650530550560450550520480500450520530540550450650540670690450520680690520690480560500530660480500490500510520
var utf16le = CryptoJS.enc.Utf16LE.parse(stringVal);
// Convert to Sha256 format and get the word array
var utf16Sha256 = CryptoJS.SHA256(utf16le);
// Convert the Sha256 word array to Uint8Array to get the 32 byte array just to see the result to ensure it match with the C# function
// Result: 94,203,69,29,35,202,209,149,121,144,44,6,98,250,141,161,102,7,238,35,228,117,111,236,118,115,51,113,134,72,52,69
var utf16sha256Array = convertWordArrayToUint8Array(utf16Sha256);
// Convert the Sha256 to hex (if i'm not mistaken, it's base 16) format
var hexSha256 = utf16Sha256.toString(CryptoJS.enc.hex);
// Insert a dash in between 2 characters in the string
hexSha256 = hexSha256.replace(/(\S{2})/g, "$1-");
// Remove the last dash in the string
hexSha256 = hexSha256.replace(/-$/, "");
// Final Result: 5E-CB-45-1D-23-CA-D1-95-79-90-2C-06-62-FA-8D-A1-66-07-EE-23-E4-75-6F-EC-76-73-33-71-86-48-34-45
return hexSha256.toUpperCase();
}
function convertWordArrayToUint8Array(wordArray) {
var len = wordArray.words.length,
u8_array = new Uint8Array(len << 2),
offset = 0, word, i
;
for (i = 0; i < len; i++) {
var word = wordArray.words[i];
u8_array[offset++] = word >> 24;
u8_array[offset++] = (word >> 16) & 0xff;
u8_array[offset++] = (word >> 8) & 0xff;
u8_array[offset++] = word & 0xff;
}
return u8_array;
}
Hope it help whoever that need such method
An alternative to Koo SengSeng's answer (if you don't want to use CryptoJS library).
SHA256 function is from here, the arrToUintArr function is from Koo SengSeng's answer.
var SHA256=function a(b){function c(a,b){return a>>>b|a<<32-b}for(var d,e,f=Math.pow,g=f(2,32),h="length",i="",j=[],k=8*b[h],l=a.h=a.h||[],m=a.k=a.k||[],n=m[h],o={},p=2;64>n;p++)if(!o[p]){for(d=0;313>d;d+=p)o[d]=p;l[n]=f(p,.5)*g|0,m[n++]=f(p,1/3)*g|0}for(b+="\x80";b[h]%64-56;)b+="\x00";for(d=0;d<b[h];d++){if(e=b.charCodeAt(d),e>>8)return;j[d>>2]|=e<<(3-d)%4*8}for(j[j[h]]=k/g|0,j[j[h]]=k,e=0;e<j[h];){var q=j.slice(e,e+=16),r=l;for(l=l.slice(0,8),d=0;64>d;d++){var s=q[d-15],t=q[d-2],u=l[0],v=l[4],w=l[7]+(c(v,6)^c(v,11)^c(v,25))+(v&l[5]^~v&l[6])+m[d]+(q[d]=16>d?q[d]:q[d-16]+(c(s,7)^c(s,18)^s>>>3)+q[d-7]+(c(t,17)^c(t,19)^t>>>10)|0),x=(c(u,2)^c(u,13)^c(u,22))+(u&l[1]^u&l[2]^l[1]&l[2]);l=[w+x|0].concat(l),l[4]=l[4]+w|0}for(d=0;8>d;d++)l[d]=l[d]+r[d]|0}for(d=0;8>d;d++)for(e=3;e+1;e--){var y=l[d]>>8*e&255;i+=(16>y?0:"")+y.toString(16)}return i};
var arrToUintArr=function(a){for(var l=a.length,b=new Uint8Array(l<<2),o=0,w,i=0;i<l;i++) w=a[i],b[o++]=w>>24,b[o++]=(w>>16)&0xff,b[o++]=(w>>8)&0xff,b[o++]=w&0xff;return b;}
var computeHash=function(k){for(var a=[],s=SHA256(k),i=0;i<8;i++) a.push(parseInt(s.substr(i*8,8),16));return arrToUintArr(a);}
computeHash(k) will return an array of numbers representing bytes.
This is equal to below code in C#:
new System.Security.Cryptography.SHA256CryptoServiceProvider().ComputeHash(Encoding.UTF8.GetBytes(k));
Try
var digest = ssp.ComputeHash(Encoding.UTF8.GetBytes(stringvalue))
return BitConverter.ToString(digest)
.Replace("-", string.Empty)
.ToLowerInvariant();
That js library is converting the string to UTF8 before calculating its hash.
typescript code:
private computeHash(text: string): string {
return CryptoJS.SHA256(text).toString();
}
c# equivalent:
private string ComputeHash(string text)
{
using (var sha256 = SHA256.Create())
{
var bytes = Encoding.UTF8.GetBytes(text);
var hash = sha256.ComputeHash(bytes);
return hash.Aggregate(string.Empty, (current, x) => current + $"{x:x2}");
}
}
after two days of research it works perfectly! Two different codes give the same result.
js
const sha1 = require('sha1');
const getHash = str =>{
const hashingBytes = Buffer.from(sha1(str), "hex");
const base64Value = Buffer.from(hashingBytes).toString('base64');
return base64Value;
}
c#
System.Security.Cryptography.SHA1 sha = new System.Security.Cryptography.SHA1CryptoServiceProvider();
byte[] bytes = System.Text.Encoding.ASCII.GetBytes(str);
byte[] hashingbytes = sha.ComputeHash(bytes);
var hash = Convert.ToBase64String(hashingbytes);

how to convert arraybuffer to string

I have written a simple TCP server on node.js to send some data to a Chrome app. In the chrome app, when I get the data, I convert that to string using below function, I get an exception "byte length of Uint16Array should be a multiple of 2"
String.fromCharCode.apply(null, new Uint16Array(buffer))
I could not find any information about what could be causing this and how to fix this. Any pointers on this is highly appreciated.
Below is the code in node.js server for sending the data to client:
socket.on('data', function(data) {
console.log('DATA ' + socket.remoteAddress + ': ' + data);
// Write the data back to the socket,
// the client will receive it as data from the server
var r= socket.write('from server\r\n');
});
Below is the code from chrome app:
chrome.sockets.tcp.onReceive.addListener(function (info) {
console.log('onListener registered');
if (info.socketId != socketid)
return;
else {
try {
data = ab2str(info.data);
console.log(data);
}
catch (e) {
console.log(e);
}
}
// info.data is an arrayBuffer.
});
function ab2str(buf) {
return String.fromCharCode.apply(null, new Uint16Array(buf));
}
The modern (Chrome 38+) way to do this would be, assuming the encoding is UTF-8:
var decoder = new TextDecoder("utf-8");
function ab2str(buf) {
return decoder.decode(new Uint8Array(buf));
}
This uses the TextDecoder API; see documentation for more options, such as a different encoding.
See also: Easier ArrayBuffer<->String conversion with the Encoding API
# Google Developers
You're probably seeing this problem because your app has received an odd number of bytes on the socket, but you're trying to create an array of 2-byte-wide items out of it (because that's what fits into a Uint16Array)
If your app receives the string "Hello" over the network (5 bytes), then you can cast that to a Uint8Array, and it will look like this:
Item: 0 1 2 3 4
Char: H e l l o
Uint8 Value: 72 101 108 108 111
casting it to an Uint16Array, though will try to do this:
Item 0 1 2
Chars He ll o?
IntVal 25928 27756 ?????
Without a 6th byte to work with, it can't construct the array, and so you get an exception.
Using a Uint16Array for the data only makes sense if you are expecting UCS-2 string data on the socket. If you are receiving plain ASCII data, then you want to cast that to a Uint8Array instead, and map String.fromCharCode on that. If it's something else, such as UTF-8, then you'll have to do some other conversion.
No matter what, though, the socket layer is always free to send you data in chunks of any length. Your app will have to deal with odd sizes, and save any remainder that you can't deal with right away, so that you can use it when you receive the next chunk of data.
Kind of old and late, but perhaps using this function (original source) works better (it worked for me for decoding arraybuffer to string without leaving some special chars as total garbage):
function decodeUtf8(arrayBuffer) {
var result = "";
var i = 0;
var c = 0;
var c1 = 0;
var c2 = 0;
var data = new Uint8Array(arrayBuffer);
// If we have a BOM skip it
if (data.length >= 3 && data[0] === 0xef && data[1] === 0xbb && data[2] === 0xbf) {
i = 3;
}
while (i < data.length) {
c = data[i];
if (c < 128) {
result += String.fromCharCode(c);
i++;
} else if (c > 191 && c < 224) {
if( i+1 >= data.length ) {
throw "UTF-8 Decode failed. Two byte character was truncated.";
}
c2 = data[i+1];
result += String.fromCharCode( ((c&31)<<6) | (c2&63) );
i += 2;
} else {
if (i+2 >= data.length) {
throw "UTF-8 Decode failed. Multi byte character was truncated.";
}
c2 = data[i+1];
c3 = data[i+2];
result += String.fromCharCode( ((c&15)<<12) | ((c2&63)<<6) | (c3&63) );
i += 3;
}
}
return result;
}
There is an asynchronous way using Blob and FileReader.
You can specify any valid encoding.
function arrayBufferToString( buffer, encoding, callback ) {
var blob = new Blob([buffer],{type:'text/plain'});
var reader = new FileReader();
reader.onload = function(evt){callback(evt.target.result);};
reader.readAsText(blob, encoding);
}
//example:
var buf = new Uint8Array([65,66,67]);
arrayBufferToString(buf, 'UTF-8', console.log.bind(console)); //"ABC"

Categories

Resources