Guidance to understand Base64 encoding algorithm - javascript

I found this algorithm on the net, but I'm having a bit of trouble understanding exactly how it works. It encodes a Uint8Array to Base64. I would especially like to understand the sections under the comments "Combine the three bytes into a single integer" and "Use bitmasks to extract 6-bit segments from the triplet". I understand the concept of bit shifting used there, but can't work out what its purpose is in those two sections.
function base64ArrayBuffer(bytes) {
  var base64 = ''
  var encodings = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'

  var byteLength = bytes.byteLength
  var byteRemainder = byteLength % 3
  var mainLength = byteLength - byteRemainder

  var a, b, c, d
  var chunk

  // Main loop deals with bytes in chunks of 3
  for (var i = 0; i < mainLength; i = i + 3) {
    // Combine the three bytes into a single integer
    chunk = (bytes[i] << 16) | (bytes[i + 1] << 8) | bytes[i + 2]

    // Use bitmasks to extract 6-bit segments from the triplet
    a = (chunk & 16515072) >> 18 // 16515072 = (2^6 - 1) << 18
    b = (chunk & 258048) >> 12   // 258048 = (2^6 - 1) << 12
    c = (chunk & 4032) >> 6      // 4032 = (2^6 - 1) << 6
    d = chunk & 63               // 63 = 2^6 - 1

    // Convert the raw binary segments to the appropriate ASCII encoding
    base64 += encodings[a] + encodings[b] + encodings[c] + encodings[d]
  }

  // Deal with the remaining bytes and padding
  if (byteRemainder == 1) {
    chunk = bytes[mainLength]

    a = (chunk & 252) >> 2 // 252 = (2^6 - 1) << 2

    // Set the 4 least significant bits to zero
    b = (chunk & 3) << 4 // 3 = 2^2 - 1

    base64 += encodings[a] + encodings[b] + '=='
  } else if (byteRemainder == 2) {
    chunk = (bytes[mainLength] << 8) | bytes[mainLength + 1]

    a = (chunk & 64512) >> 10 // 64512 = (2^6 - 1) << 10
    b = (chunk & 1008) >> 4   // 1008 = (2^6 - 1) << 4

    // Set the 2 least significant bits to zero
    c = (chunk & 15) << 2 // 15 = 2^4 - 1

    base64 += encodings[a] + encodings[b] + encodings[c] + '='
  }

  return base64
}

The first step takes each group of 3 bytes in the input and combines them into a 24-bit number. If we call them x = bytes[i], y = bytes[i+1], and z = bytes[i+2], it uses bit-shifting and bitwise OR to create a 24-bit integer whose bits are:
xxxxxxxxyyyyyyyyzzzzzzzz
Then it extracts these bits in groups of 6 to get 4 numbers. The bits of a, b, c, and d correspond this way:
xxxxxxxxyyyyyyyyzzzzzzzz
aaaaaabbbbbbccccccdddddd
Then for each of these 6-bit numbers, it indexes the encodings string to get a corresponding character, and concatenates them into the base64 result string.
At the end there are some special cases to deal with the last 1 or 2 bytes in the input if it wasn't a multiple of 3 bytes long.
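To make this concrete, here is the main-loop arithmetic traced by hand for the three bytes [77, 97, 110] (the ASCII codes for "Man"); the names mirror the function above:

var bytes = new Uint8Array([77, 97, 110]) // "Man" in ASCII

chunk = (77 << 16) | (97 << 8) | 110 // = 5071214 = 0b010011010110000101101110

a = (chunk & 16515072) >> 18 // 0b010011 = 19 -> encodings[19] = 'T'
b = (chunk & 258048) >> 12   // 0b010110 = 22 -> encodings[22] = 'W'
c = (chunk & 4032) >> 6      // 0b000101 =  5 -> encodings[5]  = 'F'
d = chunk & 63               // 0b101110 = 46 -> encodings[46] = 'u'

// base64ArrayBuffer(bytes) === 'TWFu'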

Related

Javascript - turning on bits

I have some understanding of bits and bytes and of the concept of shifting, but no actual experience with it.
So:
I need to turn an array of true and false values into a buffer of 1344 bits (which I send using UDP packets).
The other side will evaluate the buffer bit by bit.
Since I'm new to Node.js, feel free to add tips or point me in new directions.
var arrBinary = new Array(1344);
for (i = 0; i < 1344; i++) arrBinary[i] = 0;

// some code here, which will turn some of the array's elements to 1

var arrForBuffer = new Array(168);
for (i = 0; i < 168; i++) arrForBuffer[i] = 0;

var x = Buffer.from(arrForBuffer);

/****** the question ******/
// How to change and set arrForBuffer so it will represent the arrBinary bits' state?
You can use some bitshifting as you said:
// arrForBuffer must be initialized with 0s
for (let i = 0; i < 1344; i++)
  arrForBuffer[Math.floor(i / 8)] += arrBinary[i] << (7 - (i % 8));
The first bit of arrBinary, for example, will be shifted left by 7 and added to the first byte, the second will be shifted left by 6, and so on. The 9th will be shifted left by 7 again and added to the second byte.
It might be more readable (and possibly more performant) if it were written as:
for (let byte = 0; byte < 168; byte++) {
  arrForBuffer[byte] =
    arrBinary[byte * 8 + 0] << 7 |
    arrBinary[byte * 8 + 1] << 6 |
    arrBinary[byte * 8 + 2] << 5 |
    arrBinary[byte * 8 + 3] << 4 |
    arrBinary[byte * 8 + 4] << 3 |
    arrBinary[byte * 8 + 5] << 2 |
    arrBinary[byte * 8 + 6] << 1 |
    arrBinary[byte * 8 + 7];
}
JavaScript supports bitwise operations like every other major language. You can use the | and << operators to achieve this transformation:
const size = 16;
const packsize = 8;

const arrBinary = new Array(size).fill(false);
arrBinary[2] = true;
arrBinary[6] = true;
arrBinary[8] = true;

let arrForBuffer = new Array(size / packsize);

let acc = 0;
let byteCounter = 0;
for (let i = 0; i < arrBinary.length; i++) {
  if (arrBinary[i]) {
    // note: this packs bit i into position (i % 8), i.e. least significant
    // bit first, which is the reverse bit order of the answer above
    acc |= 1 << (i % packsize);
  }
  if (i % packsize == packsize - 1) {
    arrForBuffer[byteCounter] = acc;
    byteCounter++;
    acc = 0;
  }
}

for (let i = 0; i < arrForBuffer.length; i++) {
  console.log(`${i}: ${arrForBuffer[i]}`);
}
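To complete the picture from the question (sending the bits over UDP), the packed byte array can be copied into a Buffer and handed to a datagram socket. A minimal sketch, assuming Node.js; the port and host below are placeholders:

const dgram = require('dgram');

const buf = Buffer.from(arrForBuffer); // 168 bytes = 1344 bits
const socket = dgram.createSocket('udp4');
socket.send(buf, 41234, 'localhost', (err) => { // placeholder port/host
  if (err) console.error(err);
  socket.close();
});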

How to get Number of bits from Hex

For example, how do I get 8 from 0x01, or 16 from 0x0001?
I want to know the number of bits a variable has.
var someNumber = 0x123456;
var len = whatToDoHere(someNumber);
console.log(len); // => 24 for example
Here are a few alternatives:
f1 = n => (Math.log2(n) & -8) + 8             // log2 (-Infinity & -8 = 0)
f2 = n => ((n >>= 8) && f2(n)) + 8            // recursion
f3 = n => n.toString(16).length + 1 << 2 & -8 // string length

for (n of [0, 0xff, 0x100, 0xffff, 0x10000, 0xffffff])
  console.log( f1(n) + '\t' + f2(n) + '\t' + f3(n) + '\t0x' + n.toString(16) )
A number is just a number; it does not have any particular representation attached to it. Even if you say that it will be formatted in base 16 (hex) or base 256 (bytes), that doesn't say anything about the number of leading zeroes (as in 0x01 vs 0x0001). If you knew that, however, you'd already know how many digits your formatted number has.
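For example, both literals below denote the very same number, so no function could tell them apart:

console.log(0x01 === 0x0001); // true -- both are just the number 1
console.log((0x01).toString(16), (0x0001).toString(16)); // "1" "1"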
Another possible solution is to convert the number to a hex string and then measure the length of the string:
const someNumber = 0x123456;
const hexString = someNumber.toString( 16 );
const numberOfBits = hexString.length * 4; // each hex character represents 4 bits (half a byte)
console.log( "Number of bits: ", numberOfBits );

base 64 encoding in javascript

Below is a base 64 image encoding function that I got from Philippe Tenenhaus (http://www.philten.com/us-xmlhttprequest-image/).
It's very confusing to me, but I'd love to understand.
I think I understand the bitwise & and |, and moving through byte positions with << and >>.
I'm especially confused by these lines:
((byte1 & 3) << 4) | (byte2 >> 4);
((byte2 & 15) << 2) | (byte3 >> 6);
And why it still uses byte1 for enc2, and byte2 for enc3.
And the purpose of enc4 = byte3 & 63; ...
Could someone explain this function?
function base64Encode(inputStr)
{
  var b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
  var outputStr = "";
  var i = 0;

  while (i < inputStr.length)
  {
    // all three "& 0xff" added below are there to fix a known bug
    // with bytes returned by xhr.responseText
    var byte1 = inputStr.charCodeAt(i++) & 0xff;
    var byte2 = inputStr.charCodeAt(i++) & 0xff;
    var byte3 = inputStr.charCodeAt(i++) & 0xff;

    var enc1 = byte1 >> 2;
    var enc2 = ((byte1 & 3) << 4) | (byte2 >> 4);

    var enc3, enc4;
    if (isNaN(byte2))
    {
      enc3 = enc4 = 64;
    }
    else
    {
      enc3 = ((byte2 & 15) << 2) | (byte3 >> 6);
      if (isNaN(byte3))
      {
        enc4 = 64;
      }
      else
      {
        enc4 = byte3 & 63;
      }
    }

    outputStr += b64.charAt(enc1) + b64.charAt(enc2) + b64.charAt(enc3) + b64.charAt(enc4);
  }

  return outputStr;
}
It probably helps to understand what Base64 encoding does. It converts 24 bits at a time, regrouping three 8-bit bytes into four 6-bit values. (http://en.wikipedia.org/wiki/Base64)
So enc1 is the first 6 bits, which are the first 6 bits of the first byte.
enc2 is the next 6 bits: the last 2 bits of the first byte and the first 4 bits of the second byte. The bitwise AND operation byte1 & 3 targets the last 2 bits of the first byte.
So,
XXXXXXXX & 00000011 = 000000XX
It is then shifted to the left 4 bits.
000000XX << 4 = 00XX0000.
The byte2 >> 4 performs a right bit shift, isolating the first 4 bits of the second Byte, shown below
YYYYXXXX >> 4 = 0000YYYY
So, ((byte1 & 3) << 4) | (byte2 >> 4) combines the results with a bitwise or
00XX0000 | 0000YYYY = 00XXYYYY
enc3 is the last 4 bits of the second byte and the first 2 bits of the third byte.
enc4 is the last 6 bits of the third byte.
charCodeAt returns a UTF-16 code unit, which is a 16-bit value, so there appears to be an assumption that the relevant information is in only the low 8 bits. That assumption makes me wonder whether there is still a bug in the code: some information could be lost as a result.
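On that last point, there is in fact a remaining bug, though a different one: since NaN & 0xff evaluates to 0 in JavaScript, the isNaN checks in the version quoted above can never be true, so inputs whose length is not a multiple of 3 come out with 'A' characters where '=' padding belongs. A quick browser-console comparison against the built-in btoa illustrates it:

console.log(base64Encode("Man"), btoa("Man")); // "TWFu" "TWFu" -- full triple, OK
console.log(base64Encode("Ma"),  btoa("Ma"));  // "TWEA" "TWE=" -- padding lost
console.log(base64Encode("M"),   btoa("M"));   // "TQAA" "TQ==" -- padding lost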

JavaScript equivalent to htonl?

For an AJAX request, I need to send a magic number as the first four bytes of the request body, most significant byte first, along with several other (non-constant) values in the request body. Is there something equivalent to htonl in JavaScript?
For example, given 0x42656566, I need to produce the string "Beef". Unfortunately, my number is along the lines of 0xc1ba5ba9. When the server reads the request, it is getting the value -1014906182 (instead of -1044751447).
There's no built-in function, but something like this should work:
// Convert an integer to an array of "bytes" in network/big-endian order.
function htonl(n)
{
  // Mask off 8 bits at a time, then shift them into place
  return [
    (n & 0xFF000000) >>> 24,
    (n & 0x00FF0000) >>> 16,
    (n & 0x0000FF00) >>> 8,
    (n & 0x000000FF) >>> 0,
  ];
}
To get the bytes as a string, just call String.fromCharCode on each byte and concatenate them:
// Convert an integer to a string made up of the bytes in network/big-endian order.
function htonl(n)
{
  // Mask off 8 bits at a time, then shift them into place
  return String.fromCharCode((n & 0xFF000000) >>> 24) +
         String.fromCharCode((n & 0x00FF0000) >>> 16) +
         String.fromCharCode((n & 0x0000FF00) >>> 8) +
         String.fromCharCode((n & 0x000000FF) >>> 0);
}
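In modern engines the same big-endian byte layout can also be delegated to a DataView. A sketch of an alternative (htonlBytes is a made-up name, to avoid clashing with the versions above):

function htonlBytes(n) {
  const buf = new ArrayBuffer(4);
  new DataView(buf).setUint32(0, n, false); // false = big-endian (network byte order)
  return new Uint8Array(buf);
}

// htonlBytes(0x42656566) -> Uint8Array [0x42, 0x65, 0x65, 0x66], i.e. "Beef"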
Simplified version http://jsfiddle.net/eZsTp/:
function dot2num(dot) { // the same as ip2long in php
  var d = dot.split('.');
  return ((+d[0]) << 24) +
         ((+d[1]) << 16) +
         ((+d[2]) << 8) +
         (+d[3]);
}

function num2array(num) {
  return [
    (num & 0xFF000000) >>> 24,
    (num & 0x00FF0000) >>> 16,
    (num & 0x0000FF00) >>> 8,
    (num & 0x000000FF)
  ];
}

function htonl(x) {
  return dot2num(num2array(x).reverse().join('.'));
}

var ipbyte = dot2num('12.34.56.78');
alert(ipbyte);
var inv = htonl(ipbyte);
alert(inv + '=' + num2array(inv).join('.'));

Javascript unsigned short to signed short

I have the following code:
var v = [0xFF, 0xFF];
alert((v[0]<<8) | v[1]);
And it alerts 65535 (the maximum unsigned short value).
How can I treat this byte array as a signed short and get the signed value from it?
Assuming the highest bit is the sign:
var sign = v[0] & (1 << 7);
// note: this yields the sign-magnitude value, not two's complement,
// so [0xFF, 0xFF] gives -32767 rather than -1
var i = ((v[0] & 0x7F) << 8) | v[1];
if (sign) {
  i = -i;
}
http://jsfiddle.net/p4TQw/1/
If you use the two's complement representation:
var i = (((v[0] << 8) | v[1]) << 16) >> 16;
The 16-bit left shift moves all the bits to the left, and the arithmetic 16-bit right shift takes care of the sign while shifting. (JavaScript uses 32-bit integers for shift operations.)
http://jsfiddle.net/p4TQw/3/
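The same conversion can also be delegated to a DataView, which reads two's complement natively. A sketch:

const view = new DataView(new ArrayBuffer(2));
view.setUint8(0, v[0]); // v = [0xFF, 0xFF] from the question
view.setUint8(1, v[1]);
console.log(view.getInt16(0)); // -1 (big-endian signed 16-bit read)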
