I'm working with data that comes over a WebSocket connection with a StarCraft 2 client to obtain image data from a game that is in progress. In some cases, the image data may be sent in a format of 1 bit per pixel. When this happens I need to "unpack" the bits from each byte in the response (1 byte => 8 bits). This is done in the code below:
/**
 * Expand packed bytes into individual bits.
 *
 * Every input byte becomes eight consecutive entries (each 0 or 1) in the
 * output, most significant bit first.
 *
 * @param uint8data Array-like of byte values (typically a Uint8Array).
 * @returns A new Uint8Array holding 8 entries per input byte.
 */
function unpackbits(uint8data) {
  const bits = new Uint8Array(uint8data.length * 8)
  for (let index = 0, base = 0; index < uint8data.length; index++, base += 8) {
    const value = uint8data[index]
    // Shift the wanted bit down to position 0, then mask everything else off.
    bits[base] = (value >> 7) & 1
    bits[base + 1] = (value >> 6) & 1
    bits[base + 2] = (value >> 5) & 1
    bits[base + 3] = (value >> 4) & 1
    bits[base + 4] = (value >> 3) & 1
    bits[base + 5] = (value >> 2) & 1
    bits[base + 6] = (value >> 1) & 1
    bits[base + 7] = value & 1
  }
  return bits
}
This gets fed into a tensor like so:
static unpack_layer(plane) {
//Return a correctly shaped tensor given the feature layer bytes.//
const size = point.Point.build(plane.getSize()) // { x, y }
if (plane.getBitsPerPixel() === 1) {
data = unpackbits(data)
if (data.length !== (size.x * size.y)) {
// This could happen if the correct length isn't a multiple of 8, leading
// to some padding bits at the end of the string which are incorrectly
// interpreted as data.
data = data.slice(0, size.x * size.y)
}
}
data = tf.tensor(data, [size.y, size.x], 'int32')
return data
}
In one of my tests, this code gets run 1900 times and takes 0.0737s to execute.
This is very slow.
For comparison, the equivalent functionality in python takes 0.0209s to run 1900 times. The python code looks like this:
def unpack_layer(plane):
  """Build a numpy array of shape (size.y, size.x) from a feature layer's bytes."""
  dims = point.Point.build(plane.size)  # {x, y}
  depth = plane.bits_per_pixel
  pixels = np.frombuffer(plane.data, dtype=Feature.dtypes[depth])
  if depth == 1:
    # One bit per pixel: expand to one entry per bit. When the pixel count is
    # not a multiple of 8 the last byte carries padding bits, which the slice
    # drops; when the length already matches, the slice keeps everything.
    pixels = np.unpackbits(pixels)[:dims.x * dims.y]
  return pixels.reshape(dims.y, dims.x)
In short, it takes the javascript version roughly 4x as long as the python version.
I'll be looking at the numpy unpackbits documentation as that seems to be doing something much more efficient than my own approach -
However, I was wondering if anyone had any thoughts as to how I could better optimize my own unpackbits function or better yet a way to have TensorFlow do that for me?
Not sure if this helps, but am kicking myself as I got hung up on the need for bitwise operators in tensorflow in order to convert a byte stream into a bit stream, per the original question. Simple use of integer division and modulus can do the trick too!
In short, the algorithm by example is thus. Given byte stream of [ 92 ]...
Divide and mod by 16, resulting in 2 bytes, namely [ 5 ] and [ 12 ] respectively.
Interleave these results into a tensor [ 5, 12 ].
Take each of those values, and divide and mod by 4, resulting in [ 1, 3 ] and [ 1, 0 ].
Interleave these results into a tensor [ 1, 1, 3, 0 ].
Divide and mod by 2, resulting in [ 0, 0, 1, 0 ] and [ 1, 1, 1, 0 ].
Interleave into [ 0, 1, 0, 1, 1, 1, 0, 0 ] which is binary for 92.
Below are two versions of the same algorithm. One in tensorflow and one in pure javascript.
/**
 * Unpack a byte stream into bits with TensorFlow.js using divide & conquer:
 * split 8-bit values into 4-bit halves, those into 2-bit halves, and those
 * into single bits, interleaving high/low halves after every step.
 *
 * All tensors created along the way are released before returning. The
 * original version never disposed any of them, so every call leaked the input
 * tensor, the scalar divisors and each intermediate result.
 *
 * @param stream Array-like of byte values (0-255).
 * @returns The `.buffer` of the typed array read back from the boolean result
 *   tensor (one entry per bit, most significant bit first).
 */
function tfDaC( stream ) {
  // tf.tidy disposes every tensor allocated inside the callback except the
  // one that is returned.
  const stream1bit = tf.tidy( () => {
    const stream8bit = tf.tensor( stream, undefined, 'int32' );
    console.time('in-tf');

    const stream4bitHi = tf.div( stream8bit, tf.scalar( 16, 'int32' ) );
    const stream4bitLo = tf.mod( stream8bit, tf.scalar( 16, 'int32' ) );
    const stream4bit = tf.stack( [ stream4bitHi, stream4bitLo ], 1 ).flatten();

    const stream2bitHi = tf.div( stream4bit, tf.scalar( 4, 'int32' ) );
    const stream2bitLo = tf.mod( stream4bit, tf.scalar( 4, 'int32' ) );
    const stream2bit = tf.stack( [ stream2bitHi, stream2bitLo ], 1 ).flatten();

    const stream1bitHi = tf.div( stream2bit, tf.scalar( 2, 'int32' ) );
    const stream1bitLo = tf.mod( stream2bit, tf.scalar( 2, 'int32' ) );
    const bits = tf.stack( [ stream1bitHi, stream1bitLo ], 1 ).flatten().toBool();

    console.timeEnd('in-tf');
    return bits;
  } );

  // Read the values back first, then release the one tensor that survived tidy.
  const unpacked = stream1bit.dataSync().buffer;
  stream1bit.dispose();
  return unpacked;
}
/**
 * Unpack bytes into bits by repeated halving, entirely inside one ArrayBuffer.
 *
 * The output buffer is viewed first as 32-bit slots (one per 4-bit value),
 * then as 16-bit slots (one per 2-bit value) and finally as bytes (one per
 * bit). Each pass reads a whole slot before overwriting the two narrower slots
 * that occupy the same bytes, which is what makes the in-place rewrite safe.
 *
 * @param stream Typed array of bytes; `stream.byteLength` entries are read.
 * @returns An ArrayBuffer of `stream.byteLength * 8` bytes, one 0/1 byte per
 *   bit, most significant bit first.
 */
function jsDaC( stream ) {
  const unpacked = new ArrayBuffer( stream.byteLength * 8 );

  // Pass 1: each byte -> two 4-bit values, stored in 4-byte slots.
  const nibbles = new Uint32Array( unpacked );
  for ( let src = 0, dst = 0; src < stream.byteLength; src++, dst += 2 ) {
    const value = stream[ src ];
    nibbles[ dst ] = ( value / 16 ) | 0;
    nibbles[ dst + 1 ] = ( value % 16 ) | 0;
  }

  // Pass 2: each 4-bit value -> two 2-bit values, stored in 2-byte slots.
  const pairs = new Uint16Array( unpacked );
  for ( let src = 0, dst = 0; src < nibbles.length; src++, dst += 2 ) {
    const value = nibbles[ src ];
    pairs[ dst ] = ( value / 4 ) | 0;
    pairs[ dst + 1 ] = ( value % 4 ) | 0;
  }

  // Pass 3: each 2-bit value -> two single bits, stored in 1-byte slots.
  const bits = new Uint8Array( unpacked );
  for ( let src = 0, dst = 0; src < pairs.length; src++, dst += 2 ) {
    const value = pairs[ src ];
    bits[ dst ] = ( value / 2 ) | 0;
    bits[ dst + 1 ] = ( value % 2 ) | 0;
  }

  return unpacked;
}
// Benchmark driver: fill 1M random bytes, then time both divide & conquer
// unpackers on the same input.
console.log( 'Generating array of 1M bytes' );
let buffer = new ArrayBuffer( 1000000 );
let testArray = new Uint8Array( buffer );
testArray.forEach( ( _unused, slot ) => {
  testArray[ slot ] = Math.floor( 256 * Math.random() );
} );

let result;
for ( const [ title, label, unpack ] of [
  [ 'tensorflow', 'tf', tfDaC ],
  [ 'javascript', 'js', jsDaC ],
] ) {
  console.log( `Begin ${title} divide & conquer test with 1M bytes.` );
  console.time( label );
  result = unpack( testArray );
  console.timeEnd( label );
  console.log( `End ${title} test with 1M bytes resulting in array of ${result.byteLength} bytes` );
}
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@2.0.1/dist/tf.min.js"></script>
The tensorflow performance was terrible on my workstation. I had to reduce the byte stream down to 1M bytes as my GPU was throwing memory errors at my previous test levels of a 10M byte stream. And even then at only 1M bytes, a handful of tests ranged from 1236ms to 1414ms. Not sure why it was so slow. It could possibly be the coercion of the numbers to int32, which might be adding a lot of overhead, as my understanding is that GPUs are generally built primarily for floating point operations. And marshalling the data onto and off of the GPU consumes some time too. Maybe it's worthwhile to try to convert this function to a floating point only function rather than int32...?! Maybe I grabbed a poor version of tensorflow.js...?! I'd be interested to hear how it runs in your NodeJS configuration...
On the other hand, the javascript version for 1M bytes ranged from 30ms to 42ms, almost 2 orders of magnitude(!) faster than the GPU. But still, when extrapolating these results to 10M bytes, this algorithm is still slower than all the other previous algorithms...
So not sure if this helps. It might simply help eliminate tensorflow as an option, although it might still be worthwhile trying floats rather than int32, but am not very hopeful...
It looks like tensorflow.js does not have a bitwise AND function, so suspect doing the work within tensorflow.js will require some coding gymnastics...
One suggestion, though, is to create an array of 256 Uint8Arrays of size 8, and pre-populate it with the complete list of 8 byte translations. This greatly reduces the repeated calculations for a byte stream that will likely have repeated values in the range of 0 - 255. E.g., the first entry in the precomputed array represents the unpacking of byte 0, and therefore is a Uint8Array of size 8 populated with 0's, the next entry is another Uint8Array of size 8 populated with 00000001, etc., all the way to the entry representing byte 255, which is a Uint8Array of size 8 populated with all 1's.
Then, when unpacking, simply make use of the typed array .set method to copy the precomputed unpacked representation into the results Uint8Array...
Hope this helps.
EDIT Created a number of variants of the unpacking algorithm to test the performance of inline calculations vs memory lookup and was surprised at the results using Chrome. Some of the optimizations of the V8 compiler are non-intuitive...
The differences in the versions...
unpackbits [FAST]: From the original question and this is the bar by which the others variations are compared.
unpackbits1 [FAST]: Modified by...
Specifying "|0" after every integer.
Using the increment unary op ( "++" ) rather than adding increments to the offset index of the result array.
Replacing the calculation of bit masks with the actual value. (Ie, rather than 1 << 5, the function used 32.)
unpackbits1a [FAST]: The same as unpackbits1, except...
Kept the calculation of bit masks rather than integer values. (Ie, using 1 << 5 rather than 32, as in the original question.) Counter intuitively, this produces a bit faster result!
unpackbits1b [SLOWER]: The same as unpackbits1a, except...
The offset is not recomputed every time inside the loop. Ie, offset = 0|0 is initially set, and then thereafter offset is only incremented within the loop. So, offset = ( (8|0) * i ) is no longer calculated for every byte. Counter intuitively, this produces a slower result!
unpackbits2 [SLOWEST]: This is the memory lookup option that I recommended above. Counter intuitively, this implies that typed array memory operations are much slower than calculating the results as in unpackbits!
unpackbits3 [SLOWER]: This is the memory lookup option that I recommended above, with the following change.
Rather than using the typed array .set method, this version sets the eight bytes one-by-one. Counter intuitively, this implies that the typed array .set method is slower (at least for eight bytes) than individually setting the values!
unpackbits4 [SLOWER]: This variation of the algorithm was on par with the original, and was a variation of the memory lookup option. But, rather than 256 individual Uint8Array's, this combined all the pre-calculated results into a single Uint8Array of length 256 * 8. And it did not make use of the typed array .set method.
unpackbits5 [SLOWER]: Same as unpackbits4, except...
Rather than using the unary "++" on the index into the lookup table, it calculated the index for each of the 8 bytes being copied. As expected, calculating the index every time was slower than using the unary "++" operator.
Here are the tests. BEWARE that this builds an initial array of 10M random bytes, and then runs each unpack algorithm on this same data. On my workstation, the test runs in less than 5 seconds.
// One pre-unpacked Uint8Array(8) per possible byte value, indexed by that value.
var lookupTable = initializeLookupTable();

/**
 * Build the per-byte lookup table.
 *
 * @returns An Array of 256 entries where entry `b` is `unpackbits` applied to
 *   the single byte `b`.
 */
function initializeLookupTable() {
  const table = new Array( 256 );
  const probe = Uint8Array.of( 0 );
  let value = 0;
  while ( value < table.length ) {
    probe[ 0 ] = value;
    table[ value ] = unpackbits( probe );
    value++;
  }
  return table;
}
// Flat lookup table: the 8 unpacked bits of byte `b` live at [b * 8, b * 8 + 8).
var lookupTable4 = initializeLookupTable4();

/**
 * Build the flat lookup table.
 *
 * @returns A Uint8Array of 256 * 8 entries holding `unpackbits` of every
 *   possible byte value, laid out back to back.
 */
function initializeLookupTable4() {
  const flat = new Uint8Array( 8 * 256 );
  const probe = Uint8Array.of( 0 );
  for ( let writeAt = 0; writeAt < flat.length; writeAt += 8 ) {
    probe[ 0 ] = writeAt / 8;
    flat.set( unpackbits( probe ), writeAt );
  }
  return flat;
}
/**
 * Baseline unpacker (the version from the original question) against which the
 * other variants are compared.
 *
 * Expands every input byte into eight 0/1 entries, most significant bit first.
 *
 * @param uint8data Array-like of byte values.
 * @returns A new Uint8Array with 8 entries per input byte.
 */
function unpackbits(uint8data) {
const results = new Uint8Array(8 * uint8data.length)
let byte
let offset
for (let i = 0; i < uint8data.length; i++) {
byte = uint8data[i]
// Start of this byte's 8 output slots; recomputed on every iteration.
offset = (8 * i);
// Mask out one bit, then shift it down to 0/1. Bit 0 (least significant)
// lands in the last slot, bit 7 in the first.
results[offset + 7] = ((byte & (1 << 0)) >> 0)
results[offset + 6] = ((byte & (1 << 1)) >> 1)
results[offset + 5] = ((byte & (1 << 2)) >> 2)
results[offset + 4] = ((byte & (1 << 3)) >> 3)
results[offset + 3] = ((byte & (1 << 4)) >> 4)
results[offset + 2] = ((byte & (1 << 5)) >> 5)
results[offset + 1] = ((byte & (1 << 6)) >> 6)
results[offset + 0] = ((byte & (1 << 7)) >> 7)
}
return results
}
/**
 * Variant 1 of the unpacker: same output as `unpackbits`, but with `|0`
 * appended to integer expressions, a post-incremented output index instead of
 * `offset + k`, and literal bit masks (128, 64, ...) instead of `1 << k`.
 *
 * @param uint8data Array-like of byte values.
 * @returns A new Uint8Array with 8 entries per input byte, most significant
 *   bit first.
 */
function unpackbits1(uint8data) {
const results = new Uint8Array(8 * uint8data.length)
let byte;
let offset;
// The input length is read once into `n` rather than on every iteration.
for (let i = 0|0, n = uint8data.length; i < n; i++) {
byte = uint8data[i]|0
offset = (8 * i)|0;
// Written from the most significant bit down; `offset++` advances the
// output position after each store.
results[offset++] = ((byte & 128)>>7)|0;
results[offset++] = ((byte & 64)>>6)|0;
results[offset++] = ((byte & 32)>>5)|0;
results[offset++] = ((byte & 16)>>4)|0;
results[offset++] = ((byte & 8)>>3)|0;
results[offset++] = ((byte & 4)>>2)|0;
results[offset++] = ((byte & 2)>>1)|0;
results[offset++] = ((byte & 1)>>0)|0;
}
return results
}
/**
 * Variant 1a of the unpacker: like `unpackbits1`, except the bit masks are
 * computed with `1 << k` (as in the original question) instead of being
 * written as literal values.
 *
 * @param uint8data Array-like of byte values.
 * @returns A new Uint8Array with 8 entries per input byte, most significant
 *   bit first.
 */
function unpackbits1a(uint8data) {
const results = new Uint8Array(8 * uint8data.length)
let byte;
let offset;
for (let i = 0|0, n = uint8data.length; i < n; i++) {
byte = uint8data[i]|0;
// Output position is recomputed from `i` for every byte.
offset = (8 * i)|0;
results[offset++] = ((byte & (1 << 7))>>7)|0;
results[offset++] = ((byte & (1 << 6))>>6)|0;
results[offset++] = ((byte & (1 << 5))>>5)|0;
results[offset++] = ((byte & (1 << 4))>>4)|0;
results[offset++] = ((byte & (1 << 3))>>3)|0;
results[offset++] = ((byte & (1 << 2))>>2)|0;
results[offset++] = ((byte & (1 << 1))>>1)|0;
// The lowest bit needs no shift.
results[offset++] = (byte & 1)|0;
}
return results
}
/**
 * Variant 1b of the unpacker: like `unpackbits1a`, except the output index is
 * initialised once to 0 and only ever incremented, instead of being
 * recomputed as `8 * i` for every byte.
 *
 * @param uint8data Array-like of byte values.
 * @returns A new Uint8Array with 8 entries per input byte, most significant
 *   bit first.
 */
function unpackbits1b(uint8data) {
const results = new Uint8Array(8 * uint8data.length)
let byte;
// Running write position shared across all iterations.
let offset = 0|0;
for (let i = 0|0, n = uint8data.length; i < n; i++) {
byte = uint8data[i]|0;
results[offset++] = ((byte & (1 << 7))>>7)|0;
results[offset++] = ((byte & (1 << 6))>>6)|0;
results[offset++] = ((byte & (1 << 5))>>5)|0;
results[offset++] = ((byte & (1 << 4))>>4)|0;
results[offset++] = ((byte & (1 << 3))>>3)|0;
results[offset++] = ((byte & (1 << 2))>>2)|0;
results[offset++] = ((byte & (1 << 1))>>1)|0;
results[offset++] = (byte & 1)|0;
}
return results
}
/**
 * Variant 2 of the unpacker: memory lookup. For every input byte, copies the
 * precomputed 8-entry array from `lookupTable` into the result with the typed
 * array `.set` method.
 *
 * @param uint8data Array-like of byte values used as indices into `lookupTable`.
 * @returns A new Uint8Array with 8 entries per input byte.
 */
function unpackbits2( uint8data ) {
const result = new Uint8Array( 8 * uint8data.length );
// `ri` is the write position in `result`; it advances by 8 per input byte.
for ( let i = 0|0, ri = 0|0, n = uint8data.length; i < n; i++, ri += 8 ) {
result.set( lookupTable[ uint8data[ i ] ], ri );
}
return result;
}
/**
 * Variant 3 of the unpacker: memory lookup like `unpackbits2`, but the eight
 * precomputed entries are copied one by one instead of with `.set`.
 *
 * @param uint8data Array-like of byte values used as indices into `lookupTable`.
 * @returns A new Uint8Array with 8 entries per input byte.
 */
function unpackbits3( uint8data ) {
const result = new Uint8Array( 8 * uint8data.length );
let ri = 0|0;
for ( let i = 0|0, n = uint8data.length; i < n; i++ ) {
// The `.set` call used by unpackbits2, kept for comparison:
//result.set( lookupTable[ uint8data[ i ] ], ri );
// `lv` is the precomputed 8-entry expansion of the current byte.
let lv = lookupTable[ uint8data[ i ] ];
result[ ri++ ] = lv [ 0|0 ];
result[ ri++ ] = lv [ 1|0 ];
result[ ri++ ] = lv [ 2|0 ];
result[ ri++ ] = lv [ 3|0 ];
result[ ri++ ] = lv [ 4|0 ];
result[ ri++ ] = lv [ 5|0 ];
result[ ri++ ] = lv [ 6|0 ];
result[ ri++ ] = lv [ 7|0 ];
}
return result;
}
/**
 * Variant 4 of the unpacker: memory lookup against the single flat table
 * `lookupTable4` (8 consecutive entries per byte value), copying entry by
 * entry with post-incremented read and write indices.
 *
 * @param uint8data Array-like of byte values.
 * @returns A new Uint8Array with 8 entries per input byte.
 */
function unpackbits4( uint8data ) {
const result = new Uint8Array( 8 * uint8data.length );
let ri = 0|0;
for ( let i = 0|0, n = uint8data.length; i < n; i++ ) {
// `li` is where this byte value's 8 entries start inside the flat table.
let li = (uint8data[ i ] * 8)|0;
result[ ri++ ] = lookupTable4[ li++ ];
result[ ri++ ] = lookupTable4[ li++ ];
result[ ri++ ] = lookupTable4[ li++ ];
result[ ri++ ] = lookupTable4[ li++ ];
result[ ri++ ] = lookupTable4[ li++ ];
result[ ri++ ] = lookupTable4[ li++ ];
result[ ri++ ] = lookupTable4[ li++ ];
result[ ri++ ] = lookupTable4[ li++ ];
}
return result;
}
/**
 * Variant 5 of the unpacker: like `unpackbits4`, except the read index into
 * `lookupTable4` is computed as `li + k` for each of the eight entries instead
 * of being post-incremented.
 *
 * @param uint8data Array-like of byte values.
 * @returns A new Uint8Array with 8 entries per input byte.
 */
function unpackbits5( uint8data ) {
const result = new Uint8Array( 8 * uint8data.length );
let ri = 0|0;
for ( let i = 0|0, n = uint8data.length; i < n; i++ ) {
// `li` stays fixed at the start of this byte value's 8 table entries.
let li = (uint8data[ i ] * 8)|0;
result[ ri++ ] = lookupTable4[ li ];
result[ ri++ ] = lookupTable4[ li+1 ];
result[ ri++ ] = lookupTable4[ li+2 ];
result[ ri++ ] = lookupTable4[ li+3 ];
result[ ri++ ] = lookupTable4[ li+4 ];
result[ ri++ ] = lookupTable4[ li+5 ];
result[ ri++ ] = lookupTable4[ li+6 ];
result[ ri++ ] = lookupTable4[ li+7 ];
}
return result;
}
// Test: build 10M random bytes once, then time every unpack variant on the
// same input.
console.log( 'Building array of 10,000,000 test values.' );
let buffer = new ArrayBuffer( 10000000 );
let testArray = new Uint8Array( buffer );
{
  let slot = 0;
  while ( slot < testArray.length ) {
    testArray[ slot ] = Math.floor( 256 * Math.random() );
    slot++;
  }
}
console.log( 'Finished building test values.' );

let u;
// [ timer label, displayed name, implementation ] in the order they are run.
for ( const [ label, name, unpack ] of [
  [ 'u', 'unpackbits', unpackbits ],
  [ 'u1', 'unpackbits1', unpackbits1 ],
  [ 'u1a', 'unpackbits1a', unpackbits1a ],
  [ 'u1b', 'unpackbits1b', unpackbits1b ],
  [ 'u2', 'unpackbits2', unpackbits2 ],
  [ 'u3', 'unpackbits3', unpackbits3 ],
  [ 'u4', 'unpackbits4', unpackbits4 ],
  [ 'u5', 'unpackbits5', unpackbits5 ],
] ) {
  console.log( `Starting ${name}.` );
  console.time( label );
  u = unpack( testArray );
  console.timeEnd( label );
  console.log( `Finished ${name}.` );
}
This response is a continuation of the comment chain under @Jon Trent's answer.
EDIT: Include TensorFlow comparison for the reshaping portion.
I'm profiling the performance of two unpacking bits methods; unpackbits1a, and unpackbits (original). I am also profiling the different methods for reshaping the data to a NxM grid, where N is probably the same as M. Here's what I got:
/**
 * `unpackbits1a` as profiled in this answer: expands every input byte into
 * eight 0/1 entries, most significant bit first, with `|0` applied to the
 * integer literals and a post-incremented output index.
 *
 * @param uint8data Array-like of byte values.
 * @returns A new Uint8Array with 8 entries per input byte.
 */
function unpackbits1a(uint8data) {
const results = new Uint8Array(8 * uint8data.length)
let byte;
let offset;
for (let i = 0|0, n = uint8data.length; i < n; i++) {
byte = uint8data[i]
offset = ((8|0) * i); // The "|0" on this line cuts the time almost in half!
// Mask one bit with `1 << k`, then shift it down to 0/1.
results[offset++] = (byte & ((1|0) << (7|0)))>>7|0;
results[offset++] = (byte & ((1|0) << (6|0)))>>6|0;
results[offset++] = (byte & ((1|0) << (5|0)))>>5|0;
results[offset++] = (byte & ((1|0) << (4|0)))>>4|0;
results[offset++] = (byte & ((1|0) << (3|0)))>>3|0;
results[offset++] = (byte & ((1|0) << (2|0)))>>2|0;
results[offset++] = (byte & ((1|0) << (1|0)))>>1|0;
// The lowest bit needs no shift.
results[offset++] = (byte & (1|0));
}
return results
}
/**
 * The original unpacker from the question, used here as the baseline and as
 * the building block for the reshaping helpers.
 *
 * Expands every input byte into eight 0/1 entries, most significant bit first.
 *
 * @param uint8data Array-like of byte values.
 * @returns A new Uint8Array with 8 entries per input byte.
 */
function unpackbits(uint8data) {
const results = new Uint8Array(8 * uint8data.length)
let byte
let offset
for (let i = 0; i < uint8data.length; i++) {
byte = uint8data[i]
// Start of this byte's 8 output slots.
offset = 8 * i
// Bit 0 (least significant) lands in the last slot, bit 7 in the first.
results[offset + 7] = ((byte & (1 << 0)) >> 0)
results[offset + 6] = ((byte & (1 << 1)) >> 1)
results[offset + 5] = ((byte & (1 << 2)) >> 2)
results[offset + 4] = ((byte & (1 << 3)) >> 3)
results[offset + 3] = ((byte & (1 << 4)) >> 4)
results[offset + 2] = ((byte & (1 << 5)) >> 5)
results[offset + 1] = ((byte & (1 << 6)) >> 6)
results[offset + 0] = ((byte & (1 << 7)) >> 7)
}
return results
}
/**
 * Unpack packed bits and arrange them as a 2-D grid of plain Arrays.
 *
 * Unpacks the whole input with `unpackbits`, then copies the bits element by
 * element into `shape[0]` rows of `shape[1]` entries each (row-major).
 *
 * The original indexed the unpacked bits with the packed byte *value*
 * (`data[uint8data[i * width + j]]`), so it returned unrelated or undefined
 * entries; the bit at row `i`, column `j` lives at position `i * width + j`.
 *
 * @param uint8data Packed bytes, 8 pixels per byte, most significant bit first.
 * @param shape `[rows, columns]` of the grid; values are truncated to int32.
 * @returns An Array of `rows` Arrays, each holding `columns` values (0 or 1;
 *   `undefined` where the input is too short to cover the position).
 */
function unpackbitsToShape1(uint8data, shape = [1, 1]) {
  const data = unpackbits(uint8data)
  const rows = shape[0] | 0
  const width = shape[1] | 0
  const result = new Array(rows)
  for (let i = 0; i < rows; i++) {
    const row = new Array(width)
    const base = i * width // flat position of this row's first bit
    for (let j = 0; j < width; j++) {
      row[j] = data[base + j]
    }
    result[i] = row
  }
  return result
}
/**
 * Unpack packed bits and cut the flat result into rows.
 *
 * Unpacks the whole input with `unpackbits`, then takes one `slice` of
 * `shape[1]` entries per row.
 *
 * @param uint8data Packed bytes to unpack.
 * @param shape `[rows, columns]` of the grid; values are truncated to int32.
 * @returns An Array of `rows` slices of the unpacked bits.
 */
function unpackbitsToShape2(uint8data, shape = [1, 1]) {
  const bits = unpackbits(uint8data)
  const rowCount = shape[0] | 0
  const rowWidth = shape[1] | 0
  const grid = new Array(rowCount)
  // `start` tracks the flat position of the current row's first bit.
  for (let row = 0, start = 0; row < rowCount; row++, start += rowWidth) {
    grid[row] = bits.slice(start, start + rowWidth)
  }
  return grid
}
/**
 * Unpack packed bits row by row, unpacking only the bytes each row touches.
 *
 * For every row, the bytes covering bit positions
 * `[row * columns, (row + 1) * columns)` are unpacked with `unpackbits`, and
 * the bits that belong to neighbouring rows (when a row does not start or end
 * on a byte boundary) are trimmed off.
 *
 * The original handled non-byte-aligned rows with a "small chunk" computed as
 * `0 | (smallCount / dims[0])`, which is always 0 because `smallCount` is a
 * remainder modulo `dims[0]`. For grids such as 84 x 84 every row therefore
 * lost its trailing bits and later rows were read from the wrong position.
 * Rows that are byte aligned produce the same output as before.
 *
 * @param uint8data Packed bytes as a typed array (must support `subarray`),
 *   8 pixels per byte, most significant bit first.
 * @param shape `[rows, columns]` of the grid; values are truncated to int32.
 * @returns An Array of `rows` Uint8Arrays of `columns` bits each (shorter if
 *   the input does not contain enough bytes).
 */
function unpackbitsToShape3(uint8data, shape = [1, 1]) {
  const rows = shape[0] | 0
  const width = shape[1] | 0
  const result = new Array(rows)
  for (let i = 0; i < rows; i++) {
    const bitStart = i * width
    const byteStart = bitStart >> 3 // first byte containing a bit of this row
    const byteEnd = (bitStart + width + 7) >> 3 // one past the last such byte
    const bits = unpackbits(uint8data.subarray(byteStart, byteEnd))
    const skip = bitStart & 7 // leading bits that belong to the previous row
    // Byte-aligned rows need no trimming; otherwise copy out just this row.
    result[i] = (skip === 0 && bits.length === width)
      ? bits
      : bits.slice(skip, skip + width)
  }
  return result
}
// The package scope marker had been garbled to '#'; scoped npm packages are
// addressed as '@scope/name'.
var tf = require('@tensorflow/tfjs')
// Reassigned on purpose: the second require replaces the first binding.
tf = require('@tensorflow/tfjs-node')
/**
 * Unpack packed bits and let TensorFlow do the reshaping.
 *
 * @param uint8data Packed bytes to unpack with `unpackbits`.
 * @param shape Shape passed straight through to `tf.tensor`.
 * @returns The int32 tensor built from the unpacked bits.
 */
function unpackBitsToShapeTensorflow(uint8data, shape) {
  const bits = unpackbits(uint8data)
  const dtype = 'int32'
  return tf.tensor(bits, shape, dtype)
}
// Random packed fixtures: one bit per pixel, so N x N pixels need N * N / 8 bytes.
var test64by64 = new Uint8Array(512)
var test84by84 = new Uint8Array(882)
var test100by100 = new Uint8Array(1250)
for (const packed of [test64by64, test84by84, test100by100]) {
  for (let slot = 0; slot < packed.length; slot++) {
    packed[slot] = Math.floor(256 * Math.random());
  }
}
/**
 * Soft assertion: report `errMsg` through `console.error` when `condition` is
 * falsy. Never throws, so a failed check does not stop the script.
 */
function assert(condition, errMsg) {
  if (condition) {
    return
  }
  console.error(errMsg)
}
// Profile both unpackers and every reshaping strategy on each square grid.
// The same sequence of log lines and timer labels is produced for all sizes.
var foo;
for (const [position, [side, packed]] of [
  [64, test64by64],
  [84, test84by84],
  [100, test100by100],
].entries()) {
  // Every banner after the first is preceded by two blank lines.
  const banner = `********* ${side} x ${side} *********\n\n`;
  console.log(position === 0 ? banner : `\n\n${banner}`);

  console.log('Starting unpackbits1a.');
  console.time('u1a');
  foo = unpackbits1a(packed);
  console.timeEnd('u1a');
  console.log('Finished unpackbits1a.');

  console.log('Starting "unpackbits"');
  console.time('u-orig');
  foo = unpackbits(packed);
  console.timeEnd('u-orig');
  console.log('Finished unpackbits.');

  // [timer label, displayed name, implementation] for the pure-JS reshapers.
  for (const [label, name, reshape] of [
    ['u1', 'unpackbitsToShape1', unpackbitsToShape1],
    ['u2', 'unpackbitsToShape2', unpackbitsToShape2],
    ['u3', 'unpackbitsToShape3', unpackbitsToShape3],
  ]) {
    console.log(`Starting "${name}"`);
    console.time(label);
    foo = reshape(packed, [side, side])
    console.timeEnd(label);
    // Checked outside the timed region: the result must be side x side.
    assert(
      foo.length === side && foo[0].length === side,
      `foo.length === ${side} && foo[0].length === ${side}`
    )
    console.log(`Finished ${name}.`);
  }

  console.log('\nStarting "unpackBitsToShapeTensorflow"')
  console.time('u-tensor')
  foo = unpackBitsToShapeTensorflow(packed, [side, side])
  console.timeEnd('u-tensor')
  console.log('Finished unpackBitsToShapeTensorflow.');
}
I don't know how different the browser's execution environment is than node, but results seem more stable in node. Here's what I get:
********* 64 x 64 *********
Starting unpackbits1a.
u1a: 0.513ms
Finished unpackbits1a.
Starting "unpackbits"
u-orig: 0.189ms
Finished unpackbits.
Starting "unpackbitsToShape1"
u1: 0.434ms
Finished unpackbitsToShape1.
Starting "unpackbitsToShape2"
u2: 0.365ms
Finished unpackbitsToShape2.
Starting "unpackbitsToShape3"
u3: 0.590ms
Finished unpackbitsToShape3.
Starting "unpackBitsToShapeTensorflow"
u-tensor: 0.508ms
Finished unpackBitsToShapeTensorflow.
********* 84 x 84 *********
Starting unpackbits1a.
u1a: 0.222ms
Finished unpackbits1a.
Starting "unpackbits"
u-orig: 0.425ms
Finished unpackbits.
Starting "unpackbitsToShape1"
u1: 0.622ms
Finished unpackbitsToShape1.
Starting "unpackbitsToShape2"
u2: 0.303ms
Finished unpackbitsToShape2.
Starting "unpackbitsToShape3"
u3: 0.388ms
Finished unpackbitsToShape3.
Starting "unpackBitsToShapeTensorflow"
u-tensor: 0.175ms
Finished unpackBitsToShapeTensorflow.
********* 100 x 100 *********
Starting unpackbits1a.
u1a: 1.502ms
Finished unpackbits1a.
Starting "unpackbits"
u-orig: 0.018ms
Finished unpackbits.
Starting "unpackbitsToShape1"
u1: 1.631ms
Finished unpackbitsToShape1.
Starting "unpackbitsToShape2"
u2: 0.072ms
Finished unpackbitsToShape2.
Starting "unpackbitsToShape3"
u3: 0.159ms
Finished unpackbitsToShape3.
Starting "unpackBitsToShapeTensorflow"
u-tensor: 0.052ms
Finished unpackBitsToShapeTensorflow.
function des (key, message, encrypt, mode, iv, padding) {
//declaring this locally speeds things up a bit
var spfunction1 = new Array (0x1010400,0,0x10000,0x1010404,0x1010004,0x10404,0x4,0x10000,0x400,0x1010400,0x1010404,0x400,0x1000404,0x1010004,0x1000000,0x4,0x404,0x1000400,0x1000400,0x10400,0x10400,0x1010000,0x1010000,0x1000404,0x10004,0x1000004,0x1000004,0x10004,0,0x404,0x10404,0x1000000,0x10000,0x1010404,0x4,0x1010000,0x1010400,0x1000000,0x1000000,0x400,0x1010004,0x10000,0x10400,0x1000004,0x400,0x4,0x1000404,0x10404,0x1010404,0x10004,0x1010000,0x1000404,0x1000004,0x404,0x10404,0x1010400,0x404,0x1000400,0x1000400,0,0x10004,0x10400,0,0x1010004);
var spfunction2 = new Array (-0x7fef7fe0,-0x7fff8000,0x8000,0x108020,0x100000,0x20,-0x7fefffe0,-0x7fff7fe0,-0x7fffffe0,-0x7fef7fe0,-0x7fef8000,-0x80000000,-0x7fff8000,0x100000,0x20,-0x7fefffe0,0x108000,0x100020,-0x7fff7fe0,0,-0x80000000,0x8000,0x108020,-0x7ff00000,0x100020,-0x7fffffe0,0,0x108000,0x8020,-0x7fef8000,-0x7ff00000,0x8020,0,0x108020,-0x7fefffe0,0x100000,-0x7fff7fe0,-0x7ff00000,-0x7fef8000,0x8000,-0x7ff00000,-0x7fff8000,0x20,-0x7fef7fe0,0x108020,0x20,0x8000,-0x80000000,0x8020,-0x7fef8000,0x100000,-0x7fffffe0,0x100020,-0x7fff7fe0,-0x7fffffe0,0x100020,0x108000,0,-0x7fff8000,0x8020,-0x80000000,-0x7fefffe0,-0x7fef7fe0,0x108000);
var spfunction3 = new Array (0x208,0x8020200,0,0x8020008,0x8000200,0,0x20208,0x8000200,0x20008,0x8000008,0x8000008,0x20000,0x8020208,0x20008,0x8020000,0x208,0x8000000,0x8,0x8020200,0x200,0x20200,0x8020000,0x8020008,0x20208,0x8000208,0x20200,0x20000,0x8000208,0x8,0x8020208,0x200,0x8000000,0x8020200,0x8000000,0x20008,0x208,0x20000,0x8020200,0x8000200,0,0x200,0x20008,0x8020208,0x8000200,0x8000008,0x200,0,0x8020008,0x8000208,0x20000,0x8000000,0x8020208,0x8,0x20208,0x20200,0x8000008,0x8020000,0x8000208,0x208,0x8020000,0x20208,0x8,0x8020008,0x20200);
var spfunction4 = new Array (0x802001,0x2081,0x2081,0x80,0x802080,0x800081,0x800001,0x2001,0,0x802000,0x802000,0x802081,0x81,0,0x800080,0x800001,0x1,0x2000,0x800000,0x802001,0x80,0x800000,0x2001,0x2080,0x800081,0x1,0x2080,0x800080,0x2000,0x802080,0x802081,0x81,0x800080,0x800001,0x802000,0x802081,0x81,0,0,0x802000,0x2080,0x800080,0x800081,0x1,0x802001,0x2081,0x2081,0x80,0x802081,0x81,0x1,0x2000,0x800001,0x2001,0x802080,0x800081,0x2001,0x2080,0x800000,0x802001,0x80,0x800000,0x2000,0x802080);
var spfunction5 = new Array (0x100,0x2080100,0x2080000,0x42000100,0x80000,0x100,0x40000000,0x2080000,0x40080100,0x80000,0x2000100,0x40080100,0x42000100,0x42080000,0x80100,0x40000000,0x2000000,0x40080000,0x40080000,0,0x40000100,0x42080100,0x42080100,0x2000100,0x42080000,0x40000100,0,0x42000000,0x2080100,0x2000000,0x42000000,0x80100,0x80000,0x42000100,0x100,0x2000000,0x40000000,0x2080000,0x42000100,0x40080100,0x2000100,0x40000000,0x42080000,0x2080100,0x40080100,0x100,0x2000000,0x42080000,0x42080100,0x80100,0x42000000,0x42080100,0x2080000,0,0x40080000,0x42000000,0x80100,0x2000100,0x40000100,0x80000,0,0x40080000,0x2080100,0x40000100);
var spfunction6 = new Array (0x20000010,0x20400000,0x4000,0x20404010,0x20400000,0x10,0x20404010,0x400000,0x20004000,0x404010,0x400000,0x20000010,0x400010,0x20004000,0x20000000,0x4010,0,0x400010,0x20004010,0x4000,0x404000,0x20004010,0x10,0x20400010,0x20400010,0,0x404010,0x20404000,0x4010,0x404000,0x20404000,0x20000000,0x20004000,0x10,0x20400010,0x404000,0x20404010,0x400000,0x4010,0x20000010,0x400000,0x20004000,0x20000000,0x4010,0x20000010,0x20404010,0x404000,0x20400000,0x404010,0x20404000,0,0x20400010,0x10,0x4000,0x20400000,0x404010,0x4000,0x400010,0x20004010,0,0x20404000,0x20000000,0x400010,0x20004010);
var spfunction7 = new Array (0x200000,0x4200002,0x4000802,0,0x800,0x4000802,0x200802,0x4200800,0x4200802,0x200000,0,0x4000002,0x2,0x4000000,0x4200002,0x802,0x4000800,0x200802,0x200002,0x4000800,0x4000002,0x4200000,0x4200800,0x200002,0x4200000,0x800,0x802,0x4200802,0x200800,0x2,0x4000000,0x200800,0x4000000,0x200800,0x200000,0x4000802,0x4000802,0x4200002,0x4200002,0x2,0x200002,0x4000000,0x4000800,0x200000,0x4200800,0x802,0x200802,0x4200800,0x802,0x4000002,0x4200802,0x4200000,0x200800,0,0x2,0x4200802,0,0x200802,0x4200000,0x800,0x4000002,0x4000800,0x800,0x200002);
var spfunction8 = new Array (0x10001040,0x1000,0x40000,0x10041040,0x10000000,0x10001040,0x40,0x10000000,0x40040,0x10040000,0x10041040,0x41000,0x10041000,0x41040,0x1000,0x40,0x10040000,0x10000040,0x10001000,0x1040,0x41000,0x40040,0x10040040,0x10041000,0x1040,0,0,0x10040040,0x10000040,0x10001000,0x41040,0x40000,0x41040,0x40000,0x10041000,0x1000,0x40,0x10040040,0x1000,0x41040,0x10001000,0x40,0x10000040,0x10040000,0x10040040,0x10000000,0x40000,0x10001040,0,0x10041040,0x40040,0x10000040,0x10040000,0x10001000,0x10001040,0,0x10041040,0x41000,0x41000,0x1040,0x1040,0x40040,0x10000000,0x10041000);
//create the 16 or 48 subkeys we will need
var keys = des_createKeys (key);
var m=0, i, j, temp, temp2, right1, right2, left, right, looping;
var cbcleft, cbcleft2, cbcright, cbcright2
var endloop, loopinc;
var len = message.length;
var chunk = 0;
//set up the loops for single and triple des
var iterations = keys.length == 32 ? 3 : 9; //single or triple des
if (iterations == 3) {looping = encrypt ? new Array (0, 32, 2) : new Array (30, -2, -2);}
else {looping = encrypt ? new Array (0, 32, 2, 62, 30, -2, 64, 96, 2) : new Array (94, 62, -2, 32, 64, 2, 30, -2, -2);}
//pad the message depending on the padding parameter
if (padding == 2) message += " "; //pad the message with spaces
else if (padding == 1) {temp = 8-(len%8); message += String.fromCharCode (temp,temp,temp,temp,temp,temp,temp,temp); if (temp==8) len+=8;} //PKCS7 padding
else if (!padding) message += "\0\0\0\0\0\0\0\0"; //pad the message out with null bytes
//store the result here
result = "";
tempresult = "";
if (mode == 1) { //CBC mode
cbcleft = (iv.charCodeAt(m++) << 24) | (iv.charCodeAt(m++) << 16) | (iv.charCodeAt(m++) << 8) | iv.charCodeAt(m++);
cbcright = (iv.charCodeAt(m++) << 24) | (iv.charCodeAt(m++) << 16) | (iv.charCodeAt(m++) << 8) | iv.charCodeAt(m++);
m=0;
}
//loop through each 64 bit chunk of the message
while (m < len) {
left = (message.charCodeAt(m++) << 24) | (message.charCodeAt(m++) << 16) | (message.charCodeAt(m++) << 8) | message.charCodeAt(m++);
right = (message.charCodeAt(m++) << 24) | (message.charCodeAt(m++) << 16) | (message.charCodeAt(m++) << 8) | message.charCodeAt(m++);
//for Cipher Block Chaining mode, xor the message with the previous result
if (mode == 1) {if (encrypt) {left ^= cbcleft; right ^= cbcright;} else {cbcleft2 = cbcleft; cbcright2 = cbcright; cbcleft = left; cbcright = right;}}
//first each 64 but chunk of the message must be permuted according to IP
temp = ((left >>> 4) ^ right) & 0x0f0f0f0f; right ^= temp; left ^= (temp << 4);
temp = ((left >>> 16) ^ right) & 0x0000ffff; right ^= temp; left ^= (temp << 16);
temp = ((right >>> 2) ^ left) & 0x33333333; left ^= temp; right ^= (temp << 2);
temp = ((right >>> 8) ^ left) & 0x00ff00ff; left ^= temp; right ^= (temp << 8);
temp = ((left >>> 1) ^ right) & 0x55555555; right ^= temp; left ^= (temp << 1);
left = ((left << 1) | (left >>> 31));
right = ((right << 1) | (right >>> 31));
//do this either 1 or 3 times for each chunk of the message
for (j=0; j<iterations; j+=3) {
endloop = looping[j+1];
loopinc = looping[j+2];
//now go through and perform the encryption or decryption
for (i=looping[j]; i!=endloop; i+=loopinc) { //for efficiency
right1 = right ^ keys[i];
right2 = ((right >>> 4) | (right << 28)) ^ keys[i+1];
//the result is attained by passing these bytes through the S selection functions
temp = left;
left = right;
right = temp ^ (spfunction2[(right1 >>> 24) & 0x3f] | spfunction4[(right1 >>> 16) & 0x3f]
| spfunction6[(right1 >>> 8) & 0x3f] | spfunction8[right1 & 0x3f]
| spfunction1[(right2 >>> 24) & 0x3f] | spfunction3[(right2 >>> 16) & 0x3f]
| spfunction5[(right2 >>> 8) & 0x3f] | spfunction7[right2 & 0x3f]);
}
temp = left; left = right; right = temp; //unreverse left and right
} //for either 1 or 3 iterations
//move then each one bit to the right
left = ((left >>> 1) | (left << 31));
right = ((right >>> 1) | (right << 31));
//now perform IP-1, which is IP in the opposite direction
temp = ((left >>> 1) ^ right) & 0x55555555; right ^= temp; left ^= (temp << 1);
temp = ((right >>> 8) ^ left) & 0x00ff00ff; left ^= temp; right ^= (temp << 8);
temp = ((right >>> 2) ^ left) & 0x33333333; left ^= temp; right ^= (temp << 2);
temp = ((left >>> 16) ^ right) & 0x0000ffff; right ^= temp; left ^= (temp << 16);
temp = ((left >>> 4) ^ right) & 0x0f0f0f0f; right ^= temp; left ^= (temp << 4);
//for Cipher Block Chaining mode, xor the message with the previous result
if (mode == 1) {if (encrypt) {cbcleft = left; cbcright = right;} else {left ^= cbcleft2; right ^= cbcright2;}}
tempresult += String.fromCharCode ((left>>>24), ((left>>>16) & 0xff), ((left>>>8) & 0xff), (left & 0xff), (right>>>24), ((right>>>16) & 0xff), ((right>>>8) & 0xff), (right & 0xff));
chunk += 8;
if (chunk == 512) {result += tempresult; tempresult = ""; chunk = 0;}
} //for every 8 characters, or 64 bits in the message
//return the result as an array
return result + tempresult;
} //end of des
//des_createKeys
//this takes as input a 64 bit key (even though only 56 bits are used)
//as an array of 2 integers, and returns 16 48 bit keys
function des_createKeys (key) {
//declaring this locally speeds things up a bit
pc2bytes0 = new Array (0,0x4,0x20000000,0x20000004,0x10000,0x10004,0x20010000,0x20010004,0x200,0x204,0x20000200,0x20000204,0x10200,0x10204,0x20010200,0x20010204);
pc2bytes1 = new Array (0,0x1,0x100000,0x100001,0x4000000,0x4000001,0x4100000,0x4100001,0x100,0x101,0x100100,0x100101,0x4000100,0x4000101,0x4100100,0x4100101);
pc2bytes2 = new Array (0,0x8,0x800,0x808,0x1000000,0x1000008,0x1000800,0x1000808,0,0x8,0x800,0x808,0x1000000,0x1000008,0x1000800,0x1000808);
pc2bytes3 = new Array (0,0x200000,0x8000000,0x8200000,0x2000,0x202000,0x8002000,0x8202000,0x20000,0x220000,0x8020000,0x8220000,0x22000,0x222000,0x8022000,0x8222000);
pc2bytes4 = new Array (0,0x40000,0x10,0x40010,0,0x40000,0x10,0x40010,0x1000,0x41000,0x1010,0x41010,0x1000,0x41000,0x1010,0x41010);
pc2bytes5 = new Array (0,0x400,0x20,0x420,0,0x400,0x20,0x420,0x2000000,0x2000400,0x2000020,0x2000420,0x2000000,0x2000400,0x2000020,0x2000420);
pc2bytes6 = new Array (0,0x10000000,0x80000,0x10080000,0x2,0x10000002,0x80002,0x10080002,0,0x10000000,0x80000,0x10080000,0x2,0x10000002,0x80002,0x10080002);
pc2bytes7 = new Array (0,0x10000,0x800,0x10800,0x20000000,0x20010000,0x20000800,0x20010800,0x20000,0x30000,0x20800,0x30800,0x20020000,0x20030000,0x20020800,0x20030800);
pc2bytes8 = new Array (0,0x40000,0,0x40000,0x2,0x40002,0x2,0x40002,0x2000000,0x2040000,0x2000000,0x2040000,0x2000002,0x2040002,0x2000002,0x2040002);
pc2bytes9 = new Array (0,0x10000000,0x8,0x10000008,0,0x10000000,0x8,0x10000008,0x400,0x10000400,0x408,0x10000408,0x400,0x10000400,0x408,0x10000408);
pc2bytes10 = new Array (0,0x20,0,0x20,0x100000,0x100020,0x100000,0x100020,0x2000,0x2020,0x2000,0x2020,0x102000,0x102020,0x102000,0x102020);
pc2bytes11 = new Array (0,0x1000000,0x200,0x1000200,0x200000,0x1200000,0x200200,0x1200200,0x4000000,0x5000000,0x4000200,0x5000200,0x4200000,0x5200000,0x4200200,0x5200200);
pc2bytes12 = new Array (0,0x1000,0x8000000,0x8001000,0x80000,0x81000,0x8080000,0x8081000,0x10,0x1010,0x8000010,0x8001010,0x80010,0x81010,0x8080010,0x8081010);
pc2bytes13 = new Array (0,0x4,0x100,0x104,0,0x4,0x100,0x104,0x1,0x5,0x101,0x105,0x1,0x5,0x101,0x105);
//how many iterations (1 for des, 3 for triple des)
var iterations = key.length > 8 ? 3 : 1; //changed by Paul 16/6/2007 to use Triple DES for 9+ byte keys
//stores the return keys
var keys = new Array (32 * iterations);
//now define the left shifts which need to be done
var shifts = new Array (0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0);
//other variables
var lefttemp, righttemp, m=0, n=0, temp;
for (var j=0; j<iterations; j++) { //either 1 or 3 iterations
left = (key.charCodeAt(m++) << 24) | (key.charCodeAt(m++) << 16) | (key.charCodeAt(m++) << 8) | key.charCodeAt(m++);
right = (key.charCodeAt(m++) << 24) | (key.charCodeAt(m++) << 16) | (key.charCodeAt(m++) << 8) | key.charCodeAt(m++);
temp = ((left >>> 4) ^ right) & 0x0f0f0f0f; right ^= temp; left ^= (temp << 4);
temp = ((right >>> -16) ^ left) & 0x0000ffff; left ^= temp; right ^= (temp << -16);
temp = ((left >>> 2) ^ right) & 0x33333333; right ^= temp; left ^= (temp << 2);
temp = ((right >>> -16) ^ left) & 0x0000ffff; left ^= temp; right ^= (temp << -16);
temp = ((left >>> 1) ^ right) & 0x55555555; right ^= temp; left ^= (temp << 1);
temp = ((right >>> 8) ^ left) & 0x00ff00ff; left ^= temp; right ^= (temp << 8);
temp = ((left >>> 1) ^ right) & 0x55555555; right ^= temp; left ^= (temp << 1);
//the right side needs to be shifted and to get the last four bits of the left side
temp = (left << 8) | ((right >>> 20) & 0x000000f0);
//left needs to be put upside down
left = (right << 24) | ((right << 8) & 0xff0000) | ((right >>> 8) & 0xff00) | ((right >>> 24) & 0xf0);
right = temp;
//now go through and perform these shifts on the left and right keys
for (var i=0; i < shifts.length; i++) {
//shift the keys either one or two bits to the left
if (shifts[i]) {left = (left << 2) | (left >>> 26); right = (right << 2) | (right >>> 26);}
else {left = (left << 1) | (left >>> 27); right = (right << 1) | (right >>> 27);}
left &= -0xf; right &= -0xf;
//now apply PC-2, in such a way that E is easier when encrypting or decrypting
//this conversion will look like PC-2 except only the last 6 bits of each byte are used
//rather than 48 consecutive bits and the order of lines will be according to
//how the S selection functions will be applied: S2, S4, S6, S8, S1, S3, S5, S7
lefttemp = pc2bytes0[left >>> 28] | pc2bytes1[(left >>> 24) & 0xf]
| pc2bytes2[(left >>> 20) & 0xf] | pc2bytes3[(left >>> 16) & 0xf]
| pc2bytes4[(left >>> 12) & 0xf] | pc2bytes5[(left >>> 8) & 0xf]
| pc2bytes6[(left >>> 4) & 0xf];
righttemp = pc2bytes7[right >>> 28] | pc2bytes8[(right >>> 24) & 0xf]
| pc2bytes9[(right >>> 20) & 0xf] | pc2bytes10[(right >>> 16) & 0xf]
| pc2bytes11[(right >>> 12) & 0xf] | pc2bytes12[(right >>> 8) & 0xf]
| pc2bytes13[(right >>> 4) & 0xf];
temp = ((righttemp >>> 16) ^ lefttemp) & 0x0000ffff;
keys[n++] = lefttemp ^ temp; keys[n++] = righttemp ^ (temp << 16);
}
} //for each iterations
//return the keys we've created
return keys;
} //end of des_createKeys
////////////////////////////// TEST //////////////////////////////
/**
 * Renders a byte string as lowercase hex with a leading "0x".
 * Each character contributes two digits taken from its char code
 * (high nibble first); the input is expected to hold codes 0-255.
 */
function stringToHex (s) {
    var digits = "0123456789abcdef";
    var out = "0x";
    var pos = 0;
    while (pos < s.length) {
        var code = s.charCodeAt(pos);
        // build the digit pair first, then append it in one step
        out += digits[code >> 4] + digits[code & 0xf];
        pos++;
    }
    return out;
}
/**
 * Decodes a hex string (with or without a leading "0x") into a string whose
 * characters carry the decoded byte values, two hex digits per character.
 */
function hexToString (h) {
    // skip the optional "0x" prefix
    var start = h.slice(0, 2) == "0x" ? 2 : 0;
    var out = "";
    for (var pos = start; pos < h.length; pos += 2) {
        var byteValue = parseInt(h.slice(pos, pos + 2), 16);
        out += String.fromCharCode(byteValue);
    }
    return out;
}
// Round-trip demo: encrypt a message with des(), print the result as hex,
// then attempt to decrypt it and print the outcome.
// NOTE(review): des() is defined above this excerpt; the arguments are
// presumably (key, message, encrypt, mode, iv) with mode 1 selecting CBC --
// confirm against its signature.
var key = "12345678";
var message = "This is a test message";
var ciphertext = des (key, message, 1, 1,"23456789");
//console.log("Encrypted Value : " + ciphertext);
console.log ("Cipher Text is : " + stringToHex (ciphertext));
// NOTE(review): the ciphertext is hex-encoded by stringToHex() *before* it is
// handed to des(), so des() receives "0x..." text rather than the raw cipher
// bytes, and hexToString() is then applied to the decrypted output. This looks
// like the reason the original message is not recovered; passing `ciphertext`
// directly is probably what was intended -- verify.
var deciphertext = des(key, stringToHex(ciphertext), 0, 1,"23456789");
console.log("Decipher Text is (Message) is :" + hexToString(deciphertext));
I am obtaining the correct encrypted value, but for some reason during decryption I'm not obtaining the actual message, which is "This is a test message". Instead, I'm obtaining a Unicode value ±.
I am using DES with CBC mode of operation. Input vector is 23456789
The source code is from http://www.tero.co.uk/des/code.php.
I ran this script in Developer tools of Google Chrome.
I need to create a SHA-256 digest from a file (~6MB) inside the browser. The only way that I've managed to do it so far was like this:
// Read the file as a data URL, then hash its base64 payload with jsSHA.
var reader = new FileReader();
reader.onload = function() {
    var dataUrl = reader.result;
    // everything after the first comma is the payload; the part before it is
    // the data-URL header carrying the mime type
    var payloadStart = dataUrl.indexOf(',') + 1;
    var base64Payload = dataUrl.slice(payloadStart);
    var hasher = new jsSHA(base64Payload, "B64");
    var digestHex = hasher.getHash("SHA-256", "HEX");
}
reader.readAsDataURL(some_file);
While this works correctly, the problem is that it's very slow. It took ~2-3 seconds for a 6MB file. How can I improve this?
You may want to take a look at the Stanford JS Crypto Library
GitHub
Website with Examples
From the website:
SJCL is secure. It uses the industry-standard AES algorithm at 128, 192 or 256 bits; the SHA256 hash function; the HMAC authentication code; the PBKDF2 password strengthener; and the CCM and OCB authenticated-encryption modes.
SJCL has a test page that shows how long it will take.
184 milliseconds for a SHA256 iterative. And 50 milliseconds for a SHA-256 from catameringue.
Test page
Sample code:
Encrypt data:
sjcl.encrypt("password", "data")
Decrypt data: sjcl.decrypt("password", "encrypted-data")
This is an old question but I thought it's worth noting that asmCrypto is significantly faster than jsSHA, and faster than CryptoJS and SJCL
https://github.com/vibornoff/asmcrypto.js/
There is also a lite version (a fork of the above) maintained by OpenPGP.js
https://github.com/openpgpjs/asmcrypto-lite
Which only includes SHA256, and a couple of AES features.
To use asmCrypto You can simply do the following:
var sha256HexValue = asmCrypto.SHA256.hex(myArraybuffer);
I'm able to hash a 150MB+ file in < 2 seconds consistently in Chrome.
Here is what you're looking for. I derived this from a C version of the SHA256 algorithm. It also includes SHA256D. I don't think you're going to get much faster than this with JavaScript. I tried unrolling the loops and it ran slower due to optimizations performed by the JavaScript engine.
// From: https://github.com/Hartland/GPL-CPU-Miner/blob/master/sha2.c
if ("undefined" == typeof vnet) {
vnet = new Array();
}
if ("undefined" == typeof vnet.crypt) {
vnet.crypt = new Array();
}
vnet.crypt.sha2 = function() {
var sha256_h = [
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
];
var sha256_k = [
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
];
var sha256_init = function(s) {
s.state = [
sha256_h[0],
sha256_h[1],
sha256_h[2],
sha256_h[3],
sha256_h[4],
sha256_h[5],
sha256_h[6],
sha256_h[7],
];
}; this.sha256_init = sha256_init;
/*
* SHA256 block compression function. The 256-bit state is transformed via
* the 512-bit input block to produce a new state.
*/
var sha256_transform = function(s, b, swap) {
var block = b.block;
var state = s.state;
var W;
var S;
var t0;
var t1;
var i;
/* 1. Prepare message schedule W. */
if (swap) {
W = [
((((block[0] ) << 24) & 0xff000000) | (((block[0] ) << 8) & 0x00ff0000) | (((block[0] ) >> 8) & 0x0000ff00) | (((block[0] ) >> 24) & 0x000000ff)),
((((block[1] ) << 24) & 0xff000000) | (((block[1] ) << 8) & 0x00ff0000) | (((block[1] ) >> 8) & 0x0000ff00) | (((block[1] ) >> 24) & 0x000000ff)),
((((block[2] ) << 24) & 0xff000000) | (((block[2] ) << 8) & 0x00ff0000) | (((block[2] ) >> 8) & 0x0000ff00) | (((block[2] ) >> 24) & 0x000000ff)),
((((block[3] ) << 24) & 0xff000000) | (((block[3] ) << 8) & 0x00ff0000) | (((block[3] ) >> 8) & 0x0000ff00) | (((block[3] ) >> 24) & 0x000000ff)),
((((block[4] ) << 24) & 0xff000000) | (((block[4] ) << 8) & 0x00ff0000) | (((block[4] ) >> 8) & 0x0000ff00) | (((block[4] ) >> 24) & 0x000000ff)),
((((block[5] ) << 24) & 0xff000000) | (((block[5] ) << 8) & 0x00ff0000) | (((block[5] ) >> 8) & 0x0000ff00) | (((block[5] ) >> 24) & 0x000000ff)),
((((block[6] ) << 24) & 0xff000000) | (((block[6] ) << 8) & 0x00ff0000) | (((block[6] ) >> 8) & 0x0000ff00) | (((block[6] ) >> 24) & 0x000000ff)),
((((block[7] ) << 24) & 0xff000000) | (((block[7] ) << 8) & 0x00ff0000) | (((block[7] ) >> 8) & 0x0000ff00) | (((block[7] ) >> 24) & 0x000000ff)),
((((block[8] ) << 24) & 0xff000000) | (((block[8] ) << 8) & 0x00ff0000) | (((block[8] ) >> 8) & 0x0000ff00) | (((block[8] ) >> 24) & 0x000000ff)),
((((block[9] ) << 24) & 0xff000000) | (((block[9] ) << 8) & 0x00ff0000) | (((block[9] ) >> 8) & 0x0000ff00) | (((block[9] ) >> 24) & 0x000000ff)),
((((block[10]) << 24) & 0xff000000) | (((block[10]) << 8) & 0x00ff0000) | (((block[10]) >> 8) & 0x0000ff00) | (((block[10]) >> 24) & 0x000000ff)),
((((block[11]) << 24) & 0xff000000) | (((block[11]) << 8) & 0x00ff0000) | (((block[11]) >> 8) & 0x0000ff00) | (((block[11]) >> 24) & 0x000000ff)),
((((block[12]) << 24) & 0xff000000) | (((block[12]) << 8) & 0x00ff0000) | (((block[12]) >> 8) & 0x0000ff00) | (((block[12]) >> 24) & 0x000000ff)),
((((block[13]) << 24) & 0xff000000) | (((block[13]) << 8) & 0x00ff0000) | (((block[13]) >> 8) & 0x0000ff00) | (((block[13]) >> 24) & 0x000000ff)),
((((block[14]) << 24) & 0xff000000) | (((block[14]) << 8) & 0x00ff0000) | (((block[14]) >> 8) & 0x0000ff00) | (((block[14]) >> 24) & 0x000000ff)),
((((block[15]) << 24) & 0xff000000) | (((block[15]) << 8) & 0x00ff0000) | (((block[15]) >> 8) & 0x0000ff00) | (((block[15]) >> 24) & 0x000000ff))
];
} else {
W = [
block[0],
block[1],
block[2],
block[3],
block[4],
block[5],
block[6],
block[7],
block[8],
block[9],
block[10],
block[11],
block[12],
block[13],
block[14],
block[15]
];
}
for (i = 16; i < 64; i += 2) {
W[i] = ((
((((W[i-2] >>> 17) | (W[i-2] << 15)) ^ ((W[i-2] >>> 19) | ((W[i-2] << 13)>>>0) ) ^ (W[i - 2] >>> 10)) >>> 0) + //s1 (W[i - 2]) +
W[i - 7] +
((((W[i - 15] >>> 7) | (W[i - 15] << 25)) ^ ((W[i - 15] >>> 18) | ((W[i - 15] << 14) >>> 0)) ^ (W[i - 15] >>> 3)) >>> 0) + //s0 (W[i - 15]) +
W[i - 16]
) & 0xffffffff) >>> 0;
W[i+1] = ((
((((W[i-1] >>> 17) | (W[i-1] << 15)) ^ ((W[i-1] >>> 19) | (W[i-1] << 13)) ^ (W[i - 1] >>> 10)) >>> 0)+ //s1 (W[i - 1]) +
W[i - 6] +
((((W[i - 14] >>> 7) | (W[i - 14] << 25)) ^ ((W[i - 14] >>> 18) | (W[i - 14] << 14)) ^ (W[i - 14] >>> 3)) >>> 0) + //s0 (W[i - 14]) +
W[i - 15]
) & 0xffffffff) >>> 0;
}
/* 2. Initialize working variables. */
S = [
state[0],
state[1],
state[2],
state[3],
state[4],
state[5],
state[6],
state[7],
];
/* 3. Mix. */
i=0;
for(;i<64;++i) {
//RNDr(S,W,i)
t0 = S[(71 - i) % 8] +
((((S[(68 - i) % 8] >>> 6) | (S[(68 - i) % 8] << 26)) ^ ((S[(68 - i) % 8] >>> 11) | (S[(68 - i) % 8] << 21)) ^ ((S[(68 - i) % 8] >>> 25) | (S[(68 - i) % 8] << 7)))) + //S1 (S[(68 - i) % 8]) +
(((S[(68 - i) % 8] & (S[(69 - i) % 8] ^ S[(70 - i) % 8])) ^ S[(70 - i) % 8]) ) + // Ch
W[i] +
sha256_k[i];
t1 = ((((S[(64 - i) % 8] >>> 2) | ((S[(64 - i) % 8] & 3) << 30)) ^ ((S[(64 - i) % 8] >>> 13) | (S[(64 - i) % 8] << 19)) ^ ((S[(64 - i) % 8] >>> 22) | (S[(64 - i) % 8] << 10)))) + //S0 (S[(64 - i) % 8]) +
(((S[(64 - i) % 8] & (S[(65 - i) % 8] | S[(66 - i) % 8])) | (S[(65 - i) % 8] & S[(66 - i) % 8]))); // Maj
S[(67 - i) % 8] = ((S[(67 - i) % 8] + t0) & 0xFFFFFFFF) >>> 0;
S[(71 - i) % 8] = ((t0 + t1) & 0xFFFFFFFF) >>> 0;
}
/* 4. Mix local working variables into global state */
i=0;
for(;i<8;++i) {
s.state[i] = (0xFFFFFFFF & (state[i] + S[i])) >>> 0;
}
}; this.sha256_transform = sha256_transform;
var sha256d_hash1 = [
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x80000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000100
];
var sha256d_80_swap = function(hash, data)
{
var S = new Array();
var i;
var b1 = new Array();
var b2 = new Array();
var b3 = new Array();
b1.block = [
data[0],
data[1],
data[2],
data[3],
data[4],
data[5],
data[6],
data[7],
data[8],
data[9],
data[10],
data[11],
data[12],
data[13],
data[14],
data[15]
];
b2.block = [
data[16],
data[17],
data[18],
data[19],
data[20],
data[21],
data[22],
data[23],
data[24],
data[25],
data[26],
data[27],
data[28],
data[29],
data[30],
data[31]
];
sha256_init(S);
sha256_transform(S, b1, 0);
sha256_transform(S, b2, 0);
b3.block = [
S.state[0],
S.state[1],
S.state[2],
S.state[3],
S.state[4],
S.state[5],
S.state[6],
S.state[7],
sha256d_hash1[8],
sha256d_hash1[9],
sha256d_hash1[10],
sha256d_hash1[11],
sha256d_hash1[12],
sha256d_hash1[13],
sha256d_hash1[14],
sha256d_hash1[15]
];
sha256_init(hash);
sha256_transform(hash, b3, 0);
for (i = 0; i < 8; i++) {
hash.state[i] = ((((hash.state[i] ) << 24) & 0xff000000) | (((hash.state[i] ) << 8) & 0x00ff0000) | (((hash.state[i] ) >> 8) & 0x0000ff00) | (((hash.state[i] ) >> 24) & 0x000000ff)); //swab32(hash[i]);
}
}; this.sha256d_80_swap = sha256d_80_swap;
var sha256d = function(hash, data) {
var S;
var T;
var block_in;
S = new Array();
T = new Array();
T.block = [];
var i, r;
//hash.hash = new Array(32).join('0').split('').map(parseFloat);
sha256_init(S);
for (r = data.length; r > -9; r -= 64) {
if (r < 64) {
if (r > 0) {
block_in = data.slice(data.length - r,data.length);
block_in.push.apply(block_in, new Array(64-r).join('0').split('').map(parseFloat));
} else {
block_in = new Array(64).join('0').split('').map(parseFloat);
}
} else {
block_in = data.slice(data.length - r,data.length - r + 64);
}
//memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
if (r >= 0 && r < 64) {
block_in[r] = 0x80;
}
for (i = 0; i < 16; i++) {
T.block[i] = (((0xff & block_in[(i*4)]) << 24) | ((0xff & block_in[(i*4)+1]) << 16) | ((0xff & block_in[(i*4)+2]) << 8) | (0xff & block_in[(i*4)+3])) >>> 0;
}
if (r < 56) {
T.block[15] = 8 * data.length;
}
sha256_transform(S, T, 0);
}
//memcpy(S + 8, sha256d_hash1 + 8, 32);
S.block = S.state;
for(i=8;i<16;i++) {
S.block[i] = sha256d_hash1[i];
}
sha256_init(T);
sha256_transform(T, S, 0);
hash.hash = [
(T.state[0] >> 24) & 0xff,
(T.state[0] >> 16) & 0xff,
(T.state[0] >> 8) & 0xff,
T.state[0] & 0xff,
(T.state[1] >> 24) & 0xff,
(T.state[1] >> 16) & 0xff,
(T.state[1] >> 8) & 0xff,
T.state[1] & 0xff,
(T.state[2] >> 24) & 0xff,
(T.state[2] >> 16) & 0xff,
(T.state[2] >> 8) & 0xff,
T.state[2] & 0xff,
(T.state[3] >> 24) & 0xff,
(T.state[3] >> 16) & 0xff,
(T.state[3] >> 8) & 0xff,
T.state[3] & 0xff,
(T.state[4] >> 24) & 0xff,
(T.state[4] >> 16) & 0xff,
(T.state[4] >> 8) & 0xff,
T.state[4] & 0xff,
(T.state[5] >> 24) & 0xff,
(T.state[5] >> 16) & 0xff,
(T.state[5] >> 8) & 0xff,
T.state[5] & 0xff,
(T.state[6] >> 24) & 0xff,
(T.state[6] >> 16) & 0xff,
(T.state[6] >> 8) & 0xff,
T.state[6] & 0xff,
(T.state[7] >> 24) & 0xff,
(T.state[7] >> 16) & 0xff,
(T.state[7] >> 8) & 0xff,
T.state[7] & 0xff
];
}; this.sha256d = sha256d;
var sha256 = function(hash, data) {
var S;
var T;
var block_in;
S = new Array();
T = new Array();
T.block = [];
var i, r;
hash.hash = new Array(32).join('0').split('').map(parseFloat);
sha256_init(S);
for (r = data.length; r > -9; r -= 64) {
if (r < 64) {
if (r > 0) {
block_in = data.slice(data.length - r,data.length);
block_in.push.apply(block_in, new Array(64-r).join('0').split('').map(parseFloat));
} else {
block_in = new Array(64).join('0').split('').map(parseFloat);
}
} else {
block_in = data.slice(data.length - r,data.length - r + 64);
}
//memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r));
if (r >= 0 && r < 64) {
block_in[r] = 0x80;
}
for (i = 0; i < 16; i++) {
T.block[i] = (((0xff & block_in[(i*4)]) << 24) | ((0xff & block_in[(i*4)+1]) << 16) | ((0xff & block_in[(i*4)+2]) << 8) | (0xff & block_in[(i*4)+3])) >>> 0;
}
if (r < 56) {
T.block[15] = 8 * data.length;
}
sha256_transform(S, T, 0);
}
for (i = 0; i < 8; i++) {
//be32enc((uint32_t *)hash + i, T[i]);
hash.hash[(i * 4)] = (S.state[i] >> 24) & 0xff;
hash.hash[(i * 4)+1] = (S.state[i] >> 16) & 0xff
hash.hash[(i * 4)+2] = (S.state[i] >> 8) & 0xff
hash.hash[(i * 4)+3] = S.state[i] & 0xff;
}
}; this.sha256 = sha256;
};
It might be faster to use an Emscripten-compiled version of the crypto libraries. From the Emscripten FAQ:
Q. How fast will the compiled code be?
A. Emscripten's default code generation mode is in asm.js format,
which is a subset of JavaScript designed to make it possible for
JavaScript engines to execute very quickly. See here for up-to-date
benchmark results. In many cases, asm.js can get quite close to native
speed.
You can find an Emscripten-compiled NaCl cryptographic library here.
I use SubtleCrypto.digest()
With a test file of about 85 MB, it takes less than a second to finish.
<input type="file" multiple/>
<input placeholder="Press `Enter` when done."/>
<script>
/**
* #param {"SHA-1"|"SHA-256"|"SHA-384"|"SHA-512"} algorithm https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest
* #param {string|Blob} data
*/
async function getHash(algorithm, data) {
const main = async (msgUint8) => { // https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest#converting_a_digest_to_a_hex_string
const hashBuffer = await crypto.subtle.digest(algorithm, msgUint8)
const hashArray = Array.from(new Uint8Array(hashBuffer))
return hashArray.map(b => b.toString(16).padStart(2, '0')).join(''); // convert bytes to hex string
}
if (data instanceof Blob) {
const arrayBuffer = await data.arrayBuffer()
const msgUint8 = new Uint8Array(arrayBuffer)
return await main(msgUint8)
}
const encoder = new TextEncoder()
const msgUint8 = encoder.encode(data)
return await main(msgUint8)
}
// Wire up the two inputs: every selected file, and the text typed into the
// text box (on Enter), is hashed with SHA-256 and logged to the console.
const inputFile = document.querySelector('input[type="file"]')
const inputText = document.querySelector('input[placeholder^="Press"]')

inputFile.onchange = async (event) => {
  // files are hashed one after another, in selection order
  for (const selected of event.target.files) {
    console.log(selected.name, selected.type, selected.size + "bytes")
    console.log(await getHash("SHA-256", new Blob([selected])))
  }
}

inputText.onkeyup = async (keyboardEvent) => {
  if (keyboardEvent.key !== "Enter") {
    return
  }
  const typedText = keyboardEvent.target.value
  console.log(await getHash("SHA-256", typedText))
}
</script>
As some have answered, it can be done in vanillajs :
/**
 * Computes the SHA-256 checksum of a Blob with the Web Crypto API.
 *
 * @param blob The data to hash.
 * @returns The digest as a lowercase hex string, two digits per byte.
 */
async function getChecksumSha256(blob: Blob): Promise<string> {
  const contents = await blob.arrayBuffer();
  const digest = await crypto.subtle.digest('SHA-256', new Uint8Array(contents));
  let hex = '';
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, '0');
  }
  return hex;
}
Source : https://gist.github.com/bilelz/c96fb0b1f62983d061910e8d310a5162
You can do that without external libraries using Crypto.subtle API. More details here.
Example:
/**
 * Converts a buffer's bytes to a lowercase hex string, two digits per byte.
 * The argument is wrapped in a Uint8Array, so an ArrayBuffer works directly.
 */
function b2h(buffer) {
    const bytes = new Uint8Array(buffer);
    let hex = '';
    for (let i = 0; i < bytes.length; i++) {
        hex += bytes[i].toString(16).padStart(2, '0');
    }
    return hex;
}
// Read the file into an ArrayBuffer and hash it once loading has ended.
const FILEREADER = new FileReader();
FILEREADER.readAsArrayBuffer(file);
FILEREADER.onloadend = async function(entry) {
    const contents = entry.target.result;
    const digest = await crypto.subtle.digest('SHA-256', contents);
    // output: the sha256 digest hex encoded of the file
    const FILE_HASH = b2h(digest);
}