Convert byte array to numbers in JavaScript - javascript

I have JavaScript code that retrieves numerical vectors from a web-service. The original data is an array of doubles that is converted to a byte array and then base64 encoded. I decode from base64 in JavaScript, but then I don't know how to transform the resulting bytes into an array of numbers.

This was the only way I could think of off the top of my head to do it.
/**
 * Decodes one IEEE 754 double from a binary string.
 *
 * @param {string} str - Binary string with one byte per character (for example
 *   the result of `atob`), holding little-endian 64-bit floats back to back.
 * @param {number} start - Zero-based index of the double to read; its bytes are
 *   taken from characters `start * 8` through `start * 8 + 7`.
 * @returns {number} The decoded value, including signed zeros, subnormals,
 *   both infinities and NaN.
 */
function bytesToDouble(str, start) {
  const offset = start * 8;
  const view = new DataView(new ArrayBuffer(8));
  for (let i = 0; i < 8; i++) {
    // setUint8 keeps only the low 8 bits; positions past the end of the
    // string (where charCodeAt yields NaN) are stored as 0.
    view.setUint8(i, str.charCodeAt(offset + i));
  }
  // Character start*8+7 carries the sign and the top exponent bits, so the
  // bytes are in little-endian order.
  return view.getFloat64(0, true);
}
// Browser-only demo: atob turns the base64 text into a binary string, and
// bytesToDouble reads the double at the given index (8 characters per value).
var data = atob("AAAAAABsskAAAAAAAPmxQAAAAAAAKrF");
alert(bytesToDouble(data,0)); // 4716.0
alert(bytesToDouble(data,1)); // 4601.0
This should give you a push in the right direction, though it took me a while to remember how to deal with doubles.
One big caveat to note though:
This relies on atob to do the base64 decoding, which is not supported everywhere, and aside from that probably isn't a great idea anyway. What you really want to do is unroll the base64 encoded string to an array of numbers (bytes would be the easiest to work with although not the most efficient thing on the planet). The reason is that when atob does its magic, it returns a string, which is far from ideal. Depending on how the decoded bytes are mapped to code points (especially for values between 128 and 255), the resulting .charCodeAt() may not return what you expect.
And there may be some accuracy issues, because after all I am using a double to calculate a double, but I think it might be okay.
Base64 is fairly trivial to work with, so you should be able to figure that part out.
If you did switch to an array (rather than the str string now), then you would obviously drop the .charCodeAt() reference and just get the indices you want directly.
There is a functioning fiddle here

I assume we have used this function in web service (c#) to encode the double array data as string:
//Input: [552.4, 539.8]
//Output: IOz0gCAsscA=
/// <summary>
/// Encodes each value as a big-endian 32-bit integer holding the value scaled
/// by 1E6 (six decimal places), then base64-encodes the resulting bytes.
/// </summary>
/// <param name="input">Values to encode; each scaled value must fit in an Int32.</param>
/// <returns>Base64 text containing four bytes per input value.</returns>
private string ConvertToSerializableString(double[] input)
{
    byte[] data = new byte[input.Length * 4];
    for (int i = 0; i < input.Length; i++)
    {
        // Round rather than truncate: a scaled product can land just below the
        // intended integer because of binary floating-point representation,
        // and a bare (int) cast would then lose one unit.
        int source = (int)Math.Round(input[i] * 1E6);
        int dataIndex = i * 4;
        // Most significant byte first.
        data[dataIndex] = (byte)((source >> 24) & 0xFF);
        data[dataIndex + 1] = (byte)((source >> 16) & 0xFF);
        data[dataIndex + 2] = (byte)((source >> 8) & 0xFF);
        data[dataIndex + 3] = (byte)(source & 0xFF);
    }
    return Convert.ToBase64String(data);
}
Then we can use the following client script (javascript) to decode it:
// Decode the base64 payload into a binary string (one character per byte).
var base64EncodedDoubleArrayData = "IOz0gCAsscA=";
var byteData = window.atob(base64EncodedDoubleArrayData);
var doubleArray = [];
// Every value occupies four consecutive characters, most significant byte first.
for (let offset = 0; offset < byteData.length; offset += 4) {
  const b0 = byteData.charCodeAt(offset);
  const b1 = byteData.charCodeAt(offset + 1);
  const b2 = byteData.charCodeAt(offset + 2);
  const b3 = byteData.charCodeAt(offset + 3);
  const scaled = (b0 << 24) + (b1 << 16) + (b2 << 8) + b3;
  // The stored integer is the original value multiplied by 1e6.
  doubleArray.push(parseFloat(scaled / 1e6));
}
//it should return something like doubleArray = [552.4, 539.8]

Related

How to split large integer into an array of 8-bit integers

Wondering how to convert the output of arbitrarily sized integers like 1 or 12345 or 5324617851000199519157 to an array of integers.
[1] // for the first one
// [probably just a few values for the second 12345...]
[1, 123, 255, 32, ...] // not sure here...
I am not sure what the resulting value would look like or how to compute it, but somehow it would be something like:
A bunch of 8-bit numbers that can be used to reconstruct (somehow) the original arbitrary integer. I am not sure what calculations would be required to do this either. But all I do know is that each unique arbitrarily-sized integer should result in a unique array of 8-bit values. That is, no two different date integers should result in the same array.
It doesn't matter the language much how this is implemented, but probably an imperative language like JavaScript or C.
I am pretty sure the arrays should all be the same length as well, but if that's not possible then knowing how to do it a different way would be okay.
I'm not sure if this is too brute-forcey for what you want, but you can take an arbitrary string and just do the long division into a Uint8Array.
Here's a function (borrowed liberally from here) that will convert back and forth from an arbitrarily long string:
/**
 * Converts a string of decimal digits of any length into its base-256 digits.
 *
 * @param {string} str - Decimal digits, most significant first.
 * @returns {Uint8Array} Base-256 digits, most significant first (empty for "" and "0").
 */
function eightBit(str) {
  // Base-256 digits of the number read so far, least significant first.
  const littleEndian = [];
  for (const digitChar of str) {
    // Multiply the accumulated number by 10 and add the new digit,
    // propagating the carry through every base-256 position.
    let carry = 1 * digitChar;
    for (let pos = 0; carry || pos < littleEndian.length; pos++) {
      carry += (littleEndian[pos] || 0) * 10;
      const low = carry % 256;
      littleEndian[pos] = low;
      carry = (carry - low) / 256;
    }
  }
  return Uint8Array.from(littleEndian.reverse());
}
/**
 * Converts base-256 digits back into a decimal digit string.
 *
 * @param {Iterable<number>} arr - Base-256 digits, most significant first.
 * @returns {string} Decimal digits, most significant first ("" when the value is zero).
 */
function eightBit2String(arr) {
  // Decimal digits of the number read so far, least significant first.
  const decimalDigits = [];
  for (const byteValue of arr) {
    // Multiply the accumulated number by 256 and add the new byte,
    // propagating the carry through every decimal position.
    let carry = 1 * byteValue;
    for (let pos = 0; carry || pos < decimalDigits.length; pos++) {
      carry += (decimalDigits[pos] || 0) * 256;
      const low = carry % 10;
      decimalDigits[pos] = low;
      carry = (carry - low) / 10;
    }
  }
  return decimalDigits.reverse().join('');
}
// sanity check
console.log("256 = ", eightBit('256'), "258 = ", eightBit('258'))
// Round-trip a 56-digit number, far beyond Number.MAX_SAFE_INTEGER,
// through the byte form and back to a decimal string.
let n = '47171857151875817758571875815815782572758275672576575677'
let a = eightBit(n)
console.log("to convert:", n)
console.log("converted:", a.toString())
let s = eightBit2String(a)
console.log("converted back:", s)
No doubt, there are some efficiencies to be found (maybe you can avoid the interim arrays).
Most languages, including C and Javascript, have bit-shifting and bit-masking operations as part of their basic math operations. But beware Javascript: numbers are 64 bits, but only 32-bit masking operations are allowed. So:
// Split a non-negative integer below 2^53 into eight bytes, most significant
// first. Bitwise operators only see 32 bits, so the upper word is obtained by
// division and the lower word by masking.
let bignum = Date.now();
let hi = Math.floor(bignum / 0x100000000);
let lo = bignum & 0xFFFFFFFF;
let bytes = [hi, lo].flatMap((word) =>
  [24, 16, 8, 0].map((shift) => (word >> shift) & 0xFF)
);

Create a bit mask for Javascript typed arrays

I am working with Javascript typed arrays, and I need to compress them as much as possible for networking purposes.
The smallest built in array Javascript has is 8 bits per entry. This will store numbers between 0 and 255.
However, the data I'm working with will only contain numbers between 0 and 3. This can be stored using 2 bits.
So my question is, if I have an 8 bit array that is populated with data only using numbers between 0 and 3, how can I "convert" it into a 2 bit array?
I know I'll need to use a bit operator, but I'm not sure how to make a mask that will only focus on 2 bits at a time.
A longer example is hard to fit into a comment :)
Up front, please note that very often, network data is compressed already - e.g. with gzip (especially when there is concern about data volume and the network libraries are setup properly). However, this is not always the case and would still not be as compact as doing it manually.
You need to keep track of two things, the current array index and the current slot inside the 8-Bit that is being read or written. For writing, | is useful, for reading &. Shifts (<< or >>) are used to select the position.
// Random integer in [0, 3], i.e. a value that fits in two bits.
const randomTwoBitData = () => Math.floor(Math.random() * 4);
// 256 random 2-bit samples.
const sampleData = Array.from({ length: 256 }, () => randomTwoBitData());
// Four samples fit into every byte.
let buffer = new Uint8Array(sampleData.length / 4);
// Packing: sample k of each group of four lands at bit offset 2*k of its byte.
// The sample count is divisible by four, so the data fits perfectly.
sampleData.forEach((value, index) => {
  buffer[index >> 2] |= value << ((index & 3) * 2);
});
// Left-pads s with zeros up to n characters for console output.
const pad = (s, n) => s.padStart(n, "0");
// Show one packed byte from the middle next to the four samples it holds.
{
  const packedSamples = [127, 126, 125, 124]
    .map((index) => pad(sampleData[index].toString(2), 2))
    .join(", ");
  console.log(`buffer: ${pad(buffer[31].toString(2), 8)}, original data: ${packedSamples}`);
}
console.log("(order of original data inverted for readability)");
console.log("");
// Unpacking: shift each 2-bit slot down to the bottom and mask with 3 (binary 11).
let readData = [];
for (const byte of buffer) {
  for (let shift = 0; shift < 8; shift += 2) {
    readData.push((byte >> shift) & 3);
  }
}
// Verify that unpacking reproduces the original samples.
console.log(`original data and re-read data are identical: ${readData.every((value, i) => value === sampleData[i])}`);
Here is a function to do 8 bits number to 2 bits array of length 4 with & and >>:
/**
 * Splits an 8-bit value into its four 2-bit fields.
 *
 * @param {number} val - Value whose low 8 bits are split.
 * @returns {number[]} Four values in 0..3, taken from bits 7-6, 5-4, 3-2 and 1-0.
 */
function convert8to2(val) {
  // Shift each field down to the bottom two bits, then mask with binary 11.
  return [6, 4, 2, 0].map(function (shift) {
    return (val >> shift) & 3;
  });
}
/**
 * Combines four 2-bit values into one 8-bit value.
 *
 * @param {number[]} arr - Exactly four values, each expected to be in 0..3;
 *   arr[0] becomes bits 7-6 and arr[3] becomes bits 1-0.
 * @returns {number} The packed value.
 * @throws {RangeError} If arr does not contain exactly four entries.
 */
function convert2to8(arr) {
  if (arr.length !== 4) {
    // Throw a real Error so callers get a stack trace and can use instanceof.
    throw new RangeError('convert2to8 expects exactly 4 values, got ' + arr.length);
  }
  return (arr[0] << 6) + (arr[1] << 4) + (arr[2] << 2) + arr[3];
}
// 228 = 11100100
// Split the byte into its four 2-bit fields, then pack them back together.
var arr = convert8to2(228);
console.log(arr);
console.log(convert2to8(arr));
Edited
Change the example value and format the binary number with leading 0
Edited
Add convert2to8 and create an example usage:
/**
 * Splits an 8-bit value into its four 2-bit fields.
 *
 * @param {number} val - Value whose low 8 bits are split.
 * @returns {number[]} Four values in 0..3, taken from bits 7-6, 5-4, 3-2 and 1-0.
 */
function convert8to2(val) {
  var arr = [];
  arr.push((val & parseInt('11000000', 2)) >> 6);
  arr.push((val & parseInt('00110000', 2)) >> 4);
  arr.push((val & parseInt('00001100', 2)) >> 2);
  arr.push(val & parseInt('00000011', 2));
  return arr;
}

/**
 * Combines four 2-bit values into one 8-bit value.
 *
 * @param {number[]} arr - Exactly four values, each expected to be in 0..3.
 * @returns {number} The packed value, arr[0] in the top two bits.
 * @throws {RangeError} If arr does not contain exactly four entries.
 */
function convert2to8(arr) {
  if (arr.length !== 4) {
    // Throw a real Error so callers get a stack trace and can use instanceof.
    throw new RangeError('convert2to8 expects exactly 4 values, got ' + arr.length);
  }
  return (arr[0] << 6) + (arr[1] << 4) + (arr[2] << 2) + arr[3];
}

// Example usage: ten random bytes are split into 2-bit fields and packed again.
var randomData = [];
for (var i = 0; i < 10; i++) {
  // Multiply by 256 (not 255) so that every byte value 0..255 can occur.
  randomData.push(Math.floor(Math.random() * 256));
}
console.log(randomData);
var arrayOf2 = [];
for (var i = 0; i < randomData.length; i++) {
  arrayOf2.push(convert8to2(randomData[i]));
}
console.log(arrayOf2);
var arrayOf8 = [];
for (var i = 0; i < arrayOf2.length; i++) {
  arrayOf8.push(convert2to8(arrayOf2[i]));
}
// arrayOf8 should equal randomData.
console.log(arrayOf8);

JavaScript calculate hashcode from real number and integer number

Hi there I need function to calculate unique integer number from number (real number double precision) and integer.
Try explain I am developing GIS application in javascript and I am working with complex vector object like polygon (array of points object with two coordinate in ring) and lines array of points. I need fast algorithm to recognize that element has been changed it must be really fast because my vector object is collection of thousand points . In C# I am calculating hash code from coordinate using bitwise operation XOR.
But JavaScript converts all operands of a bitwise operation to integers, whereas I need to reinterpret the binary representation of the double as integers before applying the bitwise operation, the way C# does. In Reflector I can see that C# calculates the hash code for a double like this, and I need this function in JavaScript, as fast as it can be.
// Hash of a double: both zeros map to 0, any other value hashes the XOR of the
// low and high 32-bit halves of its 64-bit representation.
public override unsafe int GetHashCode() //from System.Double
{
double num = this;
// 0.0 == -0.0 is true, so both zeros take this branch.
if (num == 0.0)
{
return 0;
}
// Reinterpret the 8 bytes of the double as a 64-bit integer (no numeric conversion).
long num2 = *((long*) &num);
// Low 32 bits XOR high 32 bits.
return (((int) num2) ^ ((int) (num2 >> 32)));
}
Example:
/**
 * Rotates a 32-bit integer right by one bit.
 *
 * @param {number} n - Value to rotate; it is converted to a 32-bit integer first.
 * @returns {number} The rotated value as a signed 32-bit integer.
 */
var rotation = function (n) {
  // Use the unsigned shift: a signed shift would copy the sign bit back into
  // bit 31, so negative inputs would not actually rotate. Bit 0 wraps to bit 31.
  return (n >>> 1) | ((n & 0x001) << 31);
}
// The ^ operator converts both operands to 32-bit integers first, so the
// fractional part of 1.1 is dropped and both inputs produce the same hash.
var x: number = 1;
var y: number = 5;
var hash = x ^ rotation(y); // result is -2147483645
var x1: number = 1.1;
var y1: number = 5;
var hash1 = x1 ^ rotation(y1); // result is -2147483645
Example result is not correct hash == hash1
Example 2: Using toString gives the correct result, but calculating a hash from the string is too complicated and I think it is not fast enough.
/**
 * Rotates a 32-bit integer right by one bit.
 *
 * @param {number} n - Value to rotate; it is converted to a 32-bit integer first.
 * @returns {number} The rotated value as a signed 32-bit integer.
 */
var rotation = function (n) {
  // Use the unsigned shift: a signed shift would copy the sign bit back into
  // bit 31, so negative inputs would not actually rotate. Bit 0 wraps to bit 31.
  return (n >>> 1) | ((n & 0x001) << 31);
}
/**
 * Computes hash = hash * 31 + charCode over the UTF-16 code units of a string,
 * truncated to a signed 32-bit integer after every character.
 *
 * @param str - Text to hash.
 * @returns The signed 32-bit hash; 0 for the empty string.
 */
var GetHashCodeString = function (str: string): number {
    let acc = 0;
    let pos = 0;
    while (pos < str.length) {
        // acc * 31 equals (acc << 5) - acc modulo 2^32; "| 0" wraps to int32.
        acc = (acc * 31 + str.charCodeAt(pos)) | 0;
        pos += 1;
    }
    return acc;
}
// Hash the decimal text of each number instead of the number itself, so that
// 1 and 1.1 no longer collapse to the same integer before the XOR.
var x: number = 1;
var y: number = 5;
var hash = GetHashCodeString(x.toString()) ^ rotation(GetHashCodeString(y.toString()));
//result is -2147483605
var x1: number = 1.1;
var y1: number = 5;
var hash1 = GetHashCodeString(x1.toString()) ^ rotation(GetHashCodeString(y1.toString()));
//result is -2147435090
Example2 result is correct hash != hash1
Is there some faster way than converting number to string than calculate hash from each character? Because my object is very large and it will take lot of time and operation in this way ...
I try do it using TypedArrays but yet I am not successful.
Thanks very much for your help
Hi there I tried use TypedArrays to calculate Hash code from number and the result is interesting. In IE the performance 4x better in Chrome 2x in FireFox this approach is equal to string version ...
/**
 * Hashes a number by XOR-ing the two 32-bit halves of its 64-bit float
 * representation.
 *
 * @param n - Number to hash.
 * @returns Signed 32-bit integer: upper word XOR lower word.
 */
var GetHashCodeNumber = function (n: number): number {
    // An 8-byte scratch buffer holds the number as a 64-bit float.
    const view = new DataView(new ArrayBuffer(8));
    view.setFloat64(0, n);
    // Read the same eight bytes back as two signed 32-bit integers
    // (bytes 0-3 and bytes 4-7).
    const upperWord = view.getInt32(0);
    const lowerWord = view.getInt32(4);
    return upperWord ^ lowerWord;
}
I think this can be useful for someone
EDIT: using one buffer and DataView is faster !
Here is a faster way to do this in JavaScript.
// One shared 8-byte scratch buffer, viewed both as a single float64 and as two
// int32 words, so hashing allocates nothing per call.
const kBuf = new ArrayBuffer(8);
const kBufAsF64 = new Float64Array(kBuf);
const kBufAsI32 = new Int32Array(kBuf);

/**
 * Hashes a number to a signed 32-bit integer.
 *
 * @param {number} n - Value to hash.
 * @returns {number} n itself when it is already a 32-bit integer, otherwise the
 *   XOR of the two 32-bit halves of its float64 representation.
 */
function hashNumber(n) {
  // Fast path for 32-bit integers. Drop this branch if 0 and -0 should hash
  // to different values.
  const truncated = ~~n;
  if (truncated === n) {
    return truncated;
  }
  kBufAsF64[0] = n;
  const firstWord = kBufAsI32[0];
  const secondWord = kBufAsI32[1];
  return firstWord ^ secondWord;
}
It's 250x faster than the DataView approach: see benchmark.
I looked up some hashing libraries to see how they did it: xxhashjs, jshashes, etc.
Most seem to take a string or an ArrayBuffer, and also depend on UINT32-like functionality. This is equivalent to you needing a binary representation of the double (from your C# example). Notably I did not find any solution that included more-strange types, other than in another (unanswered) question.
His solution uses a method proposed here, which converts it to various typed arrays. This is most likely what you want, and the fastest accurate solution (I think).
I highly recommend that you structure your code to traverse objects/arrays as desired, and also benchmark the solution to see how comparable it is to your existing methods (the non-working one and the string one).

Generate a Hash from string in Javascript

I need to convert strings to some form of hash. Is this possible in JavaScript?
I'm not utilizing a server-side language so I can't do it that way.
/**
 * Computes hash = hash * 31 + charCode over the UTF-16 code units of the
 * string, truncated to a signed 32-bit integer after every character.
 *
 * @returns {number} The signed 32-bit hash; 0 for the empty string.
 */
String.prototype.hashCode = function () {
  let result = 0;
  for (let pos = 0; pos < this.length; pos += 1) {
    // Math.imul(result, 31) equals (result << 5) - result modulo 2^32.
    result = (Math.imul(result, 31) + this.charCodeAt(pos)) | 0;
  }
  return result;
};
const str = 'revenue';
console.log(str, str.hashCode());
Source
Many of the answers here are the same String.hashCode hash function taken from Java. It dates back to 1981 from Gosling Emacs, is extremely weak, and makes zero sense performance-wise in modern JavaScript. In fact, implementations could be significantly faster by using ES6 Math.imul, but no one took notice. We can do much better than this, at essentially identical performance.
Here's one I did—cyrb53, a simple but high quality 53-bit hash. It's quite fast, provides very good* hash distribution, and because it outputs 53 bits, has significantly lower collision rates compared to any 32-bit hash. Also, you can ignore SA's CC license as it's public domain on my GitHub.
/**
 * cyrb53: a non-cryptographic 53-bit string hash.
 *
 * @param {string} str - Text to hash (processed per UTF-16 code unit).
 * @param {number} [seed=0] - Optional seed selecting an alternate hash stream.
 * @returns {number} Non-negative integer below 2^53.
 */
const cyrb53 = (str, seed = 0) => {
  let low = 0xdeadbeef ^ seed;
  let high = 0x41c6ce57 ^ seed;
  for (let idx = 0; idx < str.length; idx += 1) {
    const code = str.charCodeAt(idx);
    low = Math.imul(low ^ code, 2654435761);
    high = Math.imul(high ^ code, 1597334677);
  }
  // Final mixing. The order matters: the second statement reads the value of
  // low that the first statement has just produced.
  low = Math.imul(low ^ (low >>> 16), 2246822507) ^ Math.imul(high ^ (high >>> 13), 3266489909);
  high = Math.imul(high ^ (high >>> 16), 2246822507) ^ Math.imul(low ^ (low >>> 13), 3266489909);
  // Keep the low 21 bits of high above the 32 unsigned bits of low: 53 bits in total.
  const top21 = high & 0x1fffff;
  return top21 * 0x100000000 + (low >>> 0);
};
// Similar inputs and different seeds: each call prints its own hash value.
console.log(`cyrb53('a') -> ${cyrb53('a')}`)
console.log(`cyrb53('b') -> ${cyrb53('b')}`)
console.log(`cyrb53('revenge') -> ${cyrb53('revenge')}`)
console.log(`cyrb53('revenue') -> ${cyrb53('revenue')}`)
console.log(`cyrb53('revenue', 1) -> ${cyrb53('revenue', 1)}`)
console.log(`cyrb53('revenue', 2) -> ${cyrb53('revenue', 2)}`)
console.log(`cyrb53('revenue', 3) -> ${cyrb53('revenue', 3)}`)
*It is roughly similar to the well-known MurmurHash/xxHash algorithms. It uses a combination of multiplication and Xorshift to generate the hash, but not as thorough. As a result it's faster than either would be in JavaScript and significantly simpler to implement, but may not pass all tests in SMHasher. This is not a cryptographic hash function, so don't use this for security purposes.
Like any proper hash, it has an avalanche effect, which basically means small changes in the input have big changes in the output making the resulting hash appear more 'random':
"501c2ba782c97901" = cyrb53("a")
"459eda5bc254d2bf" = cyrb53("b")
"fbce64cc3b748385" = cyrb53("revenge")
"fb1d85148d13f93a" = cyrb53("revenue")
You can optionally supply a seed (unsigned integer, 32-bit max) for alternate streams of the same input:
"76fee5e6598ccd5c" = cyrb53("revenue", 1)
"1f672e2831253862" = cyrb53("revenue", 2)
"2b10de31708e6ab7" = cyrb53("revenue", 3)
Technically, it is a 64-bit hash, that is, two uncorrelated 32-bit hashes computed in parallel, but JavaScript is limited to 53-bit integers. If convenient, the full 64-bit output can be used by altering the return statement with a hex string or array.
return [h2>>>0, h1>>>0];
// or
return (h2>>>0).toString(16).padStart(8,0)+(h1>>>0).toString(16).padStart(8,0);
// or
return 4294967296n * BigInt(h2) + BigInt(h1);
Be aware that constructing hex strings drastically slows down batch processing. The array is much more efficient, but obviously requires two checks instead of one. I also included BigInt, which should be slightly faster than String, but still much slower than Array or Number.
Just for fun, here's TinySimpleHash, the smallest hash I could come up with that's still decent. It's a 32-bit hash in 89 chars with better quality randomness than even FNV or DJB2:
/**
 * TinySimpleHash: a minimal 32-bit string hash.
 *
 * @param {string} s - Text to hash (processed per UTF-16 code unit).
 * @returns {number} Signed 32-bit integer; 9 for the empty string.
 */
// Declared with const: a bare assignment to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
const TSH = (s) => {
  let h = 9;
  for (let i = 0; i < s.length; i++) {
    h = Math.imul(h ^ s.charCodeAt(i), 9 ** 9);
  }
  // >>> binds tighter than ^, so this is h ^ (h >>> 9).
  return h ^ (h >>> 9);
};
EDIT
based on my jsperf tests, the accepted answer is actually faster: http://jsperf.com/hashcodelordvlad
ORIGINAL
if anyone is interested, here is an improved ( faster ) version, which will fail on older browsers who lack the reduce array function.
/**
 * Computes hash = hash * 31 + charCode over the UTF-16 code units of a string,
 * truncated to a signed 32-bit integer after every character.
 *
 * @param {string} s - Text to hash.
 * @returns {number} The signed 32-bit hash; 0 for the empty string.
 */
// Declared with const: a bare assignment to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
const hashCode = function (s) {
  return s.split("").reduce(function (acc, ch) {
    // "| 0" truncates to a signed 32-bit integer.
    return ((acc << 5) - acc + ch.charCodeAt(0)) | 0;
  }, 0);
};
// testing
console.log(hashCode("hello."));
console.log(hashCode("this is a text."));
console.log(hashCode("Despacito by Luis Fonsi"));
one-liner arrow function version :
// hash = hash * 31 + charCode per UTF-16 code unit, truncated to a signed
// 32-bit integer at every step. Declared with const: a bare assignment to an
// undeclared name throws a ReferenceError in strict mode and in ES modules.
const hashCode = (s) => s.split('').reduce((acc, ch) => ((acc << 5) - acc + ch.charCodeAt(0)) | 0, 0);
// testing
console.log(hashCode("hello."));
console.log(hashCode("this is a text."));
console.log(hashCode("Despacito by Luis Fonsi"));
Note: Even with the best 32-bit hash, collisions will occur sooner or later.
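The hash collision probability can be calculated as
$$p = 1 - e^{-\frac{k(k-1)}{2N}},$$
approximated as
$$p \approx \frac{k^2}{2N}$$
(see here), where $k$ is the number of hashed items and $N$ is the number of possible hash values ($N = 2^{32}$ for a 32-bit hash).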
This may be higher than intuition suggests:
Assuming a 32-bit hash and k=10,000 items, a collision will occur with a probability of 1.2%.
For 77,163 samples the probability becomes 50%!
(calculator).
I suggest a workaround at the bottom.
In an answer to this question
Which hashing algorithm is best for uniqueness and speed?,
Ian Boyd posted a good in depth analysis.
In short (as I interpret it), he comes to the conclusion that MurmurHash is best, followed by FNV-1a.
Java’s String.hashCode() algorithm that esmiralha proposed seems to be a variant of DJB2.
FNV-1a has a better distribution than DJB2, but is slower
DJB2 is faster than FNV-1a, but tends to yield more collisions
MurmurHash3 is better and faster than DJB2 and FNV-1a (but the optimized implementation requires more lines of code than FNV and DJB2)
Some benchmarks with large input strings here: http://jsperf.com/32-bit-hash
When short input strings are hashed, murmur's performance drops, relative to DJB2 and FNV-1a: http://jsperf.com/32-bit-hash/3
So in general I would recommend murmur3.
See here for a JavaScript implementation:
https://github.com/garycourt/murmurhash-js
If input strings are short and performance is more important than distribution quality, use DJB2 (as proposed by the accepted answer by esmiralha).
If quality and small code size are more important than speed, I use this implementation of FNV-1a (based on this code).
/**
 * Calculate a 32 bit FNV-1a hash
 * Found here: https://gist.github.com/vaiorabbit/5657561
 * Ref.: http://isthe.com/chongo/tech/comp/fnv/
 *
 * @param {string} str the input value (hashed per UTF-16 code unit)
 * @param {boolean} [asString=false] set to true to return the hash value as
 *     8-digit hex string instead of an integer
 * @param {integer} [seed] optionally pass the hash of the previous chunk
 * @returns {integer | string} unsigned 32-bit hash, or its zero-padded hex form
 */
function hashFnv32a(str, asString, seed) {
  /*jshint bitwise:false */
  let hash = seed === undefined ? 0x811c9dc5 : seed;
  for (let pos = 0; pos < str.length; pos++) {
    hash ^= str.charCodeAt(pos);
    // Multiply by the FNV prime 16777619 (= 2^24 + 2^8 + 2^7 + 2^4 + 2 + 1)
    // modulo 2^32.
    hash = Math.imul(hash, 0x01000193);
  }
  const unsigned = hash >>> 0;
  if (asString) {
    // Convert to 8 digit hex string
    return unsigned.toString(16).padStart(8, "0");
  }
  return unsigned;
}
Improve Collision Probability
As explained here, we can extend the hash bit size using this trick:
/**
 * Widens a 32-bit hash to 64 bits by hashing twice and concatenating.
 *
 * @param {string} str - Text to hash.
 * @returns {string} hash32(str) followed by hash32(hash32(str) + str).
 */
function hash64(str) {
  // First half: the plain hash (hash32 is expected to return an 8-character hex string).
  const firstHalf = hash32(str);
  // Second half: the hash of the first half prepended to the input.
  const secondHalf = hash32(firstHalf + str);
  return firstHalf + secondHalf;
}
Use it with care and don't expect too much though.
Based on accepted answer in ES6. Smaller, maintainable and works in modern browsers.
/**
 * Computes hash = hash * 31 + charCode over the UTF-16 code units of a string,
 * truncated to a signed 32-bit integer after every character.
 *
 * @param {string} str - Text to hash.
 * @returns {number} The signed 32-bit hash; 0 for the empty string.
 */
function hashCode(str) {
  let hash = 0;
  for (const unit of str.split('')) {
    hash = ((hash << 5) - hash + unit.charCodeAt(0)) | 0;
  }
  return hash;
}
// Test
console.log("hashCode(\"Hello!\"): ", hashCode('Hello!'));
EDIT (2019-11-04):
one-liner arrow function version :
// hash = hash * 31 + charCode per UTF-16 code unit, truncated to a signed
// 32-bit integer at every step.
const hashCode = (s) => {
  let hash = 0;
  for (const unit of s.split('')) {
    hash = ((hash << 5) - hash + unit.charCodeAt(0)) | 0;
  }
  return hash;
};
// test
console.log(hashCode('Hello!'));
I'm a bit surprised nobody has talked about the new SubtleCrypto API yet.
To get an hash from a string, you can use the subtle.digest method :
/**
 * Hashes a string with the Web Crypto API.
 *
 * @param {string} str - Text to hash; it is encoded as UTF-8 first.
 * @param {string} [algo="SHA-256"] - Digest algorithm name passed to
 *   crypto.subtle.digest.
 * @returns {Promise<string>} Resolves to the digest as a lowercase hex string.
 */
function getHash(str, algo = "SHA-256") {
  const strBuf = new TextEncoder().encode(str);
  return crypto.subtle.digest(algo, strBuf).then((hash) => {
    // hash is an ArrayBuffer; render it as hex, 32 bits at a time.
    // (The digest is no longer copied onto window.hash: that polluted the
    // global object and made the function fail wherever window is undefined.)
    const view = new DataView(hash);
    let result = '';
    for (let i = 0; i < hash.byteLength; i += 4) {
      result += view.getUint32(i).toString(16).padStart(8, '0');
    }
    return result;
  });
}
// getHash returns a promise; log the hex digest once it resolves.
getHash('hello world')
.then(hash => {
console.log(hash);
});
This is a refined and better performing variant, and matches Java's implementation of the standard object.hashCode() for CharSequence.
/**
 * Computes hash = hash * 31 + charCode over the UTF-16 code units of the
 * string, truncated to a signed 32-bit integer after every character.
 *
 * @returns {number} The signed 32-bit hash; 0 for the empty string.
 */
String.prototype.hashCode = function () {
  let result = 0;
  for (let pos = 0; pos < this.length; pos += 1) {
    // result * 31 equals (result << 5) - result modulo 2^32; "| 0" wraps to int32.
    result = (result * 31 + this.charCodeAt(pos)) | 0;
  }
  return result;
};
Here is also one that returns only positive hashcodes:
/**
 * Non-negative variant of hashCode(): shifts the signed 32-bit hash up by 2^31.
 *
 * @returns {number} this.hashCode() + 2147483648.
 */
String.prototype.hashcode = function () {
  const signedHash = this.hashCode();
  // 2147483647 + 1 = 2^31 moves the range [-2^31, 2^31 - 1] to [0, 2^32 - 1].
  return signedHash + 2147483647 + 1;
};
And here is a matching one for Java that only returns positive hashcodes:
/**
 * Non-negative variant of {@code obj.hashCode()}: widens the int hash to long
 * and shifts it up by 2^31.
 *
 * @param obj object to hash; must not be null
 * @return {@code obj.hashCode() + 2147483648L}, a value in [0, 2^32 - 1]
 */
public static long hashcode(Object obj) {
    // Widen first so the additions are carried out in 64-bit arithmetic.
    final long signedHash = obj.hashCode();
    return signedHash + Integer.MAX_VALUE + 1L;
}
Without prototype for those that do not want to attach it to String :
/**
 * Computes hash = hash * 31 + charCode over the UTF-16 code units of a string,
 * truncated to a signed 32-bit integer after every character.
 *
 * @param {string} str - Text to hash.
 * @returns {number} The signed 32-bit hash; 0 for the empty string.
 */
function hashCode(str) {
  var hash = 0, i = 0, len = str.length;
  while ( i < len ) {
    // "<< 0" truncates to a signed 32-bit integer.
    hash = ((hash << 5) - hash + str.charCodeAt(i++)) << 0;
  }
  return hash;
}

/**
 * Non-negative variant of hashCode(): shifts the signed hash up by 2^31.
 *
 * @param {string} str - Text to hash.
 * @returns {number} hashCode(str) + 2147483648, a value in [0, 2^32 - 1].
 */
function hashcode(str) {
  // The result has to be returned explicitly; without the return statement
  // every call evaluates to undefined.
  return hashCode(str) + 2147483647 + 1;
}
Enjoy!
If it helps anyone, I combined the top two answers into an older-browser-tolerant version, which uses the fast version if reduce is available and falls back to esmiralha's solution if it's not.
/**
 * Computes hash = hash * 31 + charCode over the UTF-16 code units of the
 * string, truncated to a signed 32-bit integer after every character.
 * Uses Array.prototype.reduce when it exists, a plain loop otherwise.
 *
 * @see http://stackoverflow.com/q/7616461/940217
 * @return {number}
 */
String.prototype.hashCode = function(){
  // One hashing step; "& / |" style truncation keeps the value in int32 range.
  var step = function (acc, code) {
    return ((acc << 5) - acc + code) | 0;
  };
  if (Array.prototype.reduce){
    return this.split("").reduce(function (acc, ch) {
      return step(acc, ch.charCodeAt(0));
    }, 0);
  }
  // Fallback for environments without reduce.
  var hash = 0;
  for (var i = 0; i < this.length; i++) {
    hash = step(hash, this.charCodeAt(i));
  }
  return hash;
}
Usage is like:
var hash = "some string to be hashed".hashCode();
UUID v3 and UUID v5 actually are hashes for a given input string.
UUID v3 is based on MD5,
UUID v5 is based on SHA-1.
So, the most obvious choice would be to go for UUID v5.
Fortunately, there is a popular npm package, which includes all UUID algorithms.
npm install uuid
To actually generate a UUID v5, you need a unique namespace. This namespace acts like a seed, and should be a constant, to assure that for a given input the output will always be the same. Ironically, you should generate a UUID v4 as a namespace. And the easiest way to do so, is using some online tool.
Once you've got a namespace, you're all set.
// Third-party dependency: the `uuid` npm package.
import { v5 as uuidv5 } from 'uuid';
// Fixed namespace UUID; it has to stay constant so that the same input always
// yields the same UUID.
const MY_NAMESPACE = '1b671a64-40d5-491e-99b0-da01ff1f3341';
const hash = uuidv5('input', MY_NAMESPACE);
If your input string will always be an URL for instance, then there are some default namespaces which you can use.
// uuidv5.URL is the namespace the package provides for URL inputs.
const hashForURL = uuidv5('https://www.w3.org/', uuidv5.URL);
Here is a compact ES6 friendly readable snippet
/**
 * Computes hash = hash * 31 + charCode over the UTF-16 code units of a string.
 *
 * @param {string} str - Text to hash.
 * @returns {number} Signed 32-bit integer; 0 for the empty string.
 */
const stringHashCode = (str) => {
  let acc = 0;
  let pos = 0;
  while (pos < str.length) {
    // Math.imul wraps its operands to 32 bits, so the running value never
    // needs to be truncated inside the loop.
    acc = Math.imul(31, acc) + str.charCodeAt(pos);
    pos += 1;
  }
  return acc | 0;
};
I'm kinda late to the party, but you can use this module: crypto:
const crypto = require('crypto');
// Key used for every HMAC computed by generateHash.
const SALT = '$ome$alt';

/**
 * Computes the HMAC-SHA256 of a value, keyed with SALT.
 *
 * @param {string|Buffer} pass - Data to authenticate.
 * @returns {string} The digest as a 64-character lowercase hex string.
 */
function generateHash(pass) {
  const hmac = crypto.createHmac('sha256', SALT);
  hmac.update(pass);
  return hmac.digest('hex');
}
The result of this function is always a 64-character string; something like this: "aa54e7563b1964037849528e7ba068eb7767b1fab74a8d80fe300828b996714a"
My quick (very long) one liner based on FNV's Multiply+Xor method:
// Folds the char codes with a + ((a << 7) + (a << 3)), then XORs the next code
// (^ binds looser than +), and prints the result in hex. reduce has no initial
// value, so the first char code is the seed and an empty string throws a TypeError.
my_string.split('').map(v=>v.charCodeAt(0)).reduce((a,v)=>a+((a<<7)+(a<<3))^v).toString(16);
Thanks to the example by mar10, I found a way to get the same results in C# AND Javascript for an FNV-1a. If unicode chars are present, the upper portion is discarded for the sake of performance. Don't know why it would be helpful to maintain those when hashing, as am only hashing url paths for now.
C# Version
// FNV-1a 32-bit parameters: initial hash value (offset basis) and multiplier (prime).
private static readonly UInt32 FNV_OFFSET_32 = 0x811c9dc5; // 2166136261
private static readonly UInt32 FNV_PRIME_32 = 0x1000193; // 16777619
// Unsigned 32bit integer FNV-1a
// For every character: XOR its low byte into the hash, then multiply by the prime.
public static UInt32 HashFnv32u(this string s)
{
// byte[] arr = Encoding.UTF8.GetBytes(s); // 8 bit expanded unicode array
char[] arr = s.ToCharArray(); // 16 bit unicode is native .net
UInt32 hash = FNV_OFFSET_32;
for (var i = 0; i < s.Length; i++)
{
// Strips unicode bits, only the lower 8 bits of the values are used
hash = hash ^ unchecked((byte)(arr[i] & 0xFF));
// NOTE(review): this multiplication is expected to wrap around modulo 2^32;
// confirm the project is not compiled with overflow checking enabled.
hash = hash * FNV_PRIME_32;
}
return hash;
}
// Signed hash for storing in SQL Server
// Reinterprets the unsigned hash as a signed 32-bit integer without range checking.
public static Int32 HashFnv32s(this string s)
{
return unchecked((int)s.HashFnv32u());
}
JavaScript Version
var utils = utils || {};
// FNV-1a 32-bit offset basis (initial hash value).
utils.FNV_OFFSET_32 = 0x811c9dc5;

/**
 * FNV-1a 32-bit hash over the low byte of every UTF-16 code unit.
 *
 * @param {string} input - Text to hash.
 * @returns {number} Unsigned 32-bit hash.
 */
utils.hashFnv32a = function (input) {
  var hash = utils.FNV_OFFSET_32;
  for (var pos = 0; pos < input.length; pos++) {
    // Strips unicode bits, only the lower 8 bits of the values are used
    hash ^= input.charCodeAt(pos) & 0xFF;
    // Multiply by the FNV prime 16777619 modulo 2^32.
    hash = Math.imul(hash, 0x01000193);
  }
  return hash >>> 0;
};

/**
 * Formats a value as an 8-digit, zero-padded, lowercase hex string.
 *
 * @param {number} val - Value; it is converted to an unsigned 32-bit integer first.
 * @returns {string} Eight hex digits.
 */
utils.toHex = function (val) {
  return (val >>> 0).toString(16).padStart(8, "0");
};
A fast and concise one which was adapted from here:
/**
 * hash = (hash * 33) XOR charCode, starting from 5381 and walking the string
 * from its last UTF-16 code unit to its first.
 *
 * @returns {number} Unsigned 32-bit hash; 5381 for the empty string.
 */
String.prototype.hashCode = function () {
  let hash = 5381;
  for (let pos = this.length - 1; pos >= 0; pos -= 1) {
    // The XOR truncates the running value to 32 bits on every step.
    hash = (hash * 33) ^ this.charCodeAt(pos);
  }
  return hash >>> 0;
};
SubtleCrypto.digest
I’m not utilizing a server-side language so I can’t do it that way.
Are you sure you can’t do it that way?
Did you forget you’re using Javascript, the language ever-evolving?
Try SubtleCrypto. It supports SHA-1, SHA-256, SHA-384, and SHA-512 hash functions.
/**
 * Computes the SHA-512 digest of a string with the Web Crypto API.
 *
 * @param {string} message - Text to hash; it is encoded as UTF-8 first.
 * @returns {Promise<ArrayBuffer>} Resolves to the 64-byte digest.
 */
async function hash(message/*: string */) {
  const text_encoder = new TextEncoder;
  const data = text_encoder.encode(message);
  // Use the global `crypto` rather than `window.crypto`, so the function also
  // works where no window object exists (for example in workers).
  const message_digest = await crypto.subtle.digest("SHA-512", data);
  return message_digest;
} // -> ArrayBuffer
/**
 * Renders the bytes of a buffer as lowercase hex, two digits per byte.
 *
 * @param {ArrayBuffer} data - Bytes to format.
 * @returns {string} Hex string of length 2 * data.byteLength.
 */
function in_hex(data/*: ArrayBuffer */) {
  let hex = "";
  for (const octet of new Uint8Array(data)) {
    hex += octet.toString(16).padStart(2, "0");
  }
  return hex;
} // -> string
// Immediately-invoked async demo: hash a sample message and print the digest as hex.
(async function demo() {
console.log(in_hex(await hash("Thanks for the magic.")));
})();
The Jenkins One at a Time Hash is quite nice:
//Credits (modified code): Bob Jenkins (http://www.burtleburtle.net/bob/hash/doobs.html)
//See also: https://en.wikipedia.org/wiki/Jenkins_hash_function
/**
 * Jenkins one-at-a-time hash of a string.
 *
 * @param {string} keyString - Text of any size (hashed per UTF-16 code unit).
 * @returns {string} The unsigned 32-bit hash in lowercase hex. The string is
 *   not zero-padded, so it has at most 8 characters ("0" for an empty input).
 */
function jenkinsOneAtATimeHash(keyString)
{
  let hash = 0;
  // charIndex is declared locally: assigning to an undeclared name throws a
  // ReferenceError in strict mode and in ES modules.
  for (let charIndex = 0; charIndex < keyString.length; ++charIndex)
  {
    hash += keyString.charCodeAt(charIndex);
    hash += hash << 10;
    hash ^= hash >> 6;
  }
  hash += hash << 3;
  hash ^= hash >> 11;
  //4,294,967,295 is FFFFFFFF, the maximum 32 bit unsigned integer value, used here as a mask.
  return (((hash + (hash << 15)) & 4294967295) >>> 0).toString(16)
};
Examples:
jenkinsOneAtATimeHash('test')
"31c25ec1"
jenkinsOneAtATimeHash('a')
"ca2e9442"
jenkinsOneAtATimeHash('0')
"6e3c5c6b"
You can also remove the .toString(16) part at the end to generate numbers:
jenkinsOneAtATimeHash2('test')
834821825
jenkinsOneAtATimeHash2('a')
3392050242
jenkinsOneAtATimeHash2('0')
1849449579
Note that if you do not need to hash a string or key specifically, but just need a hash generated out of thin air, you can use:
window.crypto.getRandomValues(new Uint32Array(1))[0].toString(16)
Examples:
window.crypto.getRandomValues(new Uint32Array(1))[0].toString(16)
"6ba9ea7"
window.crypto.getRandomValues(new Uint32Array(1))[0].toString(16)
"13fe7edf"
window.crypto.getRandomValues(new Uint32Array(1))[0].toString(16)
"971ffed4"
And, the same as above, you can remove the `.toString(16)` part at the end to generate numbers:
window.crypto.getRandomValues(new Uint32Array(1))[0]
1154752776
window.crypto.getRandomValues(new Uint32Array(1))[0]
3420298692
window.crypto.getRandomValues(new Uint32Array(1))[0]
1781389127
Note: You can also generate multiple values at once with this method, e.g.:
window.crypto.getRandomValues(new Uint32Array(3))
Uint32Array(3) [ 2050530949, 3280127172, 3001752815 ]
I needed a similar function (but different) to generate a unique-ish ID based on the username and current time. So:
# Returns an upper-case base-36 id built from a number derived from the user
# name, a random integer below 1e15 and the current millisecond (0-999).
window.newId = ->
  # create a number based on the username
  # The guard, the initial value and the accumulation all use the same
  # property, window.MyNamespace.userNumber; mixing it with window.userNumber
  # left the accumulator undefined and turned the sum into NaN.
  unless window.MyNamespace.userNumber?
    window.MyNamespace.userNumber = 0
    for c, i in window.MyNamespace.userName
      char = window.MyNamespace.userName.charCodeAt(i)
      window.MyNamespace.userNumber += char
  ((window.MyNamespace.userNumber + Math.floor(Math.random() * 1e15) + new Date().getMilliseconds()).toString(36)).toUpperCase()
Produces:
2DVFXJGEKL
6IZPAKFQFL
ORGOENVMG
... etc
edit Jul 2022: As #canRau points out the authors of shortid prefer nanoid now https://github.com/ai/nanoid/
If you want to avoid collisions you may want to use a secure hash like SHA-256.
There are several JavaScript SHA-256 implementations.
I wrote tests to compare several hash implementations, see https://github.com/brillout/test-javascript-hash-implementations.
Or go to http://brillout.github.io/test-javascript-hash-implementations/, to run the tests.
I do not see any reason to use this overcomplicated crypto code instead of ready-to-use solutions, like object-hash library, or etc. relying on vendor is more productive, saves time and reduces maintenance cost.
Just use https://github.com/puleos/object-hash
// Load the third-party object-hash package (https://github.com/puleos/object-hash).
var hash = require('object-hash');
// Hash an object and an array; the trailing comments show the sample digests.
hash({foo: 'bar'}) // => '67b69634f9880a282c14a0f0cb7ba20cf5d677e9'
hash([1, 2, 2.718, 3.14159]) // => '136b9b88375971dff9f1af09d7356e3e04281951'
I have combined the two solutions (users esmiralha and lordvlad) to get a function that should be faster for browsers that support the js function reduce() and still compatible with old browsers:
/**
 * Signed 32-bit hash of the string, computed as hash = hash * 31 + charCode
 * over every UTF-16 code unit with wrap-around to 32 bits.
 * Uses Array.prototype.reduce when the engine provides it and a plain loop otherwise.
 * Returns 0 for the empty string.
 */
String.prototype.hashCode = function() {
  // Legacy engines without reduce(): walk the code units by hand.
  if (!Array.prototype.reduce) {
    var acc = 0;
    for (var pos = 0; pos < this.length; pos++) {
      // (acc << 5) - acc is acc * 31; "| 0" truncates to a 32-bit integer.
      acc = (((acc << 5) - acc) + this.charCodeAt(pos)) | 0;
    }
    return acc;
  }
  return this.split("").reduce(function(acc, ch) {
    var next = ((acc << 5) - acc) + ch.charCodeAt(0);
    // x & x forces the running value back into the signed 32-bit range.
    return next & next;
  }, 0);
};
Example:
my_string = 'xyz';
my_string.hashCode();
I went for a simple concatenation of char codes converted to hex strings. This serves a relatively narrow purpose, namely just needing a hash representation of a SHORT string (e.g. titles, tags) to be exchanged with a server side that for not relevant reasons can't easily implement the accepted hashCode Java port. Obviously no security application here.
/**
 * Concatenates the hexadecimal char code of every UTF-16 code unit in the string.
 * Codes are NOT zero-padded, so the output is only a rough fingerprint meant
 * for short strings; it is not reversible and not suitable for security.
 */
String.prototype.hash = function() {
  var hexParts = [];
  for (var pos = 0; pos < this.length; pos++) {
    hexParts.push(this.charCodeAt(pos).toString(16));
  }
  return hexParts.join('');
}
This can be made more terse and browser-tolerant with Underscore. Example:
"Lorem Ipsum".hash()
"4c6f72656d20497073756d"
I suppose if you wanted to hash larger strings in similar fashion you could just reduce the char codes and hexify the resulting sum rather than concatenate the individual characters together:
/**
 * Sums the char codes of every UTF-16 code unit in the string and returns
 * the total as a hexadecimal string ("0" for the empty string).
 * Order-insensitive, so collisions are common.
 */
String.prototype.hashLarge = function() {
  var total = 0;
  for (var pos = 0; pos < this.length; pos++) {
    total += this.charCodeAt(pos);
  }
  return total.toString(16);
}
'One time, I hired a monkey to take notes for me in class. I would just sit back with my mind completely blank while the monkey scribbled on little pieces of paper. At the end of the week, the teacher said, "Class, I want you to write a paper using your notes." So I wrote a paper that said, "Hello! My name is Bingo! I like to climb on things! Can I have a banana? Eek, eek!" I got an F. When I told my mom about it, she said, "I told you, never trust a monkey!"'.hashLarge()
"9ce7"
Naturally more risk of collision with this method, though you could fiddle with the arithmetic in the reduce however you wanted to diversify and lengthen the hash.
Adding this because nobody did yet, and this seems to be asked for and implemented a lot with hashes, but it's always done very poorly...
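This takes a string input and the maximum number you want the hash to equal, and produces a deterministic number between 0 and that maximum based on the string input (the same string always gives the same number, although different strings can collide).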
You can use this to produce a unique index into an array of images (If you want to return a specific avatar for a user, chosen at random, but also chosen based on their name, so it will always be assigned to someone with that name).
You can also use this, of course, to return an index into an array of colors, like for generating unique avatar background colors based on someone's name.
/**
 * Maps a string to an integer between 0 and `max` (both inclusive).
 * A signed 32-bit hash (hash * 31 + charCode per UTF-16 code unit) is scaled
 * by max / 2^31 and rounded.
 * @param {string} str - text to hash
 * @param {number} [max=1000] - largest value the result may take
 * @returns {number} rounded, scaled hash
 */
function hashInt (str, max = 1000) {
  const hash = str.split('').reduce(
    // (acc << 5) - acc is acc * 31; "| 0" wraps to a signed 32-bit integer.
    (acc, unit) => (((acc << 5) - acc) + unit.charCodeAt(0)) | 0,
    0
  );
  const magnitude = Math.abs(hash);
  return Math.round(max * magnitude / 2147483648);
}
This generates a consistent hash based on any number of params passed in:
/**
 * Generates a hash from the params passed in.
 *
 * The arguments are joined with '|' and folded into a signed 32-bit hash
 * (hash * 31 + charCode per UTF-16 code unit).
 *
 * @param {...*} params - values to hash; each is stringified by Array.prototype.join
 * @returns {string} decimal string of the hash; "0" when nothing (or only an
 *   empty string) is passed, so the return type is always a string
 */
function fastHashParams(...params) {
  const joined = params.join('|');
  let hash = 0;
  for (let i = 0; i < joined.length; i++) {
    // (hash << 5) - hash is hash * 31; "| 0" converts to a 32-bit integer.
    hash = (((hash << 5) - hash) + joined.charCodeAt(i)) | 0;
  }
  return String(hash);
}
fastHashParams('hello world') outputs "990433808"
fastHashParams('this',1,'has','lots','of','params',true) outputs "1465480334"
Slightly simplified version of @esmiralha's answer.
I don't override String in this version, since that could result in some undesired behaviour.
/**
 * Signed 32-bit hash of a string: hash = hash * 31 + charCode for every
 * UTF-16 code unit, wrapping to 32 bits after each step.
 * @param {string} str - text to hash
 * @returns {number} signed 32-bit integer (0 for the empty string)
 */
function hashCode(str) {
  let acc = 0;
  let pos = 0;
  while (pos < str.length) {
    // Math.imul gives the 32-bit product; "| 0" wraps the sum back to 32 bits.
    acc = (Math.imul(acc, 31) + str.charCodeAt(pos)) | 0;
    pos += 1;
  }
  return acc;
}
/**
 * Signed 32-bit hash of a string (hash * 31 + charCode per UTF-16 code unit).
 * @param {string} str - text to hash
 * @returns {number} signed 32-bit integer (0 for the empty string)
 */
function hashCode(str) {
  let hash = 0;
  for (const unit of str.split('')) {
    // (hash << 5) - hash is hash * 31; "| 0" truncates to 32 bits.
    hash = (((hash << 5) - hash) + unit.charCodeAt(0)) | 0;
  }
  return hash;
}
// Test
console.log("hashCode(\"Hello!\"): ", hashCode('Hello!'));

Create a large bit field?

I want to create a large bit field in JavaScript that will effectively represent a multi-dimensional array of markers (uses indexing to jump to various dimensions in the physical "1D" structure).
Rather than a bunch of numbers, I'm considering how I might use a string as bits, so I can allocate a string of the appropriate length first. Considerations such as data types, Unicode and conversions come into play (also no Unicode support before JavaScript 1.3).
However I'm open about other suggestions how to use JavaScript to achieve a large bit field.
Update:
Just for informational purposes: On average I might be using ~2187 bits/markers (274 bytes), but would like a generic answer that can accommodate many more bits.
One problem with strings is that they are immutable, so if you want to change anything, you would need to rebuild the string.
I would just stick to using numbers. Using the bitwise operators, you can fit 32 bits in each number.
You could fit up to 53 bits, since JavaScript numbers are double-precision floating point, but the bitwise operators convert their operands to 32-bit integers, so you wouldn't be able to use them to get at the individual bits (if you wanted to, you could accomplish the same thing with combinations of division, Math.pow, etc. but it would be more complicated).
Here's a basic implementation that lets you get, set, and unset individual bits:
/**
 * Growable bit field backed by a plain array of numbers, 32 bits per element.
 * Elements are created lazily the first time a bit in them is written.
 */
function BitField() {
  this.values = []; // no pre-sizing; slots appear on first write
}

/** Returns true when bit `i` is set, false otherwise (including never-written slots). */
BitField.prototype.get = function(i) {
  var mask = 1 << (i % 32);
  var word = this.values[~~(i / 32)]; // ~~ truncates the quotient to an integer slot
  return (word & mask) !== 0;
};

/** Turns bit `i` on. */
BitField.prototype.set = function(i) {
  var slot = ~~(i / 32);
  var mask = 1 << (i % 32);
  this.values[slot] = this.values[slot] | mask;
};

/** Turns bit `i` off. */
BitField.prototype.unset = function(i) {
  var slot = ~~(i / 32);
  var mask = 1 << (i % 32);
  this.values[slot] = this.values[slot] & ~mask;
};
In recent browsers, efficient numeric array types are available. There is no bit-array, but you might use Uint8Array or Uint32Array and pack the bits yourself (in a similar fashion to Matthew Crumley's answer; just use a numeric array instead of []).
Obsolete but equivalent answer (CanvasPixelArray has been replaced by Uint8ClampedArray):
If the browser you're targeting supports <canvas>, then you might borrow a CanvasPixelArray object (canvas.getContext("2d").createImageData(...).data; note that this need not be the same size as the canvas) to (hopefully) memory-efficiently store the data (each element is an octet). And if your data is 2D, you can get a visualization for free!
This is an extension to Matthew Crumley's post from 2010:
I took Matthew's code, added pre-allocation and compared it to typed array implementations.
This jsperf shows that Chrome is the fastest and sanest (I would expect Uint32Array to perform the fastest) and that IE only defined the interfaces but did not care to optimize typed arrays. The Firefox results are obscured because the console is flooded with warnings about how JSPerf "compiles" the test code.
("Other" is (my apparently very private) IE 11.)
Uint8Array Implementation
/**
 * Fixed-size bit field stored in a Uint8Array, 8 bits per element.
 * @param {number} nSize - number of bits to allocate (rounded up to whole bytes)
 */
function BitField8(nSize) {
  var byteCount = Math.ceil(nSize / 8) | 0;
  this.values = new Uint8Array(byteCount);
}

/** Returns true when bit `i` is set. */
BitField8.prototype.get = function(i) {
  var mask = 1 << (i % 8);
  var octet = this.values[~~(i / 8)]; // ~~ truncates the quotient to a byte index
  return (octet & mask) !== 0;
};

/** Turns bit `i` on. */
BitField8.prototype.set = function(i) {
  var slot = ~~(i / 8);
  this.values[slot] = this.values[slot] | (1 << (i % 8));
};

/** Turns bit `i` off. */
BitField8.prototype.unset = function(i) {
  var slot = ~~(i / 8);
  this.values[slot] = this.values[slot] & ~(1 << (i % 8));
};
Uint32Array Implementation
/**
 * Fixed-size bit field stored in a Uint32Array, 32 bits per element.
 * @param {number} nSize - number of bits to allocate (rounded up to whole 32-bit words)
 */
function BitField32(nSize) {
  var wordCount = Math.ceil(nSize / 32) | 0;
  this.values = new Uint32Array(wordCount);
}

/** Returns true when bit `i` is set. */
BitField32.prototype.get = function(i) {
  var wordIndex = ~~(i / 32); // ~~ truncates the quotient to a word index
  var bitMask = 1 << (i % 32);
  var hit = this.values[wordIndex] & bitMask;
  return hit !== 0;
};

/** Turns bit `i` on. */
BitField32.prototype.set = function(i) {
  var wordIndex = ~~(i / 32);
  var bitMask = 1 << (i % 32);
  this.values[wordIndex] = this.values[wordIndex] | bitMask;
};

/** Turns bit `i` off. */
BitField32.prototype.unset = function(i) {
  var wordIndex = ~~(i / 32);
  var bitMask = 1 << (i % 32);
  this.values[wordIndex] = this.values[wordIndex] & ~bitMask;
};
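In chrome, I get about 10,000 bits. Note, however, that `<<` on ordinary Numbers operates on 32-bit integers and uses only the low five bits of the shift count, so a Number-based bit field cannot hold more than 32 flags; a wider field needs BigInt values or an array of 32-bit words.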
// Single-value bit field wide enough for thousands of flags.
// BigInt is required: with Numbers, `<<` works on 32-bit integers and masks the
// shift count to its low 5 bits, so `2 << 10000` is just `2 << 16` and the
// field could never exceed 32 bits.
let bitfield = 0n;
const flag1 = 2n << 1n; // same numeric value as the Number form (4)
const flag2 = 2n << 2n; // same numeric value as the Number form (8)
const flagmax = 2n << 10000n; // a genuine bit at position 10001
bitfield |= flagmax;
// A BigInt is truthy when non-zero, so this tests whether the flag is set.
if (bitfield & flagmax) {
  doSomething();
}

Categories

Resources