Converting HashString from C++ to JS - javascript

I'm trying to convert this function from the Mozilla Firefox code base; it's called HashString. It calls a bunch of functions, which are all in this file: https://dxr.mozilla.org/mozilla-central/source/mfbt/HashFunctions.h#294
These are the C++ functions it calls:
static const uint32_t kGoldenRatioU32 = 0x9E3779B9U;

MOZ_WARN_UNUSED_RESULT inline uint32_t
HashString(const wchar_t* aStr)
{
  return detail::HashUntilZero(aStr);
}

template<typename T>
uint32_t
HashUntilZero(const T* aStr)
{
  uint32_t hash = 0;
  for (T c; (c = *aStr); aStr++) {
    hash = AddToHash(hash, c);
  }
  return hash;
}

MOZ_WARN_UNUSED_RESULT inline uint32_t
AddToHash(uint32_t aHash, A* aA)
{
  /*
   * You might think this function should just take a void*. But then we'd only
   * catch data pointers and couldn't handle function pointers.
   */
  static_assert(sizeof(aA) == sizeof(uintptr_t), "Strange pointer!");
  return detail::AddUintptrToHash<sizeof(uintptr_t)>(aHash, uintptr_t(aA));
}

inline uint32_t
AddUintptrToHash<8>(uint32_t aHash, uintptr_t aValue)
{
  /*
   * The static cast to uint64_t below is necessary because this function
   * sometimes gets compiled on 32-bit platforms (yes, even though it's a
   * template and we never call this particular override in a 32-bit build). If
   * we do aValue >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t
   * right 32 bits, and the compiler throws an error.
   */
  uint32_t v1 = static_cast<uint32_t>(aValue);
  uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(aValue) >> 32);
  return AddU32ToHash(AddU32ToHash(aHash, v1), v2);
}

inline uint32_t
AddU32ToHash(uint32_t aHash, uint32_t aValue)
{
  return kGoldenRatioU32 * (RotateBitsLeft32(aHash, 5) ^ aValue);
}

inline uint32_t
RotateBitsLeft32(uint32_t aValue, uint8_t aBits)
{
  MOZ_ASSERT(aBits < 32);
  return (aValue << aBits) | (aValue >> (32 - aBits));
}
And here is my js code:
function HashString(aStr, aLength) {
  // moz win32 hash function
  if (aLength) {
    console.error('NS_ERROR_NOT_IMPLEMENTED');
    throw Components.results.NS_ERROR_NOT_IMPLEMENTED;
  } else {
    return HashUntilZero(aStr);
  }
}

function HashUntilZero(aStr) {
  var hash = 0;
  //for (T c; (c = *aStr); aStr++) {
  for (var c = 0; c < aStr.length; c++) {
    hash = AddToHash(hash, aStr.charCodeAt(c));
  }
  return hash;
}

function AddToHash(aHash, aA) {
  //return detail::AddU32ToHash(aHash, aA);
  //return AddU32ToHash(aHash, aA);
  //return detail::AddUintptrToHash<sizeof(uintptr_t)>(aHash, aA);
  return AddUintptrToHash(aHash, aA);
}

function AddUintptrToHash(aHash, aValue) {
  //return AddU32ToHash(aHash, static_cast<uint32_t>(aValue));
  return AddU32ToHash(aHash, aValue);
}

function AddU32ToHash(aHash, aValue) {
  var kGoldenRatioU32 = 0x9E3779B9;
  return (kGoldenRatioU32 * (RotateBitsLeft32(aHash, 5) ^ aValue));
}

function RotateBitsLeft32(aValue, aBits) {
  // MOZ_ASSERT(aBits < 32);
  return (aValue << aBits) | (aValue >> (32 - aBits));
}
console.log(HashString('C:\Users\Vayeate\AppData\Roaming\Mozilla\Firefox\Profiles\aksozfjt.Unnamed Profile 10')); // should return 3181739213
This isn't working right: HashString('C:\Users\Vayeate\AppData\Roaming\Mozilla\Firefox\Profiles\aksozfjt.Unnamed Profile 10') should return 3181739213, but it keeps returning -159266146140.

Let's implement a more minimal C++ version first, one that also dumps intermediate values which we can compare against later.
#include <iostream>
#include <iomanip>
#include <string>
#include <stdint.h>

using namespace std;

static const uint32_t gr = 0x9E3779B9U;

template<typename T>
static uint32_t add(uint32_t hash, T val) {
  const uint32_t rv = gr * (((hash << 5) | (hash >> 27)) ^ val);
  cerr << dec << setw(7) << (uint32_t)val << " " << setw(14) << rv << " " << hex << rv << endl;
  return rv;
}

int main() {
  const auto text = string("C:\\Users\\Vayeate\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\aksozfjt.Unnamed Profile 10");
  uint32_t rv = 0;
  for (auto c: text) {
    rv = add(rv, c);
  }
  cout << "Result: " << dec << setw(14) << rv << " " << hex << rv << endl;
}
Result: 3181739213 bda57ccd, so we're on the right track.
Now, for some Javascript:
GetNativePath returns an nsAutoCString, i.e. an 8-bit string, by converting the internal 16-bit string to UTF-8.
Javascript does not actually know about 32-bit unsigned integers, just 32-bit signed integers, but there are some dirty tricks (mainly the >>> 0 "unsigned cast").
32-bit unsigned multiplication does not work directly (the full product loses precision in a double), but we can implement that operation ourselves; see the snippet below.
Properly escaping the backslashes \ in your test string also helps ;)
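For illustration, a quick console check of the unsigned-cast and multiplication points (the constants here are only examples; newer engines also expose Math.imul, which does the 32-bit multiply natively):
console.log(-1 >>> 0);              // 4294967295: ">>> 0" reinterprets the bits as uint32
const a = 0x9E3779B9, b = 0xFFFFFFFF;
console.log(a * b);                 // ~1.14e19: exceeds 2^53, so the low bits are already lost
console.log(Math.imul(a, b) >>> 0); // 1640531527: the correct low 32 bits of the product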
Putting these things together, I arrived at the following function, which seems to produce correct results.
/**
 * Javascript implementation of
 * https://hg.mozilla.org/mozilla-central/file/0cefb584fd1a/mfbt/HashFunctions.h
 * aka. the mfbt hash function.
 */
let HashString = (function() {
  // Note: >>>0 is basically a cast-to-unsigned for our purposes.
  const encoder = new TextEncoder("utf-8");
  const kGoldenRatio = 0x9E3779B9;

  // Multiply two uint32_t like C++ would ;)
  const mul32 = (a, b) => {
    // Split into 16-bit integers (hi and lo words).
    let ahi = (a >> 16) & 0xffff;
    let alo = a & 0xffff;
    let bhi = (b >> 16) & 0xffff;
    let blo = b & 0xffff;
    // Compute new hi and lo separately and recombine.
    return (
      (((((ahi * blo) + (alo * bhi)) & 0xffff) << 16) >>> 0) +
      (alo * blo)
    ) >>> 0;
  };

  // kGoldenRatioU32 * (RotateBitsLeft32(aHash, 5) ^ aValue);
  const add = (hash, val) => {
    // Note: cannot >> 27 here, but / (1<<27) works as well.
    let rotl5 = (
      ((hash << 5) >>> 0) |
      (hash / (1<<27)) >>> 0
    ) >>> 0;
    return mul32(kGoldenRatio, (rotl5 ^ val) >>> 0);
  };

  return function(text) {
    // Convert to UTF-8.
    // Also decomposes the string into uint8_t values already.
    let data = encoder.encode(text);
    // Compute the actual hash.
    let rv = 0;
    for (let c of data) {
      rv = add(rv, c | 0);
    }
    return rv;
  };
})();
let res = HashString('C:\\Users\\Vayeate\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\aksozfjt.Unnamed Profile 10');
console.log(res, res === 3181739213);
Might not be the most efficient implementation, but well, it works at least ;)

There is a simpler way
var file = new FileUtils.File('C:\\Users\\Vayeate\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\aksozfjt.Unnamed Profile 10');
file.QueryInterface(Ci.nsIHashable);
console.log(file.hashCode === 3181739213);

Related

Hash algorithm issue with converting code from javascript to python

To get the same hash function in JavaScript and Python, I tried to convert my JavaScript function to Python, and I am completely stuck: the Python variant produces an astronomically large int as the result.
javascript
function str_hash(s) {
  var hash = 0, i, chr;
  if (s.length === 0) return hash;
  for (i = 0; i < s.length; i++) {
    chr = s.charCodeAt(i);
    hash = ((hash << 5) - hash) + chr;
    hash |= 0;
  }
  return hash;
}
The output is something like: -34998534.
My Python attempt:
def get_hash(s):
    h = 0
    if not s:
        return h
    for i in range(0, len(s)):
        h = ((h << 5) - h) + i
        h |= 0
    return h

print(get_hash('PUT LONG STRING HERE'))
The output is something like: 349832894283058945028049523548405975892375489743847490218348927483984793048218934148973940218340298489273942374902938490238482942930729487210948239407329403082738940214.
Any ideas how to fix it?
Here is the Python equivalent code:
def str_hash(s):
    hash = 0
    for i in range(len(s)):
        chr = ord(s[i])
        hash = ((hash << 5) - hash) + chr
        hash = hash & 0xFFFFFFFF
    return hash

print(str_hash("PUT LONG STRING HERE"))
The << operator in JavaScript is the bitwise left shift operator; what it does natively is shown in the example below:
let num = 4;
let shiftBy = 2;
console.log(num * 2 ** shiftBy); // num * Math.pow(2, shiftBy)
console.log(num << shiftBy);     // same result: 16
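The other half of the story is hash |= 0: every bitwise operator in JavaScript truncates its result to a signed 32-bit integer, which is what the & 0xFFFFFFFF mask emulates in the Python port (note that the JavaScript value can still come out negative, while the masked Python value is always non-negative). A small illustration, with made-up values:
let wide = 2 ** 40 + 5;     // a value wider than 32 bits
console.log(wide | 0);      // 5: "| 0" keeps only the low 32 bits, as a signed value
console.log((2 ** 31) | 0); // -2147483648: results with the high bit set come out negative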

Javascript equivalent of Java's UUID class

In Java, you can do something like:
UUID id = UUID.fromString("eb66c416-4739-465b-9af3-9dc33ed8eef9");
long msb = id.getMostSignificantBits();
long lsb = id.getLeastSignificantBits();
System.out.println(msb + ", " + lsb);
// -1484283427208739237, -7281302710629372167
System.out.println(new UUID(msb, lsb));
// eb66c416-4739-465b-9af3-9dc33ed8eef9
This same example is referenced in another question which is pretty similar, so this is a follow-up. While the related question solved the lsb, msb -> string problem, I cannot find a solution for the reverse problem, string -> msb, lsb.
The original solution was
function toUuidString(lsb, msb) {
  return `${digits(msb >> 32n, 8n)}-${digits(msb >> 16n, 4n)}-${digits(
    msb,
    4n
  )}-${digits(lsb >> 48n, 4n)}-${digits(lsb, 12n)}`
}

function digits(value, ds) {
  const hi = 1n << (ds * 4n)
  return (hi | (value & (hi - 1n))).toString(16).slice(1)
}
Now I'd like to have a function that takes in a string and returns msb and lsb.
Following the original question's path, I looked at the Java source code and tried to write the equivalent, which would be:
function fromString(name) {
  let components = name.split('-')
  if (components.length !== 5) {
    throw new Error(`Invalid UUID string: ${name}`)
  }
  for (let index = 0; index < 5; index++) {
    components[index] = `0x${components[index]}`
  }
  let mostSigBits = Number.parseInt(components[0], 16)
  mostSigBits <<= 16
  mostSigBits |= Number.parseInt(components[1], 16)
  mostSigBits <<= 16
  mostSigBits |= Number.parseInt(components[2], 16)
  let leastSigBits = Number.parseInt(components[3], 16)
  leastSigBits <<= 48
  leastSigBits |= Number.parseInt(components[4], 16)
  return {
    leastSigBits,
    mostSigBits,
  }
}
However, when I try to test this with something like:
const originalUuid = 'eb66c416-4739-465b-9af3-9dc33ed8eef9'
const parts = fromString(originalUuid)
const newUUid = toUuidString(
  BigInt(parts.leastSigBits),
  BigInt(parts.mostSigBits)
)
console.log('Original', originalUuid)
console.log('New', newUUid)
I do not get equivalent UUIDs. They have some equivalent parts, but other parts are missing:
Original eb66c416-4739-465b-9af3-9dc33ed8eef9
New 00000000-4739-465b-ffff-ffffbefbeef9
Any ideas what went wrong?
Finally I found the problem: the two pieces of code were not strictly equivalent. The Java source declares mostSigBits and leastSigBits as long, which cannot be represented exactly by a JavaScript Number, so we need to use BigInt.
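A quick sketch of the difference, using the first group of the example UUID:
const hi = Number.parseInt('eb66c416', 16); // 3949937686: fine as a plain Number
console.log(hi << 16);                      // a truncated, negative value: << coerces its operand to signed 32 bits
console.log(BigInt(hi) << 16n);             // the full 48-bit value, preserved as a BigInt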
To sum up this question and the previous one, the JavaScript equivalent of Java's UUID operations would be:
string -> msb, lsb
function fromString(name) {
  let components = name.split('-')
  if (components.length !== 5) {
    throw new Error(`Invalid UUID string: ${name}`)
  }
  for (let index = 0; index < 5; index++) {
    components[index] = `0x${components[index]}`
  }
  let mostSigBits = BigInt(Number.parseInt(components[0], 16))
  mostSigBits <<= 16n
  mostSigBits |= BigInt(Number.parseInt(components[1], 16))
  mostSigBits <<= 16n
  mostSigBits |= BigInt(Number.parseInt(components[2], 16))
  let leastSigBits = BigInt(Number.parseInt(components[3], 16))
  leastSigBits <<= 48n
  leastSigBits |= BigInt(Number.parseInt(components[4], 16))
  return {
    leastSigBits,
    mostSigBits,
  }
}
msb, lsb -> string (from the referenced question)
function toUuidString(lsb, msb) {
  return `${digits(msb >> 32n, 8n)}-${digits(msb >> 16n, 4n)}-${digits(
    msb,
    4n
  )}-${digits(lsb >> 48n, 4n)}-${digits(lsb, 12n)}`
}

function digits(value, ds) {
  const hi = 1n << (ds * 4n)
  return (hi | (value & (hi - 1n))).toString(16).slice(1)
}

How to build CRC32 table for Ogg?

From this answer I adapted the code below:
function _makeCRCTable() {
  const CRCTable = new Uint32Array(256);
  for (let i = 256; i--;) {
    let char = i;
    for (let j = 8; j--;) {
      char = char & 1 ? 3988292384 ^ char >>> 1 : char >>> 1;
    }
    CRCTable[i] = char;
  }
  return CRCTable;
}
This code generates the table linked here, but for Ogg I need a different table, linked here.
From Ogg documentation:
32 bit CRC value (direct algorithm, initial val and final XOR = 0,
generator polynomial=0x04c11db7)
parseInt('04c11db7', 16)
returns 79764919; I tried this polynomial, but the resulting table is not correct.
I am new to the CRC field, and I found there are a few variations of the CRC32 algorithm.
I'm not sure of JavaScript precedence, but the XOR needs to occur after the shift:
char = char & 1 ? 3988292384 ^ (char >>> 1) : char >>> 1;
However, the first table you show seems correct, as table[128] = table[0x80] = 3988292384 = 0xEDB88320, which is 0x104c11db7 bit reversed, then shifted right one bit.
The second table you have is for a left-shifting CRC, where table[1] = 0x04c11db7. In this case the inner loop would include something like this:
let char = i << 24;
for (let j = 8; j--;) {
  char = char & 0x80000000 ? 0x04c11db7 ^ char << 1 : char << 1;
}
Example C code for comparison; it generates the CRC for the patterns {0x01}, {0x01,0x00}, {0x01,0x00,0x00}, {0x01,0x00,0x00,0x00}:
#include <stdio.h>

typedef unsigned char uint8_t;
typedef unsigned int uint32_t;

uint32_t crctbl[256];

void gentbl(void)
{
    uint32_t crc;
    uint32_t b;
    uint32_t c;
    uint32_t i;
    for(c = 0; c < 0x100; c++){
        crc = c<<24;
        for(i = 0; i < 8; i++){
            b = crc>>31;
            crc <<= 1;
            crc ^= (0 - b) & 0x04c11db7;
        }
        crctbl[c] = crc;
    }
}

uint32_t crc32(uint8_t * bfr, size_t size)
{
    uint32_t crc = 0;
    while(size--)
        crc = (crc << 8) ^ crctbl[(crc >> 24)^*bfr++];
    return(crc);
}

int main(int argc, char** argv)
{
    uint32_t crc;
    uint8_t bfr[4] = {0x01,0x00,0x00,0x00};
    gentbl();
    crc = crc32(bfr, 1); /* 0x04c11db7 */
    printf("%08x\n", crc);
    crc = crc32(bfr, 2); /* 0xd219c1dc */
    printf("%08x\n", crc);
    crc = crc32(bfr, 3); /* 0x01d8ac87 */
    printf("%08x\n", crc);
    crc = crc32(bfr, 4); /* 0xdc6d9ab7 */
    printf("%08x\n", crc);
    return(0);
}
For JS:
function _makeCRC32Table() {
  const polynomial = 79764919;
  const mask = 2147483648;
  const CRCTable = new Uint32Array(256);
  for (let i = 256; i--;) {
    let char = i << 24;
    for (let j = 8; j--;) {
      char = char & mask ? polynomial ^ char << 1 : char << 1;
    }
    CRCTable[i] = char;
  }
  return CRCTable;
}
How to use this table:
[1, 0].reduce((crc, byte) => crc << 8 >>> 0 ^ CRCTable[crc >>> 24 ^ byte], 0) >>> 0
Here we added >>> 0, which takes the value modulo 2^32, because there is no unsigned int in JS; in fact, JavaScript doesn't have integers at all, only double-precision floating-point numbers (the bitwise operators work on 32-bit values internally).
Note that for Ogg you must store the generated CRC in reverse order.
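Wrapping the same update step in a function (a sketch mirroring the C crc32 above; crc32Ogg is just my name for it), checked against the test patterns from the C example:
const CRCTable = _makeCRC32Table();
function crc32Ogg(bytes) {
  let crc = 0;
  for (const byte of bytes) {
    // Shift the CRC left one byte and XOR in the table entry selected
    // by the byte that falls out of the top, mirroring the C loop above.
    crc = ((crc << 8) >>> 0) ^ CRCTable[((crc >>> 24) ^ byte) & 0xff];
  }
  return crc >>> 0;
}
console.log(crc32Ogg([0x01]).toString(16));       // "4c11db7"  (0x04c11db7 in the C example)
console.log(crc32Ogg([0x01, 0x00]).toString(16)); // "d219c1dc"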

IP-addresses stored as int results in overflow?

I'm writing a chat server in node.js, and I want to store connected users' IP addresses in a MySQL database as (unsigned) integers.
I have written a JavaScript method to convert an IP address from a string to an integer, but I get some strange results.
Here is my code:
function ipToInt(ip) {
  var parts = ip.split(".");
  var res = 0;
  res += parseInt(parts[0], 10) << 24;
  res += parseInt(parts[1], 10) << 16;
  res += parseInt(parts[2], 10) << 8;
  res += parseInt(parts[3], 10);
  return res;
}
When I call the method as ipToInt("192.168.2.44"); the result I get is -1062731220.
It seems like an overflow has occurred, which is strange, because the expected output (3232236076) is well inside the number range of JavaScript (2^53).
When I inspect -1062731220 in binary form, I can see that 3232236076 is preserved, but filled with leading 1's.
I'm not sure, but I think the problem is with signed vs. unsigned integers.
Can any of you explain what is going on?
And possibly how to parse -1062731220 back to a string IP?
Why is the converted IP negative?
It's NOT an overflow. The first part of your IP address is 192 which converts to 11000000 in binary. You then shift that all the way to the left. When there is a 1 in the leftmost position of a 32 bit number, it's negative.
How do you convert back to a string?
Do the same thing you did to convert from a string but in reverse. Shift right (and mask)!
function intToIP(int) {
  var part1 = int & 255;
  var part2 = ((int >> 8) & 255);
  var part3 = ((int >> 16) & 255);
  var part4 = ((int >> 24) & 255);
  return part4 + "." + part3 + "." + part2 + "." + part1;
}
Why reinvent the wheel? From a quick Google search, you can use what I found here:
http://javascript.about.com/library/blipconvert.htm
function dot2num(dot)
{
  var d = dot.split('.');
  return ((((((+d[0])*256)+(+d[1]))*256)+(+d[2]))*256)+(+d[3]);
}

function num2dot(num)
{
  var d = num%256;
  for (var i = 3; i > 0; i--)
  {
    num = Math.floor(num/256);
    d = num%256 + '.' + d;
  }
  return d;
}
The result of the "<<" operator is always a signed, 32-bit integer, as per the spec.
When you shift back, use ">>>" to do an unsigned right shift.
You might also find this pattern useful:
ip.toLong = function toInt(ip){
  var ipl = 0;
  ip.split('.').forEach(function( octet ) {
    ipl <<= 8;
    ipl += parseInt(octet);
  });
  return(ipl >>> 0);
};

ip.fromLong = function fromInt(ipl){
  return ( (ipl >>> 24) + '.' +
           (ipl >> 16 & 255) + '.' +
           (ipl >> 8 & 255) + '.' +
           (ipl & 255) );
};
If you're using something like node.js, where you can add functionality through something like npm, then you can simply do:
npm install ip
To get that functionality from the source which is here:
https://github.com/indutny/node-ip/blob/master/lib/ip.js
You will also get a bunch of other IP utility functions with that.
You shifted left to get the original number - which is just 4 sets of bits regardless of the sign.
Shift right to get back to the IP. Doesn't matter what the sign is.
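For example, with the value from the question:
const n = -1062731220; // ipToInt("192.168.2.44") from the question
console.log([(n >>> 24) & 255, (n >>> 16) & 255, (n >>> 8) & 255, n & 255].join('.'));
// "192.168.2.44"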
const ip2int = (x) => (x.split('.').reduce((a, v) => ((a << 8) + (+v)), 0) >>> 0);
One-Liner:
const ipToLong = ip => ip.split('.').map(parseFloat).reduce((total, part) => total * 256 + part);
Use this
function num2string(ip) {
  return [24, 16, 8, 0].map(n => (ip >> n) & 0xff).join(".");
}

function string2num(ip) {
  return ip.split(".").reduce((sum, x, i) => sum + (x << 8 * (3 - i)), 0);
}
IP addresses in the V4 space are unsigned 32-bit numbers, hence the IP address FF.FF.FF.FF is 2^32 - 1 and no address can be greater than that number. Please see:
This stack overflow article on the same subject
To turn that number back into an IP address, you must break it down into its 4 parts, since each byte is one octet of the address: convert the number to hex and then parse out each pair. You may or may not have to add a leading zero for the first octet.
Additionally, you may have to deal with the byte order of the integer (endianness issues), but since most systems are Intel-based these days, you might not have to.
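A sketch of that hex approach (numToIpViaHex is only an illustrative name; the value is the one from the question):
function numToIpViaHex(num) {
  // 8 hex digits; padStart supplies the leading zero(s) when the first octet is small.
  const hex = (num >>> 0).toString(16).padStart(8, '0');
  // Parse out each pair of hex digits as one octet.
  return hex.match(/../g).map(pair => parseInt(pair, 16)).join('.');
}
console.log(numToIpViaHex(3232236076)); // "192.168.2.44"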
var aaa = Number("0b"+ "192.168.2.44".split(".").map(
function(dec){
return ("00000000" + Number(dec).toString(2)).slice(-8);
}).join(""));
aaa.toString(2).match(/.{1,8}/g).map(
function(bin){
return Number("0b"+bin);
}).join(".");
I revised Evan's final answer a bit, particularly dot2num. It functions the same but might be more readable, and is marginally slower.
function ip2num(ip) {
  var d = ip.split('.');
  var num = 0;
  num += d[0] * Math.pow(2, 24);
  num += d[1] * Math.pow(2, 16);
  num += d[2] * Math.pow(2, 8);
  num += +d[3]; // unary + so the last octet is added numerically, not concatenated
  return num;
}

function num2ip(num) {
  var ip = num % 256;
  for (var i = 3; i > 0; i--) {
    num = Math.floor(num / 256);
    ip = num % 256 + '.' + ip;
  }
  return ip;
}
Try this solution; it might help:
function IpToInteger(ipAddr)
{
  var parts = ipAddr.split('.');
  return (((parts[0] ? parts[0] << 24 : 0) |
           (parts[1] ? parts[1] << 16 : 0) |
           (parts[2] ? parts[2] << 8  : 0) |
           (parts[3])) >>> 0);
}
function IpAddressToLong(ip) {
  return ip.split('.').map((octet, index, array) => {
    return parseInt(octet) * Math.pow(256, (array.length - index - 1));
  }).reduce((prev, curr) => {
    return prev + curr;
  });
}
Taken from repo
function ip2num(ip) {
  var d = ip.split(".");
  var num = 0;
  num += Number(d[0]) * Math.pow(256, 3);
  num += Number(d[1]) * Math.pow(256, 2);
  num += Number(d[2]) * Math.pow(256, 1);
  num += Number(d[3]);
  return num;
}

function num2ip(num) {
  var ip = num % 256;
  for (var i = 3; i > 0; i--) {
    num = Math.floor(num / 256);
    ip = (num % 256) + "." + ip;
  }
  return ip;
}

console.log(ip2num("192.168.0.1"));
console.log(num2ip(3232235521));

How do you set, clear and toggle a single bit in JavaScript?

How to set, clear, toggle and check a bit in JavaScript?
To get a bit mask:
var mask = 1 << 5; // gets the 6th bit
To test if a bit is set:
if ((n & mask) != 0) {
  // bit is set
} else {
  // bit is not set
}
To set a bit:
n |= mask;
To clear a bit:
n &= ~mask;
To toggle a bit:
n ^= mask;
Refer to the Javascript bitwise operators.
I want to add some things (with thanks to @cletus):
function bit_test(num, bit){
  return ((num >> bit) % 2 != 0);
}

function bit_set(num, bit){
  return num | 1 << bit;
}

function bit_clear(num, bit){
  return num & ~(1 << bit);
}

function bit_toggle(num, bit){
  return bit_test(num, bit) ? bit_clear(num, bit) : bit_set(num, bit);
}
Get Bit
function getBit(number, bitPosition) {
  return (number & (1 << bitPosition)) === 0 ? 0 : 1;
}
Set Bit
function setBit(number, bitPosition) {
  return number | (1 << bitPosition);
}
Clear Bit
function clearBit(number, bitPosition) {
  const mask = ~(1 << bitPosition);
  return number & mask;
}
Update Bit
function updateBit(number, bitPosition, bitValue) {
  const bitValueNormalized = bitValue ? 1 : 0;
  const clearMask = ~(1 << bitPosition);
  return (number & clearMask) | (bitValueNormalized << bitPosition);
}
Examples have been taken from the JavaScript Algorithms and Data Structures repository.
I built a BitSet class with the help of @cletus's information:
function BitSet() {
  this.n = 0;
}

BitSet.prototype.set = function(p) {
  this.n |= (1 << p);
}

BitSet.prototype.test = function(p) {
  return (this.n & (1 << p)) !== 0;
}

BitSet.prototype.clear = function(p) {
  this.n &= ~(1 << p);
}

BitSet.prototype.toggle = function(p) {
  this.n ^= (1 << p);
}
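A small usage sketch (the values are arbitrary):
const bits = new BitSet();
bits.set(3);
console.log(bits.test(3)); // true
bits.toggle(3);
console.log(bits.test(3)); // false
console.log(bits.n);       // 0: no bits left set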
