Hash algorithm issue when converting code from JavaScript to Python

To get the same hash in JavaScript and Python, I tried to convert my JavaScript function to Python and got completely stuck: the Python variant returns an astronomically large int.
JavaScript:
function str_hash(s) {
    var hash = 0, i, chr;
    if (s.length === 0) return hash;
    for (i = 0; i < s.length; i++) {
        chr = s.charCodeAt(i);
        hash = ((hash << 5) - hash) + chr;
        hash |= 0; // truncate to a 32-bit signed integer
    }
    return hash;
}
The output is something like -34998534.
My Python attempt:
def get_hash(s):
    h = 0
    if not s:
        return h
    for i in range(0, len(s)):
        h = ((h << 5) - h) + i
        h |= 0
    return h

print(get_hash('PUT LONG STRING HERE'))
The output is a huge number like 349832894283058945028049523548405975892375489743847490218348927483984793048218934148973940218340298489273942374902938490238482942930729487210948239407329403082738940214.
Any ideas how to fix it?

Here is the Python equivalent:
def str_hash(s):
    h = 0
    for ch in s:
        h = ((h << 5) - h) + ord(ch)
        h &= 0xFFFFFFFF       # emulate JavaScript's 32-bit truncation
    if h >= 0x80000000:       # reinterpret as a signed 32-bit value,
        h -= 0x100000000      # matching JavaScript's `hash |= 0`
    return h

print(str_hash("PUT LONG STRING HERE"))
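A quick sanity check (test string mine, not from the thread): every step computes hash = hash*31 + charCode, so for "abc" both versions should return (97*31 + 98)*31 + 99 = 96354:
console.log(str_hash("abc")); // 96354; print(str_hash("abc")) in the Python version prints the same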
The << operator in JavaScript is the bitwise left-shift operator; its behaviour is illustrated in the example below:
let num = 4;
let shiftBy = 2;
console.log(num * 2 ** shiftBy); // 16, same as num * Math.pow(2, shiftBy)
console.log(num << shiftBy);     // 16
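The other half of the story is hash |= 0: in JavaScript, | 0 truncates to a 32-bit signed integer, while Python's h |= 0 is a no-op on arbitrary-precision ints, which is exactly why the Python port kept growing:
console.log(2 ** 31);       // 2147483648
console.log((2 ** 31) | 0); // -2147483648: | 0 wraps to a signed 32-bit value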


Javascript equivalent of Java's UUID class

In Java, you can do something like:
UUID id = UUID.fromString("eb66c416-4739-465b-9af3-9dc33ed8eef9");
long msb = id.getMostSignificantBits();
long lsb = id.getLeastSignificantBits();
System.out.println(msb + ", " + lsb);
// -1484283427208739237, -7281302710629372167
System.out.println(new UUID(msb, lsb));
// eb66c416-4739-465b-9af3-9dc33ed8eef9
This same example is referenced in another, quite similar question, so this is a follow-up. The related question solved the (msb, lsb) -> string direction, but I cannot find a solution for the reverse problem, string -> (msb, lsb).
The original solution was:
function toUuidString(lsb, msb) {
    return `${digits(msb >> 32n, 8n)}-${digits(msb >> 16n, 4n)}-${digits(
        msb,
        4n
    )}-${digits(lsb >> 48n, 4n)}-${digits(lsb, 12n)}`
}

function digits(value, ds) {
    const hi = 1n << (ds * 4n)
    return (hi | (value & (hi - 1n))).toString(16).slice(1)
}
Now I'd like to have a function that takes in a string and returns msb and lsb.
Following the original question's path, I discovered the Java source code and tried to write the equivalent, which would be:
function fromString(name) {
    let components = name.split('-')
    if (components.length !== 5) {
        throw new Error(`Invalid UUID string: ${name}`)
    }
    for (let index = 0; index < 5; index++) {
        components[index] = `0x${components[index]}`
    }
    let mostSigBits = Number.parseInt(components[0], 16)
    mostSigBits <<= 16
    mostSigBits |= Number.parseInt(components[1], 16)
    mostSigBits <<= 16
    mostSigBits |= Number.parseInt(components[2], 16)
    let leastSigBits = Number.parseInt(components[3], 16)
    leastSigBits <<= 48
    leastSigBits |= Number.parseInt(components[4], 16)
    return {
        leastSigBits,
        mostSigBits,
    }
}
However, when I try to test this with something like:
const originalUuid = 'eb66c416-4739-465b-9af3-9dc33ed8eef9'
const parts = fromString(originalUuid)
const newUUid = toUuidString(
    BigInt(parts.leastSigBits),
    BigInt(parts.mostSigBits)
)
console.log('Original', originalUuid)
console.log('New', newUUid)
I do not get equivalent UUIDs; some parts match, but others are wrong:
Original eb66c416-4739-465b-9af3-9dc33ed8eef9
New 00000000-4739-465b-ffff-ffffbefbeef9
Any ideas what went wrong?
Finally I found the problem: the two pieces of code were not strictly equivalent. The Java source declares mostSigBits and leastSigBits as long, which cannot be represented exactly by a JavaScript number, so we need to use BigInt.
To sum up this question and my previous one, the JavaScript equivalents of Java's UUID operations would be:
string -> (msb, lsb):
function fromString(name) {
    let components = name.split('-')
    if (components.length !== 5) {
        throw new Error(`Invalid UUID string: ${name}`)
    }
    for (let index = 0; index < 5; index++) {
        components[index] = `0x${components[index]}`
    }
    let mostSigBits = BigInt(Number.parseInt(components[0], 16))
    mostSigBits <<= 16n
    mostSigBits |= BigInt(Number.parseInt(components[1], 16))
    mostSigBits <<= 16n
    mostSigBits |= BigInt(Number.parseInt(components[2], 16))
    let leastSigBits = BigInt(Number.parseInt(components[3], 16))
    leastSigBits <<= 48n
    leastSigBits |= BigInt(Number.parseInt(components[4], 16))
    return {
        leastSigBits,
        mostSigBits,
    }
}
(msb, lsb) -> string (from the referenced question):
function toUuidString(lsb, msb) {
    return `${digits(msb >> 32n, 8n)}-${digits(msb >> 16n, 4n)}-${digits(
        msb,
        4n
    )}-${digits(lsb >> 48n, 4n)}-${digits(lsb, 12n)}`
}

function digits(value, ds) {
    const hi = 1n << (ds * 4n)
    return (hi | (value & (hi - 1n))).toString(16).slice(1)
}
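Re-running the earlier round-trip test against this BigInt version now reproduces the original string:
const parts = fromString('eb66c416-4739-465b-9af3-9dc33ed8eef9')
console.log(toUuidString(parts.leastSigBits, parts.mostSigBits))
// eb66c416-4739-465b-9af3-9dc33ed8eef9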

Using bitwise operators with large numbers in javascript [duplicate]

This question already has answers at: Bitshift in javascript. Closed 3 years ago.
I am writing a JavaScript version of this Microsoft string decoding algorithm, and it's failing on large numbers. This seems to be due to sizing (int/long) issues. If I step through the code in C#, I see that the JS implementation fails on this line:
n |= (b & 31) << k;
This happens with the following values (the C# result is 240518168576):
(39 & 31) << 35
If I play around with these values in C#, I can replicate the JS issue when b is an int; if I declare b as long, it works correctly.
So then I checked the max size of a JS number against the C# long result:
240518168576 < Number.MAX_SAFE_INTEGER // true
So I can see that there is some kind of number-size issue happening, but I do not know how to force JS to treat this number as a long.
Full JS code:
private getPointsFromEncodedString(encodedLine: string): number[][] {
    const EncodingString = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
    var points: number[][] = [];
    if (!encodedLine) {
        return points;
    }
    var index = 0;
    var xsum = 0;
    var ysum = 0;
    while (index < encodedLine.length) {
        var n = 0;
        var k = 0;
        debugger;
        while (true) {
            if (index >= encodedLine.length) {
                return points;
            }
            var b = EncodingString.indexOf(encodedLine[index++]);
            if (b == -1) {
                return points;
            }
            n |= (b & 31) << k;
            k += 5;
            if (b < 32) {
                break;
            }
        }
        var diagonal = ((Math.sqrt(8 * n + 5) - 1) / 2);
        n -= diagonal * (diagonal + 1) / 2;
        var ny = n;
        var nx = diagonal - ny;
        nx = (nx >> 1) ^ -(nx & 1);
        ny = (ny >> 1) ^ -(ny & 1);
        xsum += nx;
        ysum += ny;
        points.push([ysum * 0.000001, xsum * 0.000001]);
    }
    console.log(points);
    return points;
}
Expected input and output:
Encoded string
qkoo7v4q-lmB0471BiuuNmo30B
Decoded points:
35.89431, -110.72522
35.89393, -110.72578
35.89374, -110.72606
35.89337, -110.72662
Bitwise operators treat their operands as a sequence of 32 bits
(zeroes and ones), rather than as decimal, hexadecimal, or octal
numbers. For example, the decimal number nine has a binary
representation of 1001. Bitwise operators perform their operations on
such binary representations, but they return standard JavaScript
numerical values.
(39 & 31) << 35 tries to shift by 35 bits when there are only 32 (see the Bitwise Operators reference).
To solve this problem you could use BigInt to perform those operations and then downcast the result back to Number:
Number((39n & 31n) << 35n)
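The difference is easy to see side by side: Number shifts mask the shift count to five bits (35 & 31 = 3), while BigInt shifts are unbounded:
console.log((39 & 31) << 35);            // 56, i.e. 7 << 3
console.log(Number((39n & 31n) << 35n)); // 240518168576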
You can try this:
function getPointsFromEncodedString(encodedLine) {
    const EncodingString = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-";
    var points = [];
    if (!encodedLine) {
        return points;
    }
    var index = 0;
    var xsum = 0;
    var ysum = 0;
    while (index < encodedLine.length) {
        var n = 0n;
        var k = 0n;
        while (true) {
            if (index >= encodedLine.length) {
                return points;
            }
            // keep everything BigInt here; mixing Number and BigInt
            // operands in |, & or << throws a TypeError
            var b = BigInt(EncodingString.indexOf(encodedLine[index++]));
            if (b === -1n) {
                return points;
            }
            n |= (b & 31n) << k; // BigInt shifts are not capped at 32 bits
            k += 5n;
            if (b < 32n) {
                break;
            }
        }
        // the decoded value fits comfortably in a double, so switch back
        // to Number for the floating-point math below
        var m = Number(n);
        var diagonal = Math.floor((Math.sqrt(8 * m + 5) - 1) / 2); // integer, like the C# cast
        m -= diagonal * (diagonal + 1) / 2;
        var ny = m;
        var nx = diagonal - ny;
        nx = (nx >> 1) ^ -(nx & 1);
        ny = (ny >> 1) ^ -(ny & 1);
        xsum += nx;
        ysum += ny;
        points.push([ysum * 0.000001, xsum * 0.000001]);
    }
    console.log(points);
    return points;
}
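Feeding in the sample string from the question should reproduce the expected points (up to floating-point rounding in the scaled sums):
console.log(getPointsFromEncodedString("qkoo7v4q-lmB0471BiuuNmo30B"));
// [[35.89431, -110.72522], [35.89393, -110.72578],
//  [35.89374, -110.72606], [35.89337, -110.72662]]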

Need help converting C# code to JavaScript / reversing a function output

I'm working on functions that convert a number to a string and a string back to a number.
The original C# code:
public static string unhash(Int64 hash)
{
    string originalString = "";
    Int64 mod = 37;
    string letters = "acdegilmnoprstuw";
    while (hash != 7)
    {
        Int64 index = hash % mod;
        originalString = letters[(Int32) index] + originalString; // need help converting this line to javascript
        hash = (hash - index) / mod;
    }
    return originalString;
}
The JavaScript code below works correctly; it converts a string into the hash I want:
function hash(s) {
    var h = 7;
    var letters = "acdegilmnoprstuw";
    for (var i = 0; i < s.length; i++) {
        h = h * 37 + letters.indexOf(s[i]);
    }
    return h;
}
The code to reverse the process (hash back to string) is not working correctly:
function unhash(hash) {
    var originalString = "";
    var mod = 37;
    var letters = "acdegilmnoprstuw";
    while (hash != 7) {
        var index = hash % mod;
        originalString = letters[(Int32Array)index] + originalString; // I'm not sure what the javascript version of int32 is
        hash = (hash - index) / mod;
    }
}
alert(hash("leepadg")); // correct output: 680131659347
alert(unhash(680131659347)); // should output "leepadg" but returns undefined
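For reference, a minimal working version (my sketch, not from the original thread): index is already a plain number, so no cast is needed for indexing, and the function must return the accumulated string; the missing return is why unhash yields undefined:
function unhash(hash) {
    var originalString = "";
    var mod = 37;
    var letters = "acdegilmnoprstuw";
    while (hash !== 7) {
        var index = hash % mod;  // always less than 16 here, so a valid index
        originalString = letters[index] + originalString;
        hash = (hash - index) / mod;
    }
    return originalString;       // without this, the caller sees undefined
}
alert(unhash(680131659347)); // "leepadg"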

Converting HashString from C to JS

I'm trying to convert this function from the Mozilla Firefox code base, it's called HashString. It calls a bunch of functions which are all in this file: https://dxr.mozilla.org/mozilla-central/source/mfbt/HashFunctions.h#294
These are the C++ functions it calls:
static const uint32_t kGoldenRatioU32 = 0x9E3779B9U;

MOZ_WARN_UNUSED_RESULT inline uint32_t
HashString(const wchar_t* aStr)
{
  return detail::HashUntilZero(aStr);
}

template<typename T>
uint32_t
HashUntilZero(const T* aStr)
{
  uint32_t hash = 0;
  for (T c; (c = *aStr); aStr++) {
    hash = AddToHash(hash, c);
  }
  return hash;
}

MOZ_WARN_UNUSED_RESULT inline uint32_t
AddToHash(uint32_t aHash, A* aA)
{
  /*
   * You might think this function should just take a void*. But then we'd only
   * catch data pointers and couldn't handle function pointers.
   */
  static_assert(sizeof(aA) == sizeof(uintptr_t), "Strange pointer!");
  return detail::AddUintptrToHash<sizeof(uintptr_t)>(aHash, uintptr_t(aA));
}

inline uint32_t
AddUintptrToHash<8>(uint32_t aHash, uintptr_t aValue)
{
  /*
   * The static cast to uint64_t below is necessary because this function
   * sometimes gets compiled on 32-bit platforms (yes, even though it's a
   * template and we never call this particular override in a 32-bit build). If
   * we do aValue >> 32 on a 32-bit machine, we're shifting a 32-bit uintptr_t
   * right 32 bits, and the compiler throws an error.
   */
  uint32_t v1 = static_cast<uint32_t>(aValue);
  uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(aValue) >> 32);
  return AddU32ToHash(AddU32ToHash(aHash, v1), v2);
}

inline uint32_t
AddU32ToHash(uint32_t aHash, uint32_t aValue)
{
  return kGoldenRatioU32 * (RotateBitsLeft32(aHash, 5) ^ aValue);
}

inline uint32_t
RotateBitsLeft32(uint32_t aValue, uint8_t aBits)
{
  MOZ_ASSERT(aBits < 32);
  return (aValue << aBits) | (aValue >> (32 - aBits));
}
And here is my JS code:
function HashString(aStr, aLength) {
    // moz win32 hash function
    if (aLength) {
        console.error('NS_ERROR_NOT_IMPLEMENTED');
        throw Components.results.NS_ERROR_NOT_IMPLEMENTED;
    } else {
        return HashUntilZero(aStr);
    }
}

function HashUntilZero(aStr) {
    var hash = 0;
    //for (T c; (c = *aStr); aStr++) {
    for (var c = 0; c < aStr.length; c++) {
        hash = AddToHash(hash, aStr.charCodeAt(c));
    }
    return hash;
}

function AddToHash(aHash, aA) {
    //return detail::AddU32ToHash(aHash, aA);
    //return AddU32ToHash(aHash, aA);
    //return detail::AddUintptrToHash<sizeof(uintptr_t)>(aHash, aA);
    return AddUintptrToHash(aHash, aA);
}

function AddUintptrToHash(aHash, aValue) {
    //return AddU32ToHash(aHash, static_cast<uint32_t>(aValue));
    return AddU32ToHash(aHash, aValue);
}

function AddU32ToHash(aHash, aValue) {
    var kGoldenRatioU32 = 0x9E3779B9;
    return (kGoldenRatioU32 * (RotateBitsLeft32(aHash, 5) ^ aValue));
}

function RotateBitsLeft32(aValue, aBits) {
    // MOZ_ASSERT(aBits < 32);
    return (aValue << aBits) | (aValue >> (32 - aBits));
}
console.log(HashString('C:\Users\Vayeate\AppData\Roaming\Mozilla\Firefox\Profiles\aksozfjt.Unnamed Profile 10')); // should return 3181739213
This isn't working right: HashString('C:\Users\Vayeate\AppData\Roaming\Mozilla\Firefox\Profiles\aksozfjt.Unnamed Profile 10') should return 3181739213, but it keeps returning -159266146140.
Let's implement a more minimal C++ version first, one that also dumps intermediate values we can later compare against.
#include <iostream>
#include <iomanip>
#include <stdint.h>

using namespace std;

static const uint32_t gr = 0x9E3779B9U;

template<typename T>
static uint32_t add(uint32_t hash, T val) {
    const uint32_t rv = gr * (((hash << 5) | (hash >> 27)) ^ val);
    cerr << dec << setw(7) << (uint32_t)val << " " << setw(14) << rv << " " << hex << rv << endl;
    return rv;
}

int main() {
    const auto text = string("C:\\Users\\Vayeate\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\aksozfjt.Unnamed Profile 10");
    uint32_t rv = 0;
    for (auto c: text) {
        rv = add(rv, c);
    }
    cout << "Result: " << dec << setw(14) << rv << " " << hex << rv << endl;
}
Result: 3181739213 bda57ccd, so we're on the right track.
Now, for some JavaScript:
GetNativePath returns an nsAutoCString, i.e. an 8-bit string, by converting the internal 16-bit string to UTF-8.
JavaScript does not actually know about 32-bit unsigned integers, just 32-bit signed integers, but there are some dirty tricks (mainly the >>> 0 "unsigned cast").
32-bit unsigned multiplication does not work directly, but we can implement that operation ourselves.
Properly escaping the backslashes \ in your test string also helps ;)
Putting these things together, I arrived at the following function, which seems to produce correct results.
/**
 * Javascript implementation of
 * https://hg.mozilla.org/mozilla-central/file/0cefb584fd1a/mfbt/HashFunctions.h
 * aka. the mfbt hash function.
 */
let HashString = (function() {
    // Note: >>>0 is basically a cast-to-unsigned for our purposes.
    const encoder = new TextEncoder("utf-8");
    const kGoldenRatio = 0x9E3779B9;

    // Multiply two uint32_t like C++ would ;)
    const mul32 = (a, b) => {
        // Split into 16-bit integers (hi and lo words)
        let ahi = (a >> 16) & 0xffff;
        let alo = a & 0xffff;
        let bhi = (b >> 16) & 0xffff;
        let blo = b & 0xffff;
        // Compute new hi and lo separately and recombine.
        return (
            (((((ahi * blo) + (alo * bhi)) & 0xffff) << 16) >>> 0) +
            (alo * blo)
        ) >>> 0;
    };

    // kGoldenRatioU32 * (RotateBitsLeft32(aHash, 5) ^ aValue);
    const add = (hash, val) => {
        // Note, cannot >> 27 here, but / (1<<27) works as well.
        let rotl5 = (
            ((hash << 5) >>> 0) |
            (hash / (1 << 27)) >>> 0
        ) >>> 0;
        return mul32(kGoldenRatio, (rotl5 ^ val) >>> 0);
    };

    return function(text) {
        // Convert to utf-8.
        // Also decomposes the string into uint8_t values already.
        let data = encoder.encode(text);
        // Compute the actual hash
        let rv = 0;
        for (let c of data) {
            rv = add(rv, c | 0);
        }
        return rv;
    };
})();
let res = HashString('C:\\Users\\Vayeate\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\aksozfjt.Unnamed Profile 10');
console.log(res, res === 3181739213);
Might not be the most efficient implementation, but well, it works at least ;)
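As an aside (not part of the original answer): modern engines provide Math.imul, which performs C-style 32-bit integer multiplication, so the hand-rolled mul32 above could be replaced with:
// Math.imul returns the low 32 bits of a*b as a signed integer;
// >>> 0 reinterprets that as unsigned, matching uint32_t semantics.
const mul32 = (a, b) => Math.imul(a, b) >>> 0;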
There is a simpler way:
var file = new FileUtils.File('C:\\Users\\Vayeate\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\aksozfjt.Unnamed Profile 10');
file.QueryInterface(Ci.nsIHashable);
console.log(file.hashCode === 3181739213);

IP-addresses stored as int results in overflow?

I'm writing a chat server in Node.js, and I want to store connected users' IP addresses in a MySQL database as (unsigned) integers.
I have written a JavaScript method to convert an IP address string to an integer, but I get some strange results.
Here is my code:
function ipToInt(ip) {
    var parts = ip.split(".");
    var res = 0;
    res += parseInt(parts[0], 10) << 24;
    res += parseInt(parts[1], 10) << 16;
    res += parseInt(parts[2], 10) << 8;
    res += parseInt(parts[3], 10);
    return res;
}
When I call the method as ipToInt("192.168.2.44"), the result I get is -1062731220.
It seems like an overflow has occurred, which is strange, because the expected output (3232236076) is well inside JavaScript's exact integer range (up to 2^53 - 1).
When I inspect -1062731220 in binary form, I can see the bit pattern of 3232236076 is preserved, but sign-extended with leading 1s.
I'm not sure, but I think the problem is signed vs. unsigned integers.
Can any of you explain what is going on?
And possibly how to parse -1062731220 back to a string IP?
Why is the converted IP negative?
It's NOT an overflow. The first part of your IP address is 192, which is 11000000 in binary. You then shift that all the way to the left. When there is a 1 in the leftmost position of a 32-bit number, it's negative.
How do you convert back to a string?
Do the same thing you did to convert from a string, but in reverse: shift right (and mask)!
function intToIP(int) {
    var part1 = int & 255;
    var part2 = ((int >> 8) & 255);
    var part3 = ((int >> 16) & 255);
    var part4 = ((int >> 24) & 255);
    return part4 + "." + part3 + "." + part2 + "." + part1;
}
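Round-tripping the value from the question confirms that the sign does not change the stored bits:
console.log(intToIP(-1062731220)); // "192.168.2.44"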
Why reinvent the wheel? You can use what I found here:
http://javascript.about.com/library/blipconvert.htm
function dot2num(dot)
{
    var d = dot.split('.');
    return ((((((+d[0]) * 256) + (+d[1])) * 256) + (+d[2])) * 256) + (+d[3]);
}

function num2dot(num)
{
    var d = num % 256;
    for (var i = 3; i > 0; i--)
    {
        num = Math.floor(num / 256);
        d = num % 256 + '.' + d;
    }
    return d;
}
The result of the "<<" operator is always a signed 32-bit integer, as per the spec.
When you shift back, use ">>>" to do an unsigned right shift.
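For example, the unsigned cast recovers the expected value from the question directly:
console.log(-1062731220 >>> 0); // 3232236076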
You might also find this pattern useful:
ip.toLong = function toInt(ip) {
    var ipl = 0;
    ip.split('.').forEach(function(octet) {
        ipl <<= 8;
        ipl += parseInt(octet);
    });
    return (ipl >>> 0);
};

ip.fromLong = function fromInt(ipl) {
    return ((ipl >>> 24) + '.' +
            (ipl >> 16 & 255) + '.' +
            (ipl >> 8 & 255) + '.' +
            (ipl & 255));
};
If you're using something like Node.js, where you can add functionality through npm, then you can simply do:
npm install ip
to get that functionality from the source, which is here:
https://github.com/indutny/node-ip/blob/master/lib/ip.js
You will also get a bunch of other IP utility functions with that.
You shifted left to get the original number, which is just 4 sets of bits regardless of the sign.
Shift right to get back to the IP; it doesn't matter what the sign is.
const ip2int = (x) => (x.split('.').reduce((a, v) => ((a << 8) + (+v)), 0) >>> 0);
One-Liner:
const ipToLong = ip => ip.split('.').map(parseFloat).reduce((total, part) => total * 256 + part);
Use this:
function num2string(ip) {
    return [24, 16, 8, 0].map(n => (ip >> n) & 0xff).join(".");
}

function string2num(ip) {
    // x << 24 yields a negative number for first octets >= 128,
    // so cast the final sum back to unsigned with >>> 0
    return ip.split(".").reduce((sum, x, i) => sum + (x << 8 * (3 - i)), 0) >>> 0;
}
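With that unsigned cast in place, the pair round-trips on the question's address:
console.log(string2num("192.168.2.44")); // 3232236076
console.log(num2string(3232236076));     // "192.168.2.44"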
IP addresses in the V4 space are unsigned 32-bit numbers, so the highest address, FF.FF.FF.FF, is 2^32 - 1, and no address can be greater than that. Please see:
This Stack Overflow article on the same subject
To turn that number back into an IP address you must break it down into its 4 parts, since each byte is one octet of the address: convert the number to hex and then parse out each pair. You may or may not have to add a leading zero for the first octet.
Additionally, you may have to deal with the byte order of the integer (endianness issues), but since most systems are Intel-based these days, you might not have to deal with that.
var aaa = Number("0b" + "192.168.2.44".split(".").map(
    function(dec) {
        return ("00000000" + Number(dec).toString(2)).slice(-8);
    }).join(""));

aaa.toString(2).match(/.{1,8}/g).map(
    function(bin) {
        return Number("0b" + bin);
    }).join(".");
I revised Evan's final answer a bit, particularly dot2num. It functions the same but might be more readable and is marginally slower.
function ip2num(ip) {
    var d = ip.split('.');  // the original declared `parts` but then used `d`
    var num = 0;
    num += d[0] * Math.pow(2, 24);
    num += d[1] * Math.pow(2, 16);
    num += d[2] * Math.pow(2, 8);
    num += Number(d[3]);    // Number() avoids accidental string concatenation
    return num;
}
function num2ip(num) {
    var ip = num % 256;
    for (var i = 3; i > 0; i--) {
        num = Math.floor(num / 256);
        ip = num % 256 + '.' + ip;
    }
    return ip;
}
Try this solution; it might help:
function IpToInteger(ipAddr)
{
    var parts = ipAddr.split('.');
    return (((parts[0] ? parts[0] << 24 : 0) |
             (parts[1] ? parts[1] << 16 : 0) |
             (parts[2] ? parts[2] << 8 : 0) |
             (parts[3])) >>> 0);
}
function IpAddressToLong(ip) {
    return ip.split('.').map((octet, index, array) => {
        return parseInt(octet) * Math.pow(256, (array.length - index - 1));
    }).reduce((prev, curr) => {
        return prev + curr;
    });
}
Taken from repo
function ip2num(ip) {
    var d = ip.split(".");
    var num = 0;
    num += Number(d[0]) * Math.pow(256, 3);
    num += Number(d[1]) * Math.pow(256, 2);
    num += Number(d[2]) * Math.pow(256, 1);
    num += Number(d[3]);
    return num;
}

function num2ip(num) {
    var ip = num % 256;
    for (var i = 3; i > 0; i--) {
        num = Math.floor(num / 256);
        ip = (num % 256) + "." + ip;
    }
    return ip;
}

console.log(ip2num("192.168.0.1"));
console.log(num2ip(3232235521));