Vigenère cipher in JavaScript showing or � characters - javascript

I made the Vigenère cipher in JavaScript.
if I run my Code in Firefox I'm getting the follow output:
�QZ4Sm0]m
in Google Chrome it looks like this
QZ4Sm0]m
How can I avoid those symbols or how can I make them visible?
What am I doing wrong?
function vigenere(key, str, mode) {
var output = [str.length];
var result = 0;
var output_str;
for (var i = 0; i < str.length; i++) {
if (mode == 1) {
result = ((str.charCodeAt(i) + key.charCodeAt(i % key.length)) % 128);
output[i] = String.fromCharCode(result);
} else if (mode == 0) {
if (str.charCodeAt(i) - key.charCodeAt(i % key.length) < 0) {
result = (str.charCodeAt(i) - key.charCodeAt(i % key.length)) + 128;
} else {
result = (str.charCodeAt(i) - key.charCodeAt(i % key.length)) % 128;
}
output[i] = String.fromCharCode(result);
}
}
output_str = output.join('');
return output_str;
}
console.log(vigenere("Key", "Plaintext", 1))

Your first calculation gives an esc (#27) in all browsers. Visible in Firefox, but not visible in Chrome
This one gives Zpysrrobr: https://www.nayuki.io/page/vigenere-cipher-javascript
function vigenere(key, str, mode) {
var output = [str.length];
var result = 0;
var output_str;
for (var i = 0; i < str.length; i++) {
if (mode == 1) {
result = ((str.charCodeAt(i) + key.charCodeAt(i % key.length)) % 128);
output[i] = String.fromCharCode(result);
console.log(
str[i],key[i],result,output[i])
} else if (mode == 0) {
if (str.charCodeAt(i) - key.charCodeAt(i % key.length) < 0) {
result = (str.charCodeAt(i) - key.charCodeAt(i % key.length)) + 128;
} else {
result = (str.charCodeAt(i) - key.charCodeAt(i % key.length)) % 128;
}
output[i] = String.fromCharCode(result);
}
}
output_str = output.join('');
return output_str;
}
console.log(vigenere("Key", "Plaintext", 1))

Try using this piece of code. Its simpler and quite readable
function vigenereFunc(plainText, key) {
const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
let cipherText = "";
for (let i = 0, j = 0; i < plainText.length; i++) {
if (!letters.includes(plainText[i])) {
cipherText += plainText[i];
continue;
}
cipherText += letters[(letters.indexOf(plainText[i]) + letters.indexOf(key[j])) % 26];
if(j === key.length - 1) j = -1;
return cipherText;
}

Related

using Damerau-Levenshtein distance to compare sets of text in code.org

Not very knowledgeable with coding, I usually use block coding and not typing.
I've used many different Levenshtein distance codes I've found online and most of them didn't work for one reason or another
var levDist = function (s, t) {
var d = []; //2d matrix
// Step 1
var n = s.length;
var m = t.length;
if (n == 0) return m;
if (m == 0) return n;
//Create an array of arrays in javascript (a descending loop is quicker)
for (var i = n; i >= 0; i--) d[i] = [];
// Step 2
for (i = n; i >= 0; i--) d[i][0] = i;
for (var j = m; j >= 0; j--) d[0][j] = j;
// Step 3
for (i = 1; i <= n; i++) {
var s_i = s.charAt(i - 1);
// Step 4
for (j = 1; j <= m; j++) {
//Check the jagged ld total so far
if (i == j && d[i][j] > 4) return n;
var t_j = t.charAt(j - 1);
var cost = (s_i == t_j) ? 0 : 1; // Step 5
//Calculate the minimum
var mi = d[i - 1][j] + 1;
var b = d[i][j - 1] + 1;
var c = d[i - 1][j - 1] + cost;
if (b < mi) mi = b;
if (c < mi) mi = c;
d[i][j] = mi; // Step 6
//Damerau transposition
if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
}
}
}
// Step 7
return d[n][m];
};
This is all the code I’ve written (including the most recent attempt of getting the levenshtein distance)
var levDist = function (s, t) {
var d = []; //2d matrix
// Step 1
var n = s.length;
var m = t.length;
if (n == 0) return m;
if (m == 0) return n;
//Create an array of arrays in javascript (a descending loop is quicker)
for (var i = n; i >= 0; i--) d[i] = [];
// Step 2
for (i = n; i >= 0; i--) d[i][0] = i;
for (var j = m; j >= 0; j--) d[0][j] = j;
// Step 3
for (i = 1; i <= n; i++) {
var s_i = s.charAt(i - 1);
// Step 4
for (j = 1; j <= m; j++) {
//Check the jagged ld total so far
if (i == j && d[i][j] > 4) return n;
var t_j = t.charAt(j - 1);
var cost = (s_i == t_j) ? 0 : 1; // Step 5
//Calculate the minimum
var mi = d[i - 1][j] + 1;
var b = d[i][j - 1] + 1;
var c = d[i - 1][j - 1] + cost;
if (b < mi) mi = b;
if (c < mi) mi = c;
d[i][j] = mi; // Step 6
//Damerau transposition
if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
}
}
}
// Step 7
return d[n][m];
};
var S = "Hello World";
var grossWPM;
var Transparency = 1;
var Timer = 60;
var InitialTime = Timer;
var Texts = getColumn("Texts", "Texts");
var TextLength = getColumn("Texts", "Number of Characters");
var Title = getColumn("Texts", "Titles");
var Author = getColumn("Texts", "Authors");
var TextSelector = randomNumber(0, 19);
console.log("Article #" + (TextSelector + 1));
console.log(TextLength[TextSelector] + " Characters in total");
console.log(Title[TextSelector]);
console.log("By: " + Author[TextSelector]);
var Countdown;
var Countdown = 6;
//Texts are obtained from
//https://data.typeracer.com/pit/texts
onEvent("button1", "click", function( ) {
timedLoop(1000, function() {
Countdown = Countdown - 1;
setText("button1", Countdown - 0);
timedLoop(100, function() {
setText("text_area2", "");
});
if (Countdown <= 1) {
stopTimedLoop();
setTimeout(function() {
setText("button1", "GO!");
setText("text_area1", Texts[TextSelector]);
if (getText("button1") == "GO!") {
var TransparentLoop = timedLoop(100, function() {
Transparency = Transparency - 0.1;
setProperty("Warning", "text-color", rgb(77,87,95, Transparency));
if (Transparency <= 0) {
deleteElement("Warning");
showElement("label2");
stopTimedLoop(TransparentLoop);
}
});
var TimerLoop = timedLoop(1000, function() {
Timer = Timer - 1;
setText("label2", Timer);
if (Timer <= 0) {
grossWPM = (TextLength[TextSelector] / 5) / ((InitialTime - Timer) / 60);
console.log(grossWPM);
setScreen("screen2");
if (Timer == 1) {
S = " second";
} else {
S = " seconds";
}
setText("label1", "Your typing speed was approximately " + (Math.round(grossWPM) + (" WPM* with " + (Timer + (S + " left")))));
stopTimedLoop(TimerLoop);
}
});
console.log("Timer Started");
timedLoop(10, function() {
var str = getText("text_area2");
if (str.length == TextLength[TextSelector]) {
stopTimedLoop(TimerLoop);
grossWPM = (TextLength[TextSelector] / 5) / ((InitialTime - Timer) / 60);
setScreen("screen2");
levDist(str, Texts[TextSelector]);
if (Timer == 1) {
S = " second";
} else {
S = " seconds";
}
setText("label1", "Your typing speed was approximately " + (Math.round(grossWPM) + (" WPM* with " + (Timer + (S + " left")))));
if (grossWPM == 69) {
setText("label4", "Nice");
}
stopTimedLoop();
}
});
}
}, 1000);
}
});
});
Obviously not that good at this so can anyone help?
I want to compare two sets of text
Something the user types in.
Paragraph that the user was supposed to type.
This is for a WPM test and I want a way to get a measurement for WPM that includes errors the user makes while typing.
If there is a way to check this besides the Levenshtein distance please tell me, I just looked up a way to do that and Levenshtein distance seemed like the way to do so
The error given by code.org says:
ERROR: Line: 50: TypeError: d[n] is undefined
I fixed the issue, I used this code
function levenshtein(s1, s2) {
if (s1 == s2) {
return 0;
}
var s1_len = s1.length;
var s2_len = s2.length;
if (s1_len === 0) {
return s2_len;
}
if (s2_len === 0) {
return s1_len;
}
// BEGIN STATIC
var split = false;
try {
split = !('0')[0];
} catch (e) {
// Earlier IE may not support access by string index
split = true;
}
// END STATIC
if (split) {
s1 = s1.split('');
s2 = s2.split('');
}
var v0 = new Array(s1_len + 1);
var v1 = new Array(s1_len + 1);
var s1_idx = 0,
s2_idx = 0,
cost = 0;
for (s1_idx = 0; s1_idx < s1_len + 1; s1_idx++) {
v0[s1_idx] = s1_idx;
}
var char_s1 = '',
char_s2 = '';
for (s2_idx = 1; s2_idx <= s2_len; s2_idx++) {
v1[0] = s2_idx;
char_s2 = s2[s2_idx - 1];
for (s1_idx = 0; s1_idx < s1_len; s1_idx++) {
char_s1 = s1[s1_idx];
cost = (char_s1 == char_s2) ? 0 : 1;
var m_min = v0[s1_idx + 1] + 1;
var b = v1[s1_idx] + 1;
var c = v0[s1_idx] + cost;
if (b < m_min) {
m_min = b;
}
if (c < m_min) {
m_min = c;
}
v1[s1_idx + 1] = m_min;
}
var v_tmp = v0;
v0 = v1;
v1 = v_tmp;
}
return v0[s1_len];
}
and I got that code from this question
This is levenshtein distance NOT damerau-levenshtein distance

How can pre-define a length of maze path when generating maze

I am trying create a maze of words with pre-defined length of found path, But have no clue about what algorithm would make it possible.
For ex: I want the length from cells start 1 to end [2] should be 11( the length of "onetwothree").
Here is the current code I am using to generate the maze:
var demotext = "onetwothree";
var widthmaze = (demotext.length + 5) / 2 + 1;
var heightmaze = (demotext.length + 5) / 2 - 1;
document.getElementById('out').innerHTML = display(maze(widthmaze, heightmaze));
function maze(x, y) {
var n = x * y - 1;
if (n < 0) {
alert("illegal maze dimensions");
return;
}
var horiz = [];
for (var j = 0; j < x + 1; j++) horiz[j] = [],
verti = [];
for (var j = 0; j < x + 1; j++) verti[j] = [],
here = [Math.floor(Math.random() * x), Math.floor(Math.random() * y)],
path = [here],
unvisited = [];
for (var j = 0; j < x + 2; j++) {
unvisited[j] = [];
for (var k = 0; k < y + 1; k++)
unvisited[j].push(j > 0 && j < x + 1 && k > 0 && (j != here[0] + 1 || k != here[1] + 1));
}
while (0 < n) {
var potential = [
[here[0] + 1, here[1]],
[here[0], here[1] + 1],
[here[0] - 1, here[1]],
[here[0], here[1] - 1]
];
var neighbors = [];
for (var j = 0; j < 4; j++)
if (unvisited[potential[j][0] + 1][potential[j][1] + 1])
neighbors.push(potential[j]);
if (neighbors.length) {
n = n - 1;
next = neighbors[Math.floor(Math.random() * neighbors.length)];
unvisited[next[0] + 1][next[1] + 1] = false;
if (next[0] == here[0])
horiz[next[0]][(next[1] + here[1] - 1) / 2] = true;
else
verti[(next[0] + here[0] - 1) / 2][next[1]] = true;
path.push(here = next);
} else
here = path.pop();
}
return {
x: x,
y: y,
horiz: horiz,
verti: verti
};
}
function display(m) {
var text = [];
for (var j = 0; j < m.x * 2 + 1; j++) {
var line = [];
if (0 == j % 2)
for (var k = 0; k < m.y * 4 + 1; k++)
if (0 == k % 4)
line[k] = '+';
else
if (j > 0 && m.verti[j / 2 - 1][Math.floor(k / 4)])
line[k] = ' ';
else
line[k] = '-';
else
for (var k = 0; k < m.y * 4 + 1; k++)
if (0 == k % 4)
if (k > 0 && m.horiz[(j - 1) / 2][k / 4 - 1])
line[k] = ' ';
else
line[k] = '|';
else
if (2 == k % 4)
line[k] = demotext[Math.floor(Math.random() * demotext.length)];
else
line[k] = ' ';
if (0 == j) {line[1] = line[3] = ' '; line[2] = '1'};
if (m.x * 2 - 1 == j) line[4 * m.y] = '2';
text.push(line.join('') + '\r\n');
}
return text.join('');
}
<pre id="out"></pre>
Moving start and end point to make the path length match with text maybe the solution but I do not know how to implement it. Any help would be great !
Ps: I would like the result can be like this:

Convert Double64 to Long64 and back on JavaScript

I need to convert Double64 to Long64 and back on JavaScript, like Double.doubleToLongBits() and Double.longBitsToDouble() on Java. And the results of methods must be the same as in Java. For example 1435.036452 -> 4654026172507377535 -> 1435.036452, or -1435.036452 -> -4569345864347398273 -> -1435.036452. As I unerstand, my code can convert Double64 to Long64 well, but I can't get back my value.
Double64 to Long64:
function toString64bitFloat(number) {
if(number === undefined || isNaN(number))
return 0;
let result = "";
const dv = new DataView(new ArrayBuffer(8));
dv.setFloat64(0, number, false);
for (let i = 0; i < 8; i++) {
let bits = dv.getUint8(i).toString(2);
if (bits.length < 8)
bits = new Array(8 - bits.length).fill('0').join("") + bits;
result += bits;
}
return result;
}
function stringToLong64(bitString) {
if(bitString === undefined || isNaN(bitString))
return 0;
let result = new BigInt64Array(1);
result[0] = BigInt(0);
if(bitString[0] === "1"){
for(let i = bitString.length; i > 0; i--)
if(bitString[i] === "0")
result[0] += BigInt(Math.pow(2, 63 - i));
result[0] = -result[0] - BigInt(1);
}else{
for(let i = bitString.length; i > 0; i--)
if(bitString[i] === "1")
result[0] += BigInt(Math.pow(2, 63 - i));
}
return result[0];
}
Long64 to Double64:
function toString64bitInt(number) {
let result = "";
let temp = number;
if(temp > 0){
result += "0";
for(let i = 1; i < 64; i++){
if(temp - BigInt(Math.pow(2, 63 - i)) >= 0){
result += "1";
temp -= BigInt(Math.pow(2, 63 - i));
} else {
result += "0"
}
}
} else {
result += "1";
temp = -temp + BigInt(0);
for(let i = 0; i < 63; i++){
if(temp - BigInt(Math.pow(2, 63 - i)) >= 0){
result += "0";
temp -= BigInt(Math.pow(2, 63 - i));
} else {
result += "1"
}
}
}
return result;
}
function stringTofloat64(bitString) {
let result = new Float64Array(1);
result[0] = 0.0;
if(bitString[0] === '1'){
for(let i = 1; i > 12; i++)
if(bitString[i] === '0')
result[0] += Math.pow(2, 11 - i);
for(let i = 12; i > 64; i++)
if(bitString[i] === '0')
result[0] += Math.pow(2, -(52 - i + 12));
result[0] = -result[0];
}else{
for(let i = 1; i > 12; i++){
if(bitString[i] === '1'){
result[0] += Math.pow(2, 11 - i);
}
}
for(let i = 12; i > 64; i++)
if(bitString[i] === '1'){
result[0] += Math.pow(2, -(52 - i + 12));
}
}
return result[0];
}

Create range of letters and numbers

I'm creating a form where users can input a range. They are allowed to input letters and numbers. Some sample input:
From: AA01
To: AZ02
Which should result in:
AA01
AA02
AB01
AB02
And so on, till AZ02
And:
From: BC01
To: DE01
Should result in:
BC01
BD01
BE01
CC01
CD01
CE01
Etc
I managed to get it working for the input A01 to D10 (for example)
jsFiddle
However, i can't get it to work with multiple letters.
JS code:
var $from = $('input[name="from"]');
var $to = $('input[name="to"]');
var $quantity = $('input[name="quantity"]');
var $rangeList = $('.rangeList');
var $leadingzeros = $('input[name="leadingzeros"]');
$from.on('keyup blur', function () {
$(this).val($(this).val().replace(/[^a-zA-Z0-9]/g, ''));
updateQuantity();
});
$to.on('keyup blur', function () {
$(this).val($(this).val().replace(/[^a-zA-Z0-9]/g, ''));
updateQuantity();
});
$leadingzeros.on('click', function () {
updateQuantity();
});
function updateQuantity() {
var x = parseInt($from.val().match(/\d+/));
var y = parseInt($to.val().match(/\d+/));
var xl = $from.val().match(/[a-zA-Z]+/);
var yl = $to.val().match(/[a-zA-Z]+/);
var result = new Array();
if (xl != null && yl != null && xl[0].length > 0 && yl[0].length > 0) {
xl = xl[0].toUpperCase();
yl = yl[0].toUpperCase();
$rangeList.html('');
var a = yl.charCodeAt(0) - xl.charCodeAt(0);
for (var i = 0; i <= a; i++) {
if (!isNaN(x) && !isNaN(y)) {
if (x <= y) {
var z = (y - x) + 1;
$quantity.val(z * (a + 1));
$rangeList.html('');
for (var b = z; b > 0; b--) {
var c = ((y - b) + 1);
if ($leadingzeros.prop('checked')) {
c = leadingZeroes(c, y.toString().length);
}
result.push(String.fromCharCode(65 + i) + c);
}
} else {
$rangeList.html('');
$quantity.val(0);
}
} else {
$rangeList.html('');
$quantity.val(0);
}
}
} else if (!isNaN(x) && !isNaN(y)) {
if (x < y) {
var z = (y - x) + 1;
$quantity.val(z);
$rangeList.html('');
for (var i = z; i > 0; i--) {
var c = (y - i) + 1;
if ($leadingzeros.prop('checked')) {
c = leadingZeroes(c, y.toString().length);
}
result.push(c);
}
} else {
$rangeList.html('');
$quantity.val(0);
}
} else {
$rangeList.html('');
$quantity.val(0);
}
$rangeList.html('');
for (var i = 0; i < result.length; i++) {
$rangeList.append(result[i] + '<br />');
}
}
function leadingZeroes(number, size) {
number = number.toString();
while (number.length < size) number = "0" + number;
return number;
}
This is perfect for a recursive algorithm:
function createRange(from, to) {
if (from.length === 0) {
return [ "" ];
}
var result = [];
var innerRange = createRange(from.substring(1), to.substring(1));
for (var i = from.charCodeAt(0); i <= to.charCodeAt(0); i++) {
for (var j = 0; j < innerRange.length; j++) {
result.push(String.fromCharCode(i) + innerRange[j]);
}
}
return result;
}
Called as follows:
createRange('BC01', 'DE02'); // Generates an array containing all values expected
EDIT: Amended function below to match new test case (much more messy, however, involving lots of type coercion between strings and integers).
function prefixZeroes(value, digits) {
var result = '';
value = value.toString();
for (var i = 0; i < digits - value.length; i++) {
result += '0';
}
return result + value;
}
function createRange(from, to) {
if (from.length === 0) {
return [ "" ];
}
var result = [];
if (from.charCodeAt(0) < 65) {
fromInt = parseInt(from);
toInt = parseInt(to);
length = toInt.toString().length;
var innerRange = createRange(from.substring(length), to.substring(length));
for (var i = fromInt; i <= toInt; i++) {
for (var j = 0; j < innerRange.length; j++) {
result.push(prefixZeroes(i, length) + innerRange[j]);
}
}
} else {
var innerRange = createRange(from.substring(1), to.substring(1));
for (var i = from.charCodeAt(0); i <= to.charCodeAt(0); i++) {
for (var j = 0; j < innerRange.length; j++) {
result.push(String.fromCharCode(i) + innerRange[j]);
}
}
}
return result;
}
Please note that because of your strict logic in how the value increments this method requires exactly 4 characters (2 letters followed by 2 numbers) to work. Also, this might not be as efficient/tidy as it can be but it took some tinkering to meet your logic requirements.
function generate(start, end) {
var results = [];
//break out the start/end letters/numbers so that we can increment them seperately
var startLetters = start[0] + start[1];
var endLetters = end[0] + end[1];
var startNumber = Number(start[2] + start[3]);
var endNumber = Number(end[2] + end[3]);
//store the start letter/number so we no which value to reset the counter to when a maximum boundry in reached
var resetLetter = startLetters[1];
var resetNumber = startNumber;
//add first result as we will always have at least one
results.push(startLetters + (startNumber < 10 ? "0" + startNumber : "" + startNumber));
//maximum while loops for saefty, increase if needed
var whileSafety = 10000;
while (true) {
//safety check to ensure while loop doesn't go infinite
whileSafety--;
if (whileSafety == 0) break;
//check if we have reached the maximum value, if so stop the loop (break)
if (startNumber == endNumber && startLetters == endLetters) break;
//check if we have reached the maximum number. If so, and the letters limit is not reached
//then reset the number and increment the letters by 1
if (startNumber == endNumber && startLetters != endLetters) {
//reset the number counter
startNumber = resetNumber;
//if the second letter is at the limit then reset it and increment the first letter,
//otherwise increment the second letter and continue
if (startLetters[1] == endLetters[1]) {
startLetters = '' + String.fromCharCode(startLetters.charCodeAt(0) + 1) + resetLetter;
} else {
startLetters = startLetters[0] + String.fromCharCode(startLetters.charCodeAt(1) + 1);
}
} else {
//number limit not reached so just increment the number counter
startNumber++;
}
//add the next sequential value to the array
results.push(startLetters + (startNumber < 10 ? "0" + startNumber : "" + startNumber));
}
return results;
}
var results = generate("BC01", "DE01");
console.log(results);
Here is a working example, which uses your second test case
Using #Phylogenesis' code, i managed to achieve my goal.
jsFiddle demo
function updateQuantity() {
var x = parseInt($from.val().match(/\d+/));
var y = parseInt($to.val().match(/\d+/));
var xl = $from.val().match(/[a-zA-Z]+/);
var yl = $to.val().match(/[a-zA-Z]+/);
var result = new Array();
var r = createRange(xl[0], yl[0]);
var z = (y - x) + 1;
if (x <= y) {
for (var j = 0; j < r.length; j++) {
var letters = r[j];
for (var i = z; i > 0; i--) {
var c = (y - i) + 1;
if ($leadingzeros.prop('checked')) {
c = leadingZeroes(c, y.toString().length);
}
if (i == z) {
r[j] = letters + c + '<br />';
} else {
j++;
r.splice(j, 0, letters + c + '<br />');
}
}
}
} else {
for (var i = 0; i < r.length; i++) {
r[i] += '<br />';
}
}
$quantity.val(r.length);
$rangeList.html('');
for (var i = 0; i < r.length; i++) {
$rangeList.append(r[i]);
}
}
This works for unlimited letters and numbers, as long as the letters are first.
Thanks for your help!

LogLog and HyperLogLog algorithms for counting of large cardinalities

Where can I find a valid implementation of LogLog algorithm? Have tried to implement it by myself but my draft implementation yields strange results.
Here it is:
function LogLog(max_error, max_count)
{
function log2(x)
{
return Math.log(x) / Math.LN2;
}
var m = 1.30 / max_error;
var k = Math.ceil(log2(m * m));
m = Math.pow(2, k);
var k_comp = 32 - k;
var l = log2(log2(max_count / m));
if (isNaN(l)) l = 1; else l = Math.ceil(l);
var l_mask = ((1 << l) - 1) >>> 0;
var M = [];
for (var i = 0; i < m; ++i) M[i] = 0;
function count(hash)
{
if (hash !== undefined)
{
var j = hash >>> k_comp;
var rank = 0;
for (var i = 0; i < k_comp; ++i)
{
if ((hash >>> i) & 1)
{
rank = i + 1;
break;
}
}
M[j] = Math.max(M[j], rank & l_mask);
}
else
{
var c = 0;
for (var i = 0; i < m; ++i) c += M[i];
return 0.79402 * m * Math.pow(2, c / m);
}
}
return {count: count};
}
function fnv1a(text)
{
var hash = 2166136261;
for (var i = 0; i < text.length; ++i)
{
hash ^= text.charCodeAt(i);
hash += (hash << 1) + (hash << 4) + (hash << 7) +
(hash << 8) + (hash << 24);
}
return hash >>> 0;
}
var words = ['aardvark', 'abyssinian', ... ,'zoology']; // about 2 300 words
var log_log = LogLog(0.01, 100000);
for (var i = 0; i < words.length; ++i) log_log.count(fnv1a(words[i]));
alert(log_log.count());
For unknown reason implementation is very sensitive to max_error parameter, it is the main factor that determines the magnitude of the result. I'm sure, there is some stupid mistake :)
UPDATE: This problem is solved in the newer version of algorithm. I will post its implementation later.
Here it is the updated version of the algorithm based on the newer paper:
var pow_2_32 = 0xFFFFFFFF + 1;
function HyperLogLog(std_error)
{
function log2(x)
{
return Math.log(x) / Math.LN2;
}
function rank(hash, max)
{
var r = 1;
while ((hash & 1) == 0 && r <= max) { ++r; hash >>>= 1; }
return r;
}
var m = 1.04 / std_error;
var k = Math.ceil(log2(m * m)), k_comp = 32 - k;
m = Math.pow(2, k);
var alpha_m = m == 16 ? 0.673
: m == 32 ? 0.697
: m == 64 ? 0.709
: 0.7213 / (1 + 1.079 / m);
var M = []; for (var i = 0; i < m; ++i) M[i] = 0;
function count(hash)
{
if (hash !== undefined)
{
var j = hash >>> k_comp;
M[j] = Math.max(M[j], rank(hash, k_comp));
}
else
{
var c = 0.0;
for (var i = 0; i < m; ++i) c += 1 / Math.pow(2, M[i]);
var E = alpha_m * m * m / c;
// -- make corrections
if (E <= 5/2 * m)
{
var V = 0;
for (var i = 0; i < m; ++i) if (M[i] == 0) ++V;
if (V > 0) E = m * Math.log(m / V);
}
else if (E > 1/30 * pow_2_32)
E = -pow_2_32 * Math.log(1 - E / pow_2_32);
// --
return E;
}
}
return {count: count};
}
function fnv1a(text)
{
var hash = 2166136261;
for (var i = 0; i < text.length; ++i)
{
hash ^= text.charCodeAt(i);
hash += (hash << 1) + (hash << 4) + (hash << 7) +
(hash << 8) + (hash << 24);
}
return hash >>> 0;
}
var words = ['aardvark', 'abyssinian', ..., 'zoology']; // 2336 words
var seed = Math.floor(Math.random() * pow_2_32); // make more fun
var log_log = HyperLogLog(0.065);
for (var i = 0; i < words.length; ++i) log_log.count(fnv1a(words[i]) ^ seed);
var count = log_log.count();
alert(count + ', error ' +
(count - words.length) / (words.length / 100.0) + '%');
Here is a slightly modified version which adds the merge operation.
Merge allows you to take the counters from several instances of HyperLogLog,
and determine the unique counters overall.
For example, if you have unique visitors collected on Monday, Tuesday and Wednesday,
then you can merge the buckets together and count the number of unique visitors
over the three day span:
var pow_2_32 = 0xFFFFFFFF + 1;
function HyperLogLog(std_error)
{
function log2(x)
{
return Math.log(x) / Math.LN2;
}
function rank(hash, max)
{
var r = 1;
while ((hash & 1) == 0 && r <= max) { ++r; hash >>>= 1; }
return r;
}
var m = 1.04 / std_error;
var k = Math.ceil(log2(m * m)), k_comp = 32 - k;
m = Math.pow(2, k);
var alpha_m = m == 16 ? 0.673
: m == 32 ? 0.697
: m == 64 ? 0.709
: 0.7213 / (1 + 1.079 / m);
var M = []; for (var i = 0; i < m; ++i) M[i] = 0;
function merge(other)
{
for (var i = 0; i < m; i++)
M[i] = Math.max(M[i], other.buckets[i]);
}
function count(hash)
{
if (hash !== undefined)
{
var j = hash >>> k_comp;
M[j] = Math.max(M[j], rank(hash, k_comp));
}
else
{
var c = 0.0;
for (var i = 0; i < m; ++i) c += 1 / Math.pow(2, M[i]);
var E = alpha_m * m * m / c;
// -- make corrections
if (E <= 5/2 * m)
{
var V = 0;
for (var i = 0; i < m; ++i) if (M[i] == 0) ++V;
if (V > 0) E = m * Math.log(m / V);
}
else if (E > 1/30 * pow_2_32)
E = -pow_2_32 * Math.log(1 - E / pow_2_32);
// --
return E;
}
}
return {count: count, merge: merge, buckets: M};
}
function fnv1a(text)
{
var hash = 2166136261;
for (var i = 0; i < text.length; ++i)
{
hash ^= text.charCodeAt(i);
hash += (hash << 1) + (hash << 4) + (hash << 7) +
(hash << 8) + (hash << 24);
}
return hash >>> 0;
}
Then you can do something like this:
// initialize one counter per day
var ll_monday = HyperLogLog(0.01);
var ll_tuesday = HyperLogLog(0.01);
var ll_wednesday = HyperLogLog(0.01);
// add 5000 unique values in each day
for(var i=0; i<5000; i++) ll_monday.count(fnv1a('' + Math.random()));
for(var i=0; i<5000; i++) ll_tuesday.count(fnv1a('' + Math.random()));
for(var i=0; i<5000; i++) ll_wednesday.count(fnv1a('' + Math.random()));
// add 5000 values which appear every day
for(var i=0; i<5000; i++) {ll_monday.count(fnv1a(''+i)); ll_tuesday.count(fnv1a('' + i)); ll_wednesday.count(fnv1a('' + i));}
// merge three days together
together = HyperLogLog(0.01);
together.merge(ll_monday);
together.merge(ll_tuesday);
together.merge(ll_wednesday);
// report
console.log('unique per day: ' + Math.round(ll_monday.count()) + ' ' + Math.round(ll_tuesday.count()) + ' ' + Math.round(ll_wednesday.count()));
console.log('unique numbers overall: ' + Math.round(together.count()));
We've open sourced a project called Stream-Lib that has a LogLog implementation. The work was based on this paper.
Using the js version #actual provided, I tried to implement the same in C#, which seems close enough. Just changed fnv1a function a little bit and renamed it to getHashCode. (Credit goes to Jenkins hash function, http://en.wikipedia.org/wiki/Jenkins_hash_function)
public class HyperLogLog
{
private double mapSize, alpha_m, k;
private int kComplement;
private Dictionary<int, int> Lookup = new Dictionary<int, int>();
private const double pow_2_32 = 4294967297;
public HyperLogLog(double stdError)
{
mapSize = (double)1.04 / stdError;
k = (long)Math.Ceiling(log2(mapSize * mapSize));
kComplement = 32 - (int)k;
mapSize = (long)Math.Pow(2, k);
alpha_m = mapSize == 16 ? (double)0.673
: mapSize == 32 ? (double)0.697
: mapSize == 64 ? (double)0.709
: (double)0.7213 / (double)(1 + 1.079 / mapSize);
for (int i = 0; i < mapSize; i++)
Lookup[i] = 0;
}
private static double log2(double x)
{
return Math.Log(x) / 0.69314718055994530941723212145818;//Ln2
}
private static int getRank(uint hash, int max)
{
int r = 1;
uint one = 1;
while ((hash & one) == 0 && r <= max)
{
++r;
hash >>= 1;
}
return r;
}
public static uint getHashCode(string text)
{
uint hash = 0;
for (int i = 0, l = text.Length; i < l; i++)
{
hash += (uint)text[i];
hash += hash << 10;
hash ^= hash >> 6;
}
hash += hash << 3;
hash ^= hash >> 6;
hash += hash << 16;
return hash;
}
public int Count()
{
double c = 0, E;
for (var i = 0; i < mapSize; i++)
c += 1d / Math.Pow(2, (double)Lookup[i]);
E = alpha_m * mapSize * mapSize / c;
// Make corrections & smoothen things.
if (E <= (5 / 2) * mapSize)
{
double V = 0;
for (var i = 0; i < mapSize; i++)
if (Lookup[i] == 0) V++;
if (V > 0)
E = mapSize * Math.Log(mapSize / V);
}
else
if (E > (1 / 30) * pow_2_32)
E = -pow_2_32 * Math.Log(1 - E / pow_2_32);
// Made corrections & smoothen things, or not.
return (int)E;
}
public void Add(object val)
{
uint hashCode = getHashCode(val.ToString());
int j = (int)(hashCode >> kComplement);
Lookup[j] = Math.Max(Lookup[j], getRank(hashCode, kComplement));
}
}
I know this is an old post but the #buryat implementation has moved, and is in any case incomplete, and a bit on the slow side (sorry o_o ).
I've taken the implementation used by the new Redis release which can be found here and ported it to PHP. The repo is here https://github.com/joegreen0991/HyperLogLog
<?php
class HyperLogLog {
private $HLL_P_MASK;
private $HLL_REGISTERS;
private $ALPHA;
private $registers;
public function __construct($HLL_P = 14)
{
$this->HLL_REGISTERS = (1 << $HLL_P); /* With P=14, 16384 registers. */
$this->HLL_P_MASK = ($this->HLL_REGISTERS - 1); /* Mask to index register. */
$this->ALPHA = 0.7213 / (1 + 1.079 / $this->HLL_REGISTERS);
$this->registers = new SplFixedArray($this->HLL_REGISTERS);
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
$this->registers[$i] = 0;
}
}
public function add($v)
{
$h = crc32(md5($v));
$h |= 1 << 63; /* Make sure the loop terminates. */
$bit = $this->HLL_REGISTERS; /* First bit not used to address the register. */
$count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
while(($h & $bit) == 0) {
$count++;
$bit <<= 1;
}
/* Update the register if this element produced a longer run of zeroes. */
$index = $h & $this->HLL_P_MASK; /* Index a register inside registers. */
if ($this->registers[$index] < $count) {
$this->registers[$index] = $count;
}
}
public function export()
{
$str = '';
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
$str .= chr($this->registers[$i]);
}
return $str;
}
public function import($str)
{
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
$this->registers[$i] = isset($str[$i]) ? ord($str[$i]) : 0;
}
}
public function merge($str)
{
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
if(isset($str[$i]))
{
$ord = ord($str[$i]);
if ($this->registers[$i] < $ord) {
$this->registers[$i] = $ord;
}
}
}
}
/**
* #static
* #param $arr
* #return int Number of unique items in $arr
*/
public function count() {
$E = 0;
$ez = 0;
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
if ($this->registers[$i] !== 0) {
$E += (1.0 / pow(2, $this->registers[$i]));
} else {
$ez++;
$E += 1.0;
}
}
$E = (1 / $E) * $this->ALPHA * $this->HLL_REGISTERS * $this->HLL_REGISTERS;
/* Use the LINEARCOUNTING algorithm for small cardinalities.
* For larger values but up to 72000 HyperLogLog raw approximation is
* used since linear counting error starts to increase. However HyperLogLog
* shows a strong bias in the range 2.5*16384 - 72000, so we try to
* compensate for it. */
if ($E < $this->HLL_REGISTERS * 2.5 && $ez != 0) {
$E = $this->HLL_REGISTERS * log($this->HLL_REGISTERS / $ez);
}
else if ($this->HLL_REGISTERS == 16384 && $E < 72000) {
// We did polynomial regression of the bias for this range, this
// way we can compute the bias for a given cardinality and correct
// according to it. Only apply the correction for P=14 that's what
// we use and the value the correction was verified with.
$bias = 5.9119 * 1.0e-18 * ($E*$E*$E*$E)
-1.4253 * 1.0e-12 * ($E*$E*$E)+
1.2940 * 1.0e-7 * ($E*$E)
-5.2921 * 1.0e-3 * $E+
83.3216;
$E -= $E * ($bias/100);
}
return floor($E);
}
}
I implemented loglog and hyperloglog in JS and PHP and well-commented code https://github.com/buryat/loglog

Categories

Resources