Javascript Memoization in Browser Not Seeing Speed Up - javascript

I am attempting to memoize a function in javascript, to be run in browser, client side. Writing this function in R (the language I am most comfortable using). In R, I see significant benefits from using memoization (4 minutes run time to 0.02 seconds for P_n(7/10, 20, 15, 6, 1) ). When I rewrite the function in javascript, I see almost no benefit. What is the problem with my javascript code (or am I going about this the wrong way entirely)?
Below are the memoized functions in R and javascript respectively. The R function (first of the two) runs very fast compared to the naive recursion, while javascript essentially sees no difference. Some amount of memoization is happening, however, because if I run the exact same function call twice, i.e. P_memo(7/10, 20, 15, 6, 1) and then P_memo(7/10, 20, 15, 6, 1) again, the second call takes 0 time. The first call should be dramatically quicker due to re-use of intermediate calls in the recursion.
P_n <- (function() {
# Memoization handled through the use of cache
cache <- NULL
cache_reset <- function() {
cache <<- new.env(TRUE, emptyenv())
}
cache_set <- function(key, value) {
assign(key, value, envir = cache)
}
cache_get <- function(key) {
get(key, envir = cache, inherits = FALSE)
}
cache_has_key <- function(key) {
exists(key, envir = cache, inherits = FALSE)
}
# Initialize the cache
cache_reset()
# This is the function that gets returned by the anonymous function and
# becomes P_n
function(rho, n, i, k, s) {
nc <- paste(rho, n, i, k, s)
# Handle "vectors" by element
if(length(n) > 1){
return(lapply(n, function(n) sapply(n, P_n, rho = rho, i = 1:(n+k), k = k, s = s)))
}
if (length(i) > 1) {
return(sapply(i, P_n, rho = rho, n = n, k = k, s = s))
}
# Cached cases
if (cache_has_key(nc))
return(cache_get(nc))
# Everything else
#proposition 1
if(i == (n+k)){
#print('Proposition 1')
if(k >= s){
return((rho / (rho + 1))^n)
}else if( (k+n) <= s){
product_iter = 1
for(j in 1:n){
product_iter = product_iter * ( rho + (k + j - 1)/s )
}
out = rho^n / product_iter
cache_set(nc, out)
return(out)
}else if( k < s & s < (k + n)){
product_iter2 = 1
for(j in 1:(s-k)){
product_iter2 = product_iter2 * ( rho + (k + j - 1)/s )
}
product_denom = ((rho + 1)^(n-s+k)) * product_iter2
out = rho^n / product_denom
cache_set(nc, out)
return(out)
}
}
#proposition 2
else if(k == 0 & n == i){
#print('Proposition 2')
if(n <= s){
product_iter11 = 1
for(j in 1:n){
product_iter11 = product_iter11 * (rho + (j - 1)/s)
}
return(rho^n / product_iter11)
}else if(n > s){
product_iter12 = 1
for(j in 1:s){
product_iter12 = product_iter12 * ( rho + (j - 1)/s )
}
product_denom12 = ((rho + 1)^(n-s)) * product_iter12
out = rho^n / product_denom12
cache_set(nc, out)
return(out)
}
}
#if i = 1
else if(i == 1){
upsum = 0
for(j in 2:(n + k)){
upsum = upsum + P_n(rho, n, j, k, s)
}
out = 1 - upsum
cache_set(nc, out)
return(out)
}
#proposition 3
else if(n == 1 & 2 <= i & i <= k){
#print('Proposition 3')
if(k <= s){
begin = rho / (rho + (i - 1)/s)
product_iter13 = 1
for(j in 1:(k-i+1)){
product_iter13 = product_iter13 * (1 - rho / (rho + (k - j + 1)/s) )
}
out = begin * product_iter13
cache_set(nc, out)
return(out)
}else if(k > s & i > s){
out = rho / (rho+1)^(k-i+2)
cache_set(nc, out)
return(out)
}else if(i <= s & s <= k){
begin2 = rho / (( rho + 1 )^(k - s + 1) * ( rho + (i - 1)/s))
product_iter14 = 1
for(j in 1:(s-i)){
product_iter14 = product_iter14 * (1 - rho / (rho + (s - j)/s) )
}
out = begin2 * product_iter14
cache_set(nc, out)
return(out)
}
}
#proposition 4
else if( n >= 2 & 2 <= i & i <= (k + n - 1)){
#print('Proposition 4')
if(i>s){
begin11 = rho/(rho+1)
product_iter21 = 0
for(j in (i-1):(k+n-1)){
product_iter21 = product_iter21 + (1 / (rho+1))^(j-i+1) * P_n(rho, n-1, j, k, s)
}
out = begin11 * product_iter21
cache_set(nc, out)
return(out)
}else if(i <= s){
begin12 = rho / (rho + (i-1)/s)
summer1 = 0
for(j in (i-1):(s-1)){
product_iter22 = 1
for(h in 1:(j-i+1)){
product_iter22 = product_iter22 * (1 - rho / (rho + (j - h + 1) / s))
}
summer1 = summer1 + product_iter22 * P_n(rho, n-1, j, k, s)
}
product_iter23 = 1
for(h in 1:(s-i)){
product_iter23 = product_iter23 * (1 - rho / (rho + (s-h) / s))
}
summer2 = 0
for(j in s:(k+n-1)){
summer2 = summer2 + ((1 / (rho + 1))^(j-s+1) * P_n(rho, n-1, j, k, s))
}
bottom = product_iter23 * summer2
inner = summer1 + bottom
out = begin12 * inner
cache_set(nc, out)
return(out)
}
}
#check if missed all propositions
else{
stop("No proposition detected")
}
}
})()
var P_memo = (function() {
var memo = {};
var slice = Array.prototype.slice;
function f(rho, n, i, k, s){
var args = slice.call(arguments);
var value;
if (args in memo) {
return(memo[args]);
}else{
// NOTES ON THE UNITS OF INPUTS
//rho: ratio of lambda / tau
// n: arrival of nth customer
// i: are i customers in queue
// k : number of customers at t = 0
// s: machines in use
//proposition 1
if(i == (n+k)){
//print('Proposition 1')
if(k >= s){
return(Math.pow(rho / (rho + 1), n));
}else if( (k+n) <= s){
var product_iter = 1;
for(var j=1; j<= n; j++){
product_iter = product_iter * ( rho + (k + j - 1)/s );
}
return(Math.pow(rho, n) / product_iter);
}else if( k < s && s < (k + n)){
var product_iter2 = 1;
for(var j=1; j<= s-k; j++){
product_iter2 = product_iter2 * ( rho + (k + j - 1)/s );
}
product_denom = Math.pow((rho + 1), (n-s+k)) * product_iter2;
return(Math.pow(rho, n) / product_denom);
}
}
//proposition 2
else if(k == 0 && n == i){
if(n <= s){
var product_iter11 = 1;
for(var j=1; j<= n; j++){
product_iter11 = product_iter11 * (rho + (j - 1)/s);
}
return(Math.pow(rho, n) / product_iter11);
}else if(n > s){
var product_iter12 = 1;
for(var j=1; j<= s; j++){
product_iter12 = product_iter12 * ( rho + (j - 1)/s );
}
product_denom12 = Math.pow((rho + 1), (n-s)) * product_iter12;
return(Math.pow(rho, n) / product_denom12);
}
}
//if i = 1
else if(i == 1){
var upsum = 0;
for(var j=2; j<= (n+k); j++){
upsum = upsum + f(rho, n, j, k, s);
}
return(1 - upsum);
}
//proposition 3
else if(n == 1 && 2 <= i && i <= k){
if(k <= s){
begin = rho / (rho + (i - 1)/s);
var product_iter13 = 1;
for(var j=1; j<= (k-i+1); j++){
product_iter13 = product_iter13 * (1 - rho / (rho + (k - j + 1)/s) );
}
return(begin * product_iter13);
}else if(k > s && i > s){
return(rho / Math.pow((rho+1), (k-i+2)));
}else if(i <= s && s <= k){
begin2 = rho / (Math.pow( (rho + 1), (k - s + 1)) * ( rho + (i - 1)/s));
var product_iter14 = 1;
for(var j=1; j<= (s-i); j++){
product_iter14 = product_iter14 * (1 - rho / (rho + (s - j)/s) );
}
return(begin2 * product_iter14);
}
}
//proposition 4
else if( n >= 2 && 2 <= i && i <= (k + n - 1)){
if(i>s){
begin11 = rho/(rho+1);
var product_iter21 = 0;
for(var j=(i-1); j<= (k+n-1); j++){
product_iter21 = product_iter21 + Math.pow((1 / (rho+1)),(j-i+1)) * f(rho, n-1, j, k, s);
}
return(begin11 * product_iter21);
}else if(i <= s){
begin12 = rho / (rho + (i-1)/s);
var summer1 = 0;
for(var j=(i-1); j<= (s-1); j++){
var product_iter22 = 1;
for(var h=1; h<=(j-1+1); h++){
product_iter22 = product_iter22 * (1 - rho / (rho + (j - h + 1) / s));
}
summer1 = summer1 + product_iter22 * f(rho, n-1, j, k, s);
}
var product_iter23 = 1;
for(var h=1; h<=(s-i); h++){
product_iter23 = product_iter23 * (1 - rho / (rho + (s-h) / s));
}
var summer2 = 0;
for(var j=s; j<= (k+n-1); j++){
summer2 = summer2 + (Math.pow((1 / (rho + 1)), (j-s+1)) * f(rho, n-1, j, k, s)) ;
}
bottom = product_iter23 * summer2;
inner = summer1 + bottom;
return(begin12 * inner);
}
}
}
}
//Closure of f(), self-executing anonymous function
return f;
})();

Your memoization has two flaws:
(1) You never add results to memo.
(2) args in memo casts args to a string. That will work for an array of numbers, but it might fail for other inputs.
I'd write a generic version of memo like this:
const memo = fn => {
const cache = {};
return (...args) => {
const key = JSON.stringify(args);
if(key in memo) return memo[key];
return memo[key] = fn(...args);
};
};
const memoizedF = memo(f);

Related

using Damerau-Levenshtein distance to compare sets of text in code.org

Not very knowledgeable with coding, I usually use block coding and not typing.
I've used many different Levenshtein distance codes I've found online and most of them didn't work for one reason or another
var levDist = function (s, t) {
var d = []; //2d matrix
// Step 1
var n = s.length;
var m = t.length;
if (n == 0) return m;
if (m == 0) return n;
//Create an array of arrays in javascript (a descending loop is quicker)
for (var i = n; i >= 0; i--) d[i] = [];
// Step 2
for (i = n; i >= 0; i--) d[i][0] = i;
for (var j = m; j >= 0; j--) d[0][j] = j;
// Step 3
for (i = 1; i <= n; i++) {
var s_i = s.charAt(i - 1);
// Step 4
for (j = 1; j <= m; j++) {
//Check the jagged ld total so far
if (i == j && d[i][j] > 4) return n;
var t_j = t.charAt(j - 1);
var cost = (s_i == t_j) ? 0 : 1; // Step 5
//Calculate the minimum
var mi = d[i - 1][j] + 1;
var b = d[i][j - 1] + 1;
var c = d[i - 1][j - 1] + cost;
if (b < mi) mi = b;
if (c < mi) mi = c;
d[i][j] = mi; // Step 6
//Damerau transposition
if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
}
}
}
// Step 7
return d[n][m];
};
This is all the code I’ve written (including the most recent attempt of getting the levenshtein distance)
var levDist = function (s, t) {
var d = []; //2d matrix
// Step 1
var n = s.length;
var m = t.length;
if (n == 0) return m;
if (m == 0) return n;
//Create an array of arrays in javascript (a descending loop is quicker)
for (var i = n; i >= 0; i--) d[i] = [];
// Step 2
for (i = n; i >= 0; i--) d[i][0] = i;
for (var j = m; j >= 0; j--) d[0][j] = j;
// Step 3
for (i = 1; i <= n; i++) {
var s_i = s.charAt(i - 1);
// Step 4
for (j = 1; j <= m; j++) {
//Check the jagged ld total so far
if (i == j && d[i][j] > 4) return n;
var t_j = t.charAt(j - 1);
var cost = (s_i == t_j) ? 0 : 1; // Step 5
//Calculate the minimum
var mi = d[i - 1][j] + 1;
var b = d[i][j - 1] + 1;
var c = d[i - 1][j - 1] + cost;
if (b < mi) mi = b;
if (c < mi) mi = c;
d[i][j] = mi; // Step 6
//Damerau transposition
if (i > 1 && j > 1 && s_i == t.charAt(j - 2) && s.charAt(i - 2) == t_j) {
d[i][j] = Math.min(d[i][j], d[i - 2][j - 2] + cost);
}
}
}
// Step 7
return d[n][m];
};
var S = "Hello World";
var grossWPM;
var Transparency = 1;
var Timer = 60;
var InitialTime = Timer;
var Texts = getColumn("Texts", "Texts");
var TextLength = getColumn("Texts", "Number of Characters");
var Title = getColumn("Texts", "Titles");
var Author = getColumn("Texts", "Authors");
var TextSelector = randomNumber(0, 19);
console.log("Article #" + (TextSelector + 1));
console.log(TextLength[TextSelector] + " Characters in total");
console.log(Title[TextSelector]);
console.log("By: " + Author[TextSelector]);
var Countdown;
var Countdown = 6;
//Texts are obtained from
//https://data.typeracer.com/pit/texts
onEvent("button1", "click", function( ) {
timedLoop(1000, function() {
Countdown = Countdown - 1;
setText("button1", Countdown - 0);
timedLoop(100, function() {
setText("text_area2", "");
});
if (Countdown <= 1) {
stopTimedLoop();
setTimeout(function() {
setText("button1", "GO!");
setText("text_area1", Texts[TextSelector]);
if (getText("button1") == "GO!") {
var TransparentLoop = timedLoop(100, function() {
Transparency = Transparency - 0.1;
setProperty("Warning", "text-color", rgb(77,87,95, Transparency));
if (Transparency <= 0) {
deleteElement("Warning");
showElement("label2");
stopTimedLoop(TransparentLoop);
}
});
var TimerLoop = timedLoop(1000, function() {
Timer = Timer - 1;
setText("label2", Timer);
if (Timer <= 0) {
grossWPM = (TextLength[TextSelector] / 5) / ((InitialTime - Timer) / 60);
console.log(grossWPM);
setScreen("screen2");
if (Timer == 1) {
S = " second";
} else {
S = " seconds";
}
setText("label1", "Your typing speed was approximately " + (Math.round(grossWPM) + (" WPM* with " + (Timer + (S + " left")))));
stopTimedLoop(TimerLoop);
}
});
console.log("Timer Started");
timedLoop(10, function() {
var str = getText("text_area2");
if (str.length == TextLength[TextSelector]) {
stopTimedLoop(TimerLoop);
grossWPM = (TextLength[TextSelector] / 5) / ((InitialTime - Timer) / 60);
setScreen("screen2");
levDist(str, Texts[TextSelector]);
if (Timer == 1) {
S = " second";
} else {
S = " seconds";
}
setText("label1", "Your typing speed was approximately " + (Math.round(grossWPM) + (" WPM* with " + (Timer + (S + " left")))));
if (grossWPM == 69) {
setText("label4", "Nice");
}
stopTimedLoop();
}
});
}
}, 1000);
}
});
});
Obviously not that good at this so can anyone help?
I want to compare two sets of text
Something the user types in.
Paragraph that the user was supposed to type.
This is for a WPM test and I want a way to get a measurement for WPM that includes errors the user makes while typing.
If there is a way to check this besides the Levenshtein distance please tell me, I just looked up a way to do that and Levenshtein distance seemed like the way to do so
The error given by code.org says:
ERROR: Line: 50: TypeError: d[n] is undefined
I fixed the issue, I used this code
function levenshtein(s1, s2) {
if (s1 == s2) {
return 0;
}
var s1_len = s1.length;
var s2_len = s2.length;
if (s1_len === 0) {
return s2_len;
}
if (s2_len === 0) {
return s1_len;
}
// BEGIN STATIC
var split = false;
try {
split = !('0')[0];
} catch (e) {
// Earlier IE may not support access by string index
split = true;
}
// END STATIC
if (split) {
s1 = s1.split('');
s2 = s2.split('');
}
var v0 = new Array(s1_len + 1);
var v1 = new Array(s1_len + 1);
var s1_idx = 0,
s2_idx = 0,
cost = 0;
for (s1_idx = 0; s1_idx < s1_len + 1; s1_idx++) {
v0[s1_idx] = s1_idx;
}
var char_s1 = '',
char_s2 = '';
for (s2_idx = 1; s2_idx <= s2_len; s2_idx++) {
v1[0] = s2_idx;
char_s2 = s2[s2_idx - 1];
for (s1_idx = 0; s1_idx < s1_len; s1_idx++) {
char_s1 = s1[s1_idx];
cost = (char_s1 == char_s2) ? 0 : 1;
var m_min = v0[s1_idx + 1] + 1;
var b = v1[s1_idx] + 1;
var c = v0[s1_idx] + cost;
if (b < m_min) {
m_min = b;
}
if (c < m_min) {
m_min = c;
}
v1[s1_idx + 1] = m_min;
}
var v_tmp = v0;
v0 = v1;
v1 = v_tmp;
}
return v0[s1_len];
}
and I got that code from this question
This is levenshtein distance NOT damerau-levenshtein distance

How can pre-define a length of maze path when generating maze

I am trying create a maze of words with pre-defined length of found path, But have no clue about what algorithm would make it possible.
For ex: I want the length from cells start 1 to end [2] should be 11( the length of "onetwothree").
Here is the current code I am using to generate the maze:
var demotext = "onetwothree";
var widthmaze = (demotext.length + 5) / 2 + 1;
var heightmaze = (demotext.length + 5) / 2 - 1;
document.getElementById('out').innerHTML = display(maze(widthmaze, heightmaze));
function maze(x, y) {
var n = x * y - 1;
if (n < 0) {
alert("illegal maze dimensions");
return;
}
var horiz = [];
for (var j = 0; j < x + 1; j++) horiz[j] = [],
verti = [];
for (var j = 0; j < x + 1; j++) verti[j] = [],
here = [Math.floor(Math.random() * x), Math.floor(Math.random() * y)],
path = [here],
unvisited = [];
for (var j = 0; j < x + 2; j++) {
unvisited[j] = [];
for (var k = 0; k < y + 1; k++)
unvisited[j].push(j > 0 && j < x + 1 && k > 0 && (j != here[0] + 1 || k != here[1] + 1));
}
while (0 < n) {
var potential = [
[here[0] + 1, here[1]],
[here[0], here[1] + 1],
[here[0] - 1, here[1]],
[here[0], here[1] - 1]
];
var neighbors = [];
for (var j = 0; j < 4; j++)
if (unvisited[potential[j][0] + 1][potential[j][1] + 1])
neighbors.push(potential[j]);
if (neighbors.length) {
n = n - 1;
next = neighbors[Math.floor(Math.random() * neighbors.length)];
unvisited[next[0] + 1][next[1] + 1] = false;
if (next[0] == here[0])
horiz[next[0]][(next[1] + here[1] - 1) / 2] = true;
else
verti[(next[0] + here[0] - 1) / 2][next[1]] = true;
path.push(here = next);
} else
here = path.pop();
}
return {
x: x,
y: y,
horiz: horiz,
verti: verti
};
}
function display(m) {
var text = [];
for (var j = 0; j < m.x * 2 + 1; j++) {
var line = [];
if (0 == j % 2)
for (var k = 0; k < m.y * 4 + 1; k++)
if (0 == k % 4)
line[k] = '+';
else
if (j > 0 && m.verti[j / 2 - 1][Math.floor(k / 4)])
line[k] = ' ';
else
line[k] = '-';
else
for (var k = 0; k < m.y * 4 + 1; k++)
if (0 == k % 4)
if (k > 0 && m.horiz[(j - 1) / 2][k / 4 - 1])
line[k] = ' ';
else
line[k] = '|';
else
if (2 == k % 4)
line[k] = demotext[Math.floor(Math.random() * demotext.length)];
else
line[k] = ' ';
if (0 == j) {line[1] = line[3] = ' '; line[2] = '1'};
if (m.x * 2 - 1 == j) line[4 * m.y] = '2';
text.push(line.join('') + '\r\n');
}
return text.join('');
}
<pre id="out"></pre>
Moving start and end point to make the path length match with text maybe the solution but I do not know how to implement it. Any help would be great !
Ps: I would like the result can be like this:

Javascript to Python with bitwise shift in a for loop's condition and final-expression

I have a FFT code written in javascript, pasted below:
function FFT(re, im) {
var N = re.length;
for (var i = 0; i < N; i++) {
for (var j = 0, h = i, k = N; k >>= 1; h >>= 1)
j = (j << 1) | (h & 1);
if (j > i) {
re[j] = [re[i], re[i] = re[j]][0];
im[j] = [im[i], im[i] = im[j]][0]
}
}
for (var hN = 1; hN * 2 <= N; hN *= 2)
for (i = 0; i < N; i += hN * 2)
for (j = i; j < i + hN; j++) {
var cos = Math.cos(Math.PI * (j - i) / hN),
sin = Math.sin(Math.PI * (j - i) / hN);
var tre = re[j + hN] * cos + im[j + hN] * sin,
tim = -re[j + hN] * sin + im[j + hN] * cos;
re[j + hN] = re[j] - tre;
im[j + hN] = im[j] - tim;
re[j] += tre;
im[j] += tim;
}
}
There is one statement in for loop
for (var j = 0, h = i, k = N; k >>= 1; h >>= 1)
I wonder how to write this for loop in Python? The ks array and hs array are not very clear to me so I do not know how to use zip.
One possible way is with a while loop:
j = 0
h = i
k = N >> 1
while k > 0:
... # current logic in the for loop
k >>= 1
h >>= 1
The simplest way is described by #fileyfood500.
Another approach could be to separate out the k, h logic in a generator, and using a for loop:
def gen(k, h):
while k >> 1:
yield h
h >>= 1
k >>= 1
for i in range(N):
j = 0
for h in gen(N, i):
j = (j << 1) | (h & 1)
If you do this you can reduce the loop:
from functools import reduce # Py3
for i in range(N):
j = reduce(lambda j, h: (j << 1) | (h & 1), gen(N, i), 0)

LogLog and HyperLogLog algorithms for counting of large cardinalities

Where can I find a valid implementation of LogLog algorithm? Have tried to implement it by myself but my draft implementation yields strange results.
Here it is:
function LogLog(max_error, max_count)
{
function log2(x)
{
return Math.log(x) / Math.LN2;
}
var m = 1.30 / max_error;
var k = Math.ceil(log2(m * m));
m = Math.pow(2, k);
var k_comp = 32 - k;
var l = log2(log2(max_count / m));
if (isNaN(l)) l = 1; else l = Math.ceil(l);
var l_mask = ((1 << l) - 1) >>> 0;
var M = [];
for (var i = 0; i < m; ++i) M[i] = 0;
function count(hash)
{
if (hash !== undefined)
{
var j = hash >>> k_comp;
var rank = 0;
for (var i = 0; i < k_comp; ++i)
{
if ((hash >>> i) & 1)
{
rank = i + 1;
break;
}
}
M[j] = Math.max(M[j], rank & l_mask);
}
else
{
var c = 0;
for (var i = 0; i < m; ++i) c += M[i];
return 0.79402 * m * Math.pow(2, c / m);
}
}
return {count: count};
}
function fnv1a(text)
{
var hash = 2166136261;
for (var i = 0; i < text.length; ++i)
{
hash ^= text.charCodeAt(i);
hash += (hash << 1) + (hash << 4) + (hash << 7) +
(hash << 8) + (hash << 24);
}
return hash >>> 0;
}
var words = ['aardvark', 'abyssinian', ... ,'zoology']; // about 2 300 words
var log_log = LogLog(0.01, 100000);
for (var i = 0; i < words.length; ++i) log_log.count(fnv1a(words[i]));
alert(log_log.count());
For unknown reason implementation is very sensitive to max_error parameter, it is the main factor that determines the magnitude of the result. I'm sure, there is some stupid mistake :)
UPDATE: This problem is solved in the newer version of algorithm. I will post its implementation later.
Here it is the updated version of the algorithm based on the newer paper:
var pow_2_32 = 0xFFFFFFFF + 1;
function HyperLogLog(std_error)
{
function log2(x)
{
return Math.log(x) / Math.LN2;
}
function rank(hash, max)
{
var r = 1;
while ((hash & 1) == 0 && r <= max) { ++r; hash >>>= 1; }
return r;
}
var m = 1.04 / std_error;
var k = Math.ceil(log2(m * m)), k_comp = 32 - k;
m = Math.pow(2, k);
var alpha_m = m == 16 ? 0.673
: m == 32 ? 0.697
: m == 64 ? 0.709
: 0.7213 / (1 + 1.079 / m);
var M = []; for (var i = 0; i < m; ++i) M[i] = 0;
function count(hash)
{
if (hash !== undefined)
{
var j = hash >>> k_comp;
M[j] = Math.max(M[j], rank(hash, k_comp));
}
else
{
var c = 0.0;
for (var i = 0; i < m; ++i) c += 1 / Math.pow(2, M[i]);
var E = alpha_m * m * m / c;
// -- make corrections
if (E <= 5/2 * m)
{
var V = 0;
for (var i = 0; i < m; ++i) if (M[i] == 0) ++V;
if (V > 0) E = m * Math.log(m / V);
}
else if (E > 1/30 * pow_2_32)
E = -pow_2_32 * Math.log(1 - E / pow_2_32);
// --
return E;
}
}
return {count: count};
}
function fnv1a(text)
{
var hash = 2166136261;
for (var i = 0; i < text.length; ++i)
{
hash ^= text.charCodeAt(i);
hash += (hash << 1) + (hash << 4) + (hash << 7) +
(hash << 8) + (hash << 24);
}
return hash >>> 0;
}
var words = ['aardvark', 'abyssinian', ..., 'zoology']; // 2336 words
var seed = Math.floor(Math.random() * pow_2_32); // make more fun
var log_log = HyperLogLog(0.065);
for (var i = 0; i < words.length; ++i) log_log.count(fnv1a(words[i]) ^ seed);
var count = log_log.count();
alert(count + ', error ' +
(count - words.length) / (words.length / 100.0) + '%');
Here is a slightly modified version which adds the merge operation.
Merge allows you to take the counters from several instances of HyperLogLog,
and determine the unique counters overall.
For example, if you have unique visitors collected on Monday, Tuesday and Wednesday,
then you can merge the buckets together and count the number of unique visitors
over the three day span:
var pow_2_32 = 0xFFFFFFFF + 1;
function HyperLogLog(std_error)
{
function log2(x)
{
return Math.log(x) / Math.LN2;
}
function rank(hash, max)
{
var r = 1;
while ((hash & 1) == 0 && r <= max) { ++r; hash >>>= 1; }
return r;
}
var m = 1.04 / std_error;
var k = Math.ceil(log2(m * m)), k_comp = 32 - k;
m = Math.pow(2, k);
var alpha_m = m == 16 ? 0.673
: m == 32 ? 0.697
: m == 64 ? 0.709
: 0.7213 / (1 + 1.079 / m);
var M = []; for (var i = 0; i < m; ++i) M[i] = 0;
function merge(other)
{
for (var i = 0; i < m; i++)
M[i] = Math.max(M[i], other.buckets[i]);
}
function count(hash)
{
if (hash !== undefined)
{
var j = hash >>> k_comp;
M[j] = Math.max(M[j], rank(hash, k_comp));
}
else
{
var c = 0.0;
for (var i = 0; i < m; ++i) c += 1 / Math.pow(2, M[i]);
var E = alpha_m * m * m / c;
// -- make corrections
if (E <= 5/2 * m)
{
var V = 0;
for (var i = 0; i < m; ++i) if (M[i] == 0) ++V;
if (V > 0) E = m * Math.log(m / V);
}
else if (E > 1/30 * pow_2_32)
E = -pow_2_32 * Math.log(1 - E / pow_2_32);
// --
return E;
}
}
return {count: count, merge: merge, buckets: M};
}
function fnv1a(text)
{
var hash = 2166136261;
for (var i = 0; i < text.length; ++i)
{
hash ^= text.charCodeAt(i);
hash += (hash << 1) + (hash << 4) + (hash << 7) +
(hash << 8) + (hash << 24);
}
return hash >>> 0;
}
Then you can do something like this:
// initialize one counter per day
var ll_monday = HyperLogLog(0.01);
var ll_tuesday = HyperLogLog(0.01);
var ll_wednesday = HyperLogLog(0.01);
// add 5000 unique values in each day
for(var i=0; i<5000; i++) ll_monday.count(fnv1a('' + Math.random()));
for(var i=0; i<5000; i++) ll_tuesday.count(fnv1a('' + Math.random()));
for(var i=0; i<5000; i++) ll_wednesday.count(fnv1a('' + Math.random()));
// add 5000 values which appear every day
for(var i=0; i<5000; i++) {ll_monday.count(fnv1a(''+i)); ll_tuesday.count(fnv1a('' + i)); ll_wednesday.count(fnv1a('' + i));}
// merge three days together
together = HyperLogLog(0.01);
together.merge(ll_monday);
together.merge(ll_tuesday);
together.merge(ll_wednesday);
// report
console.log('unique per day: ' + Math.round(ll_monday.count()) + ' ' + Math.round(ll_tuesday.count()) + ' ' + Math.round(ll_wednesday.count()));
console.log('unique numbers overall: ' + Math.round(together.count()));
We've open sourced a project called Stream-Lib that has a LogLog implementation. The work was based on this paper.
Using the js version #actual provided, I tried to implement the same in C#, which seems close enough. Just changed fnv1a function a little bit and renamed it to getHashCode. (Credit goes to Jenkins hash function, http://en.wikipedia.org/wiki/Jenkins_hash_function)
public class HyperLogLog
{
private double mapSize, alpha_m, k;
private int kComplement;
private Dictionary<int, int> Lookup = new Dictionary<int, int>();
private const double pow_2_32 = 4294967297;
public HyperLogLog(double stdError)
{
mapSize = (double)1.04 / stdError;
k = (long)Math.Ceiling(log2(mapSize * mapSize));
kComplement = 32 - (int)k;
mapSize = (long)Math.Pow(2, k);
alpha_m = mapSize == 16 ? (double)0.673
: mapSize == 32 ? (double)0.697
: mapSize == 64 ? (double)0.709
: (double)0.7213 / (double)(1 + 1.079 / mapSize);
for (int i = 0; i < mapSize; i++)
Lookup[i] = 0;
}
private static double log2(double x)
{
return Math.Log(x) / 0.69314718055994530941723212145818;//Ln2
}
private static int getRank(uint hash, int max)
{
int r = 1;
uint one = 1;
while ((hash & one) == 0 && r <= max)
{
++r;
hash >>= 1;
}
return r;
}
public static uint getHashCode(string text)
{
uint hash = 0;
for (int i = 0, l = text.Length; i < l; i++)
{
hash += (uint)text[i];
hash += hash << 10;
hash ^= hash >> 6;
}
hash += hash << 3;
hash ^= hash >> 6;
hash += hash << 16;
return hash;
}
public int Count()
{
double c = 0, E;
for (var i = 0; i < mapSize; i++)
c += 1d / Math.Pow(2, (double)Lookup[i]);
E = alpha_m * mapSize * mapSize / c;
// Make corrections & smoothen things.
if (E <= (5 / 2) * mapSize)
{
double V = 0;
for (var i = 0; i < mapSize; i++)
if (Lookup[i] == 0) V++;
if (V > 0)
E = mapSize * Math.Log(mapSize / V);
}
else
if (E > (1 / 30) * pow_2_32)
E = -pow_2_32 * Math.Log(1 - E / pow_2_32);
// Made corrections & smoothen things, or not.
return (int)E;
}
public void Add(object val)
{
uint hashCode = getHashCode(val.ToString());
int j = (int)(hashCode >> kComplement);
Lookup[j] = Math.Max(Lookup[j], getRank(hashCode, kComplement));
}
}
I know this is an old post but the #buryat implementation has moved, and is in any case incomplete, and a bit on the slow side (sorry o_o ).
I've taken the implementation used by the new Redis release which can be found here and ported it to PHP. The repo is here https://github.com/joegreen0991/HyperLogLog
<?php
class HyperLogLog {
private $HLL_P_MASK;
private $HLL_REGISTERS;
private $ALPHA;
private $registers;
public function __construct($HLL_P = 14)
{
$this->HLL_REGISTERS = (1 << $HLL_P); /* With P=14, 16384 registers. */
$this->HLL_P_MASK = ($this->HLL_REGISTERS - 1); /* Mask to index register. */
$this->ALPHA = 0.7213 / (1 + 1.079 / $this->HLL_REGISTERS);
$this->registers = new SplFixedArray($this->HLL_REGISTERS);
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
$this->registers[$i] = 0;
}
}
public function add($v)
{
$h = crc32(md5($v));
$h |= 1 << 63; /* Make sure the loop terminates. */
$bit = $this->HLL_REGISTERS; /* First bit not used to address the register. */
$count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
while(($h & $bit) == 0) {
$count++;
$bit <<= 1;
}
/* Update the register if this element produced a longer run of zeroes. */
$index = $h & $this->HLL_P_MASK; /* Index a register inside registers. */
if ($this->registers[$index] < $count) {
$this->registers[$index] = $count;
}
}
public function export()
{
$str = '';
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
$str .= chr($this->registers[$i]);
}
return $str;
}
public function import($str)
{
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
$this->registers[$i] = isset($str[$i]) ? ord($str[$i]) : 0;
}
}
public function merge($str)
{
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
if(isset($str[$i]))
{
$ord = ord($str[$i]);
if ($this->registers[$i] < $ord) {
$this->registers[$i] = $ord;
}
}
}
}
/**
* #static
* #param $arr
* #return int Number of unique items in $arr
*/
public function count() {
$E = 0;
$ez = 0;
for ($i = 0; $i < $this->HLL_REGISTERS; $i++) {
if ($this->registers[$i] !== 0) {
$E += (1.0 / pow(2, $this->registers[$i]));
} else {
$ez++;
$E += 1.0;
}
}
$E = (1 / $E) * $this->ALPHA * $this->HLL_REGISTERS * $this->HLL_REGISTERS;
/* Use the LINEARCOUNTING algorithm for small cardinalities.
* For larger values but up to 72000 HyperLogLog raw approximation is
* used since linear counting error starts to increase. However HyperLogLog
* shows a strong bias in the range 2.5*16384 - 72000, so we try to
* compensate for it. */
if ($E < $this->HLL_REGISTERS * 2.5 && $ez != 0) {
$E = $this->HLL_REGISTERS * log($this->HLL_REGISTERS / $ez);
}
else if ($this->HLL_REGISTERS == 16384 && $E < 72000) {
// We did polynomial regression of the bias for this range, this
// way we can compute the bias for a given cardinality and correct
// according to it. Only apply the correction for P=14 that's what
// we use and the value the correction was verified with.
$bias = 5.9119 * 1.0e-18 * ($E*$E*$E*$E)
-1.4253 * 1.0e-12 * ($E*$E*$E)+
1.2940 * 1.0e-7 * ($E*$E)
-5.2921 * 1.0e-3 * $E+
83.3216;
$E -= $E * ($bias/100);
}
return floor($E);
}
}
I implemented loglog and hyperloglog in JS and PHP and well-commented code https://github.com/buryat/loglog

correctness of in-place convolution filter?

I have a simple box blur function that takes an ImageData object, returning it when done. However, I have just realised that this implementation may be incorrect because the ImageData object is edited in place and convolution filters depend on surrounding pixels. Should I be reading from the original ImageData and writing to a new one so that each pixel doesn't depend on already-changed surrounding pixels? If so, I'll have to rework my web worker manager to supply a new ImageData for the convolution functions to write to.
expressive.boxBlur = function(data, options) {
var w = data.width, h = data.height, dataReal = data.data;
for (var i = 0; i < w; i++)
for (var j = 0; j < h; j++)
for (var k = 0; k < 4; k++) {
var total = 0, values = 0, temp = 0;
if (!(i == 0 && j == 0)) {
temp = dataReal[4 * w * (j - 1) + 4 * (i - 1) + k];
if (temp !== undefined) values++, total += temp;
}
if (!(i == w - 1 && j == 0)) {
temp = dataReal[4 * w * (j - 1) + 4 * (i + 1) + k];
if (temp !== undefined) values++, total += temp;
}
if (!(i == 0 && j == h - 1)) {
temp = dataReal[4 * w * (j + 1) + 4 * (i - 1) + k];
if (temp !== undefined) values++, total += temp;
}
if (!(i == w - 1 && j == h - 1)) {
temp = dataReal[4 * w * (j + 1) + 4 * (i + 1) + k];
if (temp !== undefined) values++, total += temp;
}
if (!(j == 0)) {
temp = dataReal[4 * w * (j - 1) + 4 * (i + 0) + k];
if (temp !== undefined) values++, total += temp;
}
if (!(j == h - 1)) {
temp = dataReal[4 * w * (j + 1) + 4 * (i + 0) + k];
if (temp !== undefined) values++, total += temp;
}
if (!(i == 0)) {
temp = dataReal[4 * w * (j + 0) + 4 * (i - 1) + k];
if (temp !== undefined) values++, total += temp;
}
if (!(i == w - 1)) {
temp = dataReal[4 * w * (j + 0) + 4 * (i + 1) + k];
if (temp !== undefined) values++, total += temp;
}
values++, total += dataReal[4 * w * j + 4 * i + k];
total /= values;
dataReal[4 * w * j + 4 * i + k] = total;
}
return data;
};
You're right, you need a separate image to put the convoluted result in. Unless the impuls response is a scaled dirac function. (i.e. it has only 1 point in the center)
However, you could do with a cache for only a few scanlines, saving a lot of memory.

Categories

Resources