is this the right approach to calculate cosine similarity? - javascript

If you guys can please review if the following approach (pseudo-code) is good to go to calcualte cosine similarity between 2 vectors:
var vectorA = [2,5,7,8];
var referenceVector= [1,1,1,1];
//Apply weights to vectors (apply positive or negative weights to elements)
var weightageVector = [1,0.5,2,1.5];
var weighted vectA = GetWeightedVector(vectorA);
//normalize each element to a value beteen 0 and 1
//#see http://stn.spotfire.com/spotfire_client_help/norm/norm_scale_between_0_and_1.htm
as calcuated here:http://jsfiddle.net/snehilw/86jqo1sm/4/
var normalizedVectorA = GetNormalizedVector(vectorA); //using the formula above
var cosineSimilarityScore = GetCosineSimilarityScore(referenceVector, normalizedVectorA );
can someone please advise if this is correct approach as this is not giving me correct results.
As requested, here is the code snippet:
var defaultVectorWeights = [1,0.5,2,1.5];
var referenceVector = [1, 1, 1, 1] //Default values for the reference vector (Do not change these);
var supportedVectorLength = referenceVector.length;
function getNormalizedVector(multiDimArray, vector){
var normalizedVector = [];
if(vector.length == supportedVectorLength){
var normalizedValue = 0;
for(var j = 0; j < supportedVectorLength ; j++){
var min = getMinMaxForMultidimensionalArrayColumn(multiDimArray,j)[0];
var max = getMinMaxForMultidimensionalArrayColumn(multiDimArray,j)[1];
normalizedValue = (max == min) ? 0.5 : (vector[j] - min) / (max - min);
normalizedVector.push(normalizedValue);
}
}
//console.log('normalizedVector='+normalizedVector);
return normalizedVector;
}
function getCosineSimilarityScore(vectorA, vectorB) {
var similarityScore;
if((vectorA.length == supportedVectorLength) && (vectorB.length == supportedVectorLength)){
var lenVectA = vectorA.length,
product = 0,
normVectorA = 0,
normVectorB = 0;
for (var i = 0; i < lenVectA ; i++) {
product += vectorA[i] * vectorB[i];
normVectorA += vectorA[i] * vectorA[i];
normVectorB += vectorB[i] * vectorB[i];
}
similarityScore = product / (Math.sqrt(normVectorA) * Math.sqrt(normVectorB));
}
else {
//TODO: Handle exception/ Fire an event to notify the server about this exception
console.log("Cosine similarity workload vectors are of unequal lengths");
}
return similarityScore;
}
function getWeightedVector(vector) {
var vectorArray = []; //Initialize
if(vector.length == supportedVectorLength){
for(var j = 0; j < supportedVectorLength ; j++){
vectorArray.push(defaultVectorWeights[j]*vector[j]);
}
}
else{
//TODO: Handle exception/ Fire an event to notify the server about this exception
console.log("Cosine similarity workload vector is of unsupported length");
}
return vectorArray;
}
function getMinMaxForMultidimensionalArrayColumn(multiDimArray, column){
var _MIN_MAX = []; //[min,max]
var columnarArray = [];
if(column < supportedVectorLength){
//Extract columnar array from the multi-dimensional array
$.map(multiDimArray, function( arrayVect) {
columnarArray.push(arrayVect[column]);
});
//Find the MIN and MAX
_MIN_MAX.push(Math.min.apply(Math,columnarArray));
_MIN_MAX.push(Math.max.apply(Math,columnarArray));
}
else{
//TODO: Handle exception/ Fire an event to notify the server about this exception
console.log("Cosine similarity workload vectors are of unequal lengths");
}
return _MIN_MAX;
}
function getAssociateWorkloadScore(multiDimArray,queryVector){
var workloadScore;
var weightedQueryVector = [];
var weightedMultiDimArr = [];
var normalizedMultiDimArr = [];
var normalizedQueryVector = [];
//Apply feature scaling
weightedQueryVector = getWeightedVector(queryVector);
weightedMultiDimArr = getWeightedMultiDimArr(multiDimArray);
normalizedQueryVector = getNormalizedVector(weightedMultiDimArr, weightedQueryVector);
workloadScore = getCosineSimilarityScore(referenceVector, normalizedQueryVector);
console.log('weightedQueryVector='+weightedQueryVector);
console.log('weightedMultiDimArr='+JSON.stringify(weightedMultiDimArr));
console.log('normalizedMultiDimArr='+JSON.stringify(normalizedMultiDimArr));
console.log('normalizedQueryVector='+normalizedQueryVector);
console.log('workloadScore='+JSON.stringify(workloadScore));
return workloadScore;
}
function getTeamWorkloadScore(multiDimArray){
var workloadScores = [];
for(var j = 0; j < multiDimArray.length ; j++){
workloadScores.push(getAssociateWorkloadScore(multiDimArray,multiDimArray[j]));
}
return workloadScores;
}

A cosine similarity is just a dot product divided by the product of norms. So why not make a dot product function and a norm function and divide the results? (dotproduct from http://c2.com/cgi/wiki?DotProductInManyProgrammingLanguages)
function dotproduct(a,b) {
var n = 0, lim = Math.min(a.length,b.length);
for (var i = 0; i < lim; i++) n += a[i] * b[i];
return n;
}
function norm2(a) {var sumsqr = 0; for (var i = 0; i < a.length; i++) sumsqr += a[i]*a[i]; return Math.sqrt(sumsqr);}
function similarity(a, b) {return dotproduct(a,b)/norm2(a)/norm2(b);}
Now similarity([1,0,0], [0,1,1]) == 0

If you necessarily need scale-invariance (i.e., the original cosine similarity), then use Gavin's code augmented with checks for zero-vectors
function cosine_sim(x, y) {
xnorm = norm2(x);
if(!xnorm) return 0;
ynorm = norm2(y);
if(!ynorm) return 0;
return dotproduct(x, y) / (xnorm * ynorm);
}
If you do not need scale-invariance, just use the dot product (i.e., cosine_sim(x, y) is dotproduct(x, y)).

Related

Neural network in JS does not train

I am trying to create a simple neural network in javascript with 2 inputs, 3 hidden and 1 output neurons that use matrixes of neurons and weights to pass forward, and backpropagation training to solve XOR problem for example. The problem is I get weights always fading to 0 fast and as a result I thing outputs equal to sigmoid(0) 0.5 value or really close. I am using a sigmoid function for activation while finding neuron values like this:
var multyplayMatrix = (a,b) => {
if(a.length !== b.length) return "length of matrix1 !== height of matrix2 :(";
let c = [];
let temp = 0;
for(var n = 0; n < b[0].length; n++){
for(var nn = 0; nn < a.length; nn++){
temp = temp + (a[nn] * b[nn][n]);
}
//sigmoid
c.push( 1 / (1 + Math.exp(-temp)) );
temp = 0;
}
return c;
}
After finding output from training data input I compare it with training data desired output and calculate error and weights delta with this:
var getError = () => {
let predicted;
if(desiredOutput !== undefined){
predicted = net.neurons[net.neurons.length-1][0]
}else{
return console.log("desired output not defined")
}
let error = predicted - desiredOutput[0];
let weights_delta = error * (predicted * (1 - predicted));
console.log("output error weight delta = " + weights_delta)
propogateBackward(weights_delta);
};
and start backpropagating with some learning rate - My backdrop function looks like this:
var learningRate = 0.5;
var propogateBackward = (output_weights_delta) => {
for(var n = 0; n < 3; n ++ ){
net.weights[net.weights.length-(1 + n)][0] = (net.weights[net.weights.length-(1 + n)][0] - net.neurons[net.neurons.length-2][2-n]) * output_weights_delta * learningRate;
}
let neuronErrors = [];
for(var n = 0; n < 3; n ++ ){
let hiddenError = output_weights_delta * net.weights[net.weights.length-(1 + n)][0];
let hidden_weights_delta = hiddenError * (net.neurons[net.neurons.length-2][2-n] * (1 - net.neurons[net.neurons.length-2][2-n]));
neuronErrors.push(hidden_weights_delta);
}
for(var n = 0; n < 3; n++){
for(var nn = 0; nn < 2; nn++){
net.weights[net.weights.length-(4 + nn)][n] = (net.weights[net.weights.length-(4 + nn)][0] - net.neurons[net.neurons.length-3][1-nn]) * neuronErrors[n] * learningRate;
}
}
draw();
}
Im using this function and XOR dataset to make a iteration/training loop like this and just after some iterations it already coverges close to 0.5
var train = (iterations) => {
for(var it = 0; it < iterations;it++){
for(var n = 0; n < dataset.length; n++){
addInput(dataset[n].inputs);
addOutput(dataset[n].outputs);
activate();
getError();
}
}
}
After googling most similar problems are connected with weight initiation in longer/deeper neural networks but that does not help here, and I did a couple of different variants of this net all get similar result so I'm probably loose on my theory/math..but where?:(
I'm just learning stats and this can be confusing, for me so excuse me if I didn't make 2 much sense - You can check the js live implementation here: https://codepen.io/sanchopanza/pen/MWaZJJe

find target's target i.e friend of friend in force directed graph

I am using D3.JS libraries force directed graph,
I am using the following code to find target nodes
graph.links.forEach(function(d) {
linkedByIndex[d.source.index + "," + d.target.index] = 1;
linkedByIndex[d.target.index + "," + d.source.index] = 1;
});
});
function neighboring(a, b) {
return a.index == b.index || linkedByIndex[a.index + "," + b.index];
}
How can I find and highlight target's target ? Can anybody help me with this ?
EDIT:
Solved the problem in the following way:
First created the adjacent matrix:
var adjMat=null;
var adjMatSq=null;
adjMat=new Array(graph.nodes.length);
for(var i = 0;i < graph.nodes.length; ++i)
{
adjMat[i]=new Array(graph.nodes.length);
for(var j = 0; j < graph.nodes.length; ++j)
{
adjMat[i][j] = 0;
}
}
Then assigned the values to the adjacent matrix:
graph.links.forEach(function(d) {
adjMat[d.source.index][d.target.index] = adjMat[d.target.index][d.source.index] = 1;
adjMat[d.source.index][d.source.index] = 1;
});
adjMatSq=matrixMultiply(adjMat, adjMat);
});
Then I found the square of matrix so that I'll be able to get the second degree nodes:
function matrixMultiply(m1,m2)
{
var result = [];
for(var j = 0; j < m2.length; j++) {
result[j] = [];
for(var k = 0; k < m1[0].length; k++) {
var sum = 0;
for(var i = 0; i < m1.length; i++) {
sum += m1[i][k] * m2[j][i];
}
result[j].push(sum);
}
}
return result;
}
Defined a function to find the second degree nodes:
function areAtSecondDegree(a,c)
{
return adjMatSq[a.index][c.index] == 1;
}
My Code Plnkr
The principle is as follows. Given a specific node, determine its immediate neighbours. To get second degree neighbours, take this list of neighbours you have just determined, and for each get the list of neighbours again. The union of all those lists is the list of first and second degree neighbours.
In code this could look like this:
var connected = {};
var friends = graph.nodes.filter(function(o) { return areFriends(d, o); });
friends.forEach(function(o) {
connected[o.name] = 1;
// second pass to get second-degree neighbours
graph.nodes.forEach(function(p) {
if(areFriends(o, p)) {
connected[p.name] = 1;
}
});
});
If you want to have arbitrary n degree neighbours, you would need to change this to accommodate the fact that you don't know how many iterations of this to run.
Complete demo here.

2D array improperly filled and undefined in JavaScript

I am writing a function with two sub-functions: one to print a 2D array from two values in the form and the other to print out the 2D array. At this point, I have been trying to use variables with integer values to fill in the 2D array, but they are not being filled properly. The min variable seems to have the value of 0 within the function instead of what is being passed.
Anyways after I create the 2D array, I try to test print the 2D array 'twoDarray', but it comes up as undefined?
function isValid(frm) {
var xValue = document.getElementById("X").value;
var yValue = document.getElementById("Y").value;
if (xValue <= yValue)
{
/* Draw out the multiplication table. */
function gen2Darray(min, max)
{
var lengthOf2Darray = (max - min);
var twoDarray = new Array(lengthOf2Darray);
for (var i = 0; i < lengthOf2Darray; i++)
{
twoDarray[i] = new Array(lengthOf2Darray);
for (var j = 0; j < lengthOf2Darray; j++)
{
twoDarray[i][j] = ((min + i) * (min + j)); // only takes i
}
}
}
gen2Darray(xValue, yValue);
function print_2d_string_array (array)
{
// print twoDarray, but twoDarray undefined here
}
I have created a jsfiddle account, but it does not work the same way on the site: http://jsfiddle.net/imparante/5yTEv/.
Fix:
function isValid(frm) {
var xValue = document.getElementById("X").value;
var yValue = document.getElementById("Y").value;
var lengthOf2Darray = (yValue - xValue);
var twoDarray = new Array(lengthOf2Darray);
if (xValue <= yValue) {
/* Draw out the multiplication table. */
function gen2Darray(min, max) {
// var lengthOf2Darray = (max - min);
// var twoDarray = new Array(lengthOf2Darray);
for (var i = 0; i < lengthOf2Darray; i++) {
twoDarray[i] = new Array(lengthOf2Darray);
for (var j = 0; j < lengthOf2Darray; j++) {
twoDarray[i][j] = ((min + i) * (min + j));
$("#table").append(twoDarray[i][j] + " ");
}
}
return twoDarray;
}
gen2Darray(xValue, yValue);
function print_2d_string_array(array){
// print twoDarray
}
You're getting undefined on twoDarray because the array is declared and created inside gen2Darray() but you are trying to access it from print_2d_string_array().
If you move your var twoDarray = new Array(); up to you xValue and yValue declarations that should solve the undefined error.
Bin example
function isValid(frm) {
var xValue = document.getElementById("X").value;
var yValue = document.getElementById("Y").value;
var lengthOf2Darray;
var twoDarray = [];
if (xValue <= yValue){
/* Draw out the multiplication table. */
gen2Darray(xValue, yValue);
print_2d_string_array(twoDarray);
}
function gen2Darray(min, max){
lengthOf2Darray = (max - min);
twoDarray = [lengthOf2Darray];
for (var i = 0; i < lengthOf2Darray; i++){
twoDarray[i] = new Array(lengthOf2Darray);
for (var j = 0; j < lengthOf2Darray; j++){
twoDarray[i][j] = ((min + i) * (min + j)); // only takes i
}
}
}
function print_2d_string_array(array){
console.log(array);
}
}
The declarating of the functions were a bit wrong, also the function gen2Darray was not actually returning anything, so the variable twoDarray would only be available in the scope of the function.
Please see fixed code here: http://jsfiddle.net/CC5vP/
I moved the two functions out of the main function declaration and then added a return in gen2Darray(min, max)
It appears to be working, you will want to modify print_2d_string_array(a) to display the data instead of logging it to the console.

JavaScript syntax issue

I'm doing "fifteen puzzle" game. I'm only a beginner, so I chose this project to implement. My problem is shuffle algorithm :
function shuffle() {
$('td').empty();
var p = 0;
var f = 0;
do {
var arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
var rand = arr[Math.floor(Math.random() * arr.length)];
if ($('#' + rand).is(':empty')) {
p = p + 1;
document.getElementById(rand).textContent = p
var f = $('td').not(":empty").length;
} else {}
} while (f < 15)
That works cool, but I've heard that almost 50% of all random shuffle like mine is unsolvable. So I found math formula at wikipedia.org for this game, explaining how you can avoid that.
Here's modified algorithm that doesn't work either. The way I know it is alert stuff: it launches only 2 times instead of 31.
array = [];
function algorithm (){
// alert('works')
for (var c=16; c<17; c++){
document.getElementById(c).textContent = '100';
}
for (var i=1; i<16; i++){
var curId = document.getElementById(i).id;
var curIdNum = Math.floor(curId);
alert('works')
var curIn = document.getElementById(i).textContent;
var curInNum = Math.floor(curIn);
array.push(i);
array[i] = new Array();
for (var j=1; j<15; j++){
var nextId = curIdNum + j; //curIdNum NOT cerIdNum
var nextIn = document.getElementById(nextId).textContent;
//alert('works')
if (nextId < 16){
var nextInNum = Math.floor(nextIn);
if (curInNum > nextInNum){
array[i].push(j)
}
}
}
var sum = 0;
for (var a=0; a<15; a++){
var add = array[a].length;
sum = sum + add;
}
var end = sum + 4;
if (end % 2 == 0){
document.getElementById('16').textContent = "";
}
else {
shuffle();
}
}
}
The question is the same:
What's wrong? Two-dimensional array doesn't work.If you've got any questions - ask.
Just to make it clear: 2 for loops with i and j should make a 2-dimensional array like this [ this is " var i" -->[1,3,4,5,7], this is "var i" too-->[5,7,9,14,15]]. Inside each i there's j. The for loop with var a should count the number of js inside each i. if the number of js is even, the code is finished and shuffle's accomplished, otherwise shuffle should be made once again.
var nextId = cerIdNum + j;
in that fiddle, I don't see this cerIdNum declared & defined neither as local nor as global variable, I suppose that is curIdNum
Please use the below definition of algorithm and let us know if this works. Basically, the alert messages would come only twice, since there were usages of undefined variables. For the purpose of illustration, I have placed comments at where the problem points occured. Due to these problems, your script would stop executing abruptly thereby resulting in the behavior you described.
Oh and by the way - I did not have time to go through the Wiki link provided - hence you will have to verify your logic is correct. However, I have definitely resolved the errors causing the behavior you observed.
As an aside - consider using jQuery, your code will be a lot cleaner...
function algorithm (){
// alert('works')
for (var c=16; c<17; c++){
document.getElementById(c).textContent = '100';
}
for (var i=1; i<16; i++){
var curId = document.getElementById(i).id;
var curIdNum = Math.floor(curId);
alert('works')
var curIn = document.getElementById(i).textContent;
var curInNum = Math.floor(curIn);
array.push(i);
for (var j=1; j<15; j++){
var nextId = curIdNum + j; //curIdNum NOT cerIdNum
var nextIn = document.getElementById(nextId).textContent;
//alert('works')
if (nextId < 16){
var nextInNum = Math.floor(nextIn);
if (curInNum > nextInNum){
array.push(j) //array[i].push does not make sense
}
}
}
var sum = 0;
for (var a=0; a<15; a++){
var add = array.length; //array[1].length does not make sense
sum = sum + add;
}
var end = sum + 4;
if (end % 2 == 0){
document.getElementById('16').textContent = "";
}
else {
shuffle();
}
}
}
I found the solution by totally rewriting the code. Thank everyone for help!
Here's what do work:
function shuffle (){
press = 1;
$('td').empty().removeClass();
p=0;
var f;
do {
var arr=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15];
var rand=arr[Math.floor(Math.random()*arr.length)];
if ($('#'+ rand).is(':empty')){
p = p + 1;
document.getElementById(rand).textContent = p
var f = $('td').not(":empty").length;
}
else{}
}while(f < 15){
winChance();
}
}
function winChance (){
array = [];
for (i=1;i<16;i++){
array[i]= new Array();
var currentId = $('#' + i).attr('id');
var currentIn = $('#' + i).html()
var currentIdNum = parseInt(currentId, 10);
var currentInNum = parseInt(currentIn, 10);
for (j=1;j<16;j++){
var nextId = currentIdNum + j;
if (nextId < 16){
var nextIn = $('#' + nextId).html();
var nextInNum = parseInt(nextIn, 10);
if (currentInNum > nextInNum){
array[i].push(j);
}
}
}
}
checkSum();
}
function checkSum(){
var sum = 0;
for (var a=1; a<16; a++){
var add = array[a].length;
sum = sum + add;
}
var end = sum + 4;
if (end % 2 == 0){}
else {
shuffle();
}
}

For Loops inside For Loops inside For Loops... = Problems

So. I have 4 for loops inside other for loops in JS, and my code appears (FireBug agrees with me) that my code is syntactically sound, and yet it refuses to work. I'm attempting to calculate the key length in a vigenere cipher through the use of the Index of Coincidence, and Kappa tests <- if that helps any.
My main problem is that the task seems to be too computationally intensive for Javascript to run, as Firefox shoots up past 1GB of memory usage, and 99% CPU when I attempt to run the keylengthfinder() function. Any ideas of how to solve this problem, even if it takes much longer to calculate, would be greatly appreciated. Here's a link to the same code - http://pastebin.com/uYPBuZZz - Sorry about any indenting issues in this code. I'm having issues putting it on the page correctly.
function indexofcoincidence(text){
text = text.split(" ").join("").toUpperCase();
var textL = text.length;
var hashtable = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0];
var alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
for (d=0; d<=25; d++) {
for (i=0; i < textL; i++){
if (text.charAt(i) === alphabet.charAt(d)){
hashtable[d] = hashtable[d] + 1;
}
}
}
var aa = hashtable[0]/textL;
var A = aa*aa;
var bb = hashtable[1]/textL;
var B = bb*bb;
var cc = hashtable[2]/textL;
var C = cc*cc;
var dd = hashtable[3]/textL;
var D = dd*dd;
var ee = hashtable[4]/textL;
var E = ee*ee;
var ff = hashtable[5]/textL;
var F = ff*ff;
var gg = hashtable[6]/textL;
var G = gg*gg;
var hh = hashtable[7]/textL;
var H = hh*hh;
var ii = hashtable[8]/textL;
var I = ii*ii;
var jj = hashtable[9]/textL;
var J = jj*jj;
var kk = hashtable[10]/textL;
var K = kk*kk;
var ll = hashtable[11]/textL;
var L = ll*ll;
var mm = hashtable[12]/textL;
var M = mm*mm;
var nn = hashtable[13]/textL;
var N = nn*nn;
var oo = hashtable[14]/textL;
var O = oo*oo;
var pp = hashtable[15]/textL;
var P = pp*pp;
var qq = hashtable[16]/textL;
var Q = qq*qq;
var rr = hashtable[17]/textL;
var R = rr*rr;
var ss = hashtable[18]/textL;
var S = ss*ss;
var tt = hashtable[19]/textL;
var T = tt*tt;
var uu = hashtable[20]/textL;
var U = uu*uu;
var vv = hashtable[21]/textL;
var V = vv*vv;
var ww = hashtable[22]/textL;
var W = ww*ww;
var xx = hashtable[23]/textL;
var X = xx*xx;
var yy = hashtable[24]/textL;
var Y = yy*yy;
var zz = hashtable[25]/textL;
var Z = zz*zz;
var Kappa = A+B+C+D+E+F+G+H+I+J+K+L+M+N+O+P+Q+R+S+T+U+V+W+X+Y+Z;
var Top = 0.027*textL;
var Bottom1 = 0.038*textL + 0.065;
var Bottom2 = (textL - 1)*Kappa;
var KeyLength = Top/(Bottom2 - Bottom1) ;
return Kappa/0.0385;
}
function keylengthfinder(text){
// Average Function Definition
Array.prototype.avg = function() {
var av = 0;
var cnt = 0;
var len = this.length;
for (var i = 0; i < len; i++) {
var e = +this[i];
if(!e && this[i] !== 0 && this[i] !== '0') e--;
if (this[i] == e) {av += e; cnt++;}
}
return av/cnt;
}
// Begin the Key Length Finding
var textL = text.length;
var hashtable = new Array(0,0,0,0,0,0,0,0,0,0,0,0);
for (a = 0; a <= 12; a++){ // This is the main loop, testing each key length
var stringtable = [];
for (z = 0; z <= a; z++){ // This allows each setting, ie. 1st, 4th, 7th AND 2nd, 5th, 8th to be tested
for (i = z; i < textL; i + a){
var string = '';
string = string.concat(text.charAt(i)); // Join each letter of the correct place in the string
stringtable[z] = indexofcoincidence(string);
}
}
hashtable[a] = stringtable.avg();
}
return hashtable;
}
Your problem is definitely right here
for (i = z; i < textL; i + a){
var string = '';
string = string.concat(text.charAt(i)); // Join each letter of the correct place in the string
stringtable[z] = indexofcoincidence(string);
}
Notice that if a=0 i never changes and therefore you are in an infinite loop.
Array.prototype.avg = function() {...}
should be only done once, and not every time keylengthfinder is called.
var Top = 0.027*textL;
var Bottom1 = 0.038*textL + 0.065;
var Bottom2 = (textL - 1)*Kappa;
var KeyLength = Top/(Bottom2 - Bottom1) ;
return Kappa/0.0385;
Why do you computer those variables if you don't use them at all?
var string = '';
string = string.concat(text.charAt(i)); // Join each letter of the correct place in the string
stringtable[z] = indexofcoincidence(string);
I don't know what you are trying to do in here. The string will always be only one character?
for (i = z; i < textL; i + a) {
...
stringtable[z] = ...
}
In this loop, you are computing values for i from z to textL - but you overwrite the same array item each time. So it would be enough to compute the stringtable[z] for i=textL-1 - or your algorithm is flawed.
A much shorter and more concise variant of the indexofcoincidence function:
function indexofcoincidence(text){
var l = text.replace(/ /g, "").length;
text = text.toUpperCase().replace(/[^A-Z]/g, "");
var hashtable = {};
for (var i=0; i<l; i++) {
var c = text.charAt(i);
hashtable[c] = (hashtable[c] || 0) + 1;
}
var kappa = 0;
for (var c in hashtable)
kappa += hashtable[c] * hashtable[c];
return kappa/(l*l)/0.0385;
}
All right. Now that we found your problem (including the infinite loop in case a=0, as detected by qw3n), let's rewrite the loop:
function keylengthfinder(text) {
var length = text.length,
probabilities = []; // probability by key length
maxkeylen = 13; // it might make more sense to determine this in relation to length
for (var a = 1; a <= maxkeylen; a++) { // testing each key length
var stringtable = Array(a); // strings to check with this gap
// read "a" as stringtable.length
for (var z = 0; z < a; z++) {
var string = '';
for (var i = z; i < textL; i += a) {
string += text.charAt(i);
}
// a string consisting of z, z+a, z+2a, z+3a, ... -th letters
stringtable[z] = string;
}
var sum = 0;
// summing up the coincidence indizes for current stringtable
for (var i=0; i<a; i++) {
sum += indexofcoincidence(stringtable[i]);
}
probabilities[a] = sum / a; // average
}
return probabilities;
}
Every of the loop statements has changed against your original script!
Never forget to declare the running variable to be local (var keyword)
a needs to start at zero - a key must have a minimum length of 1
to run from 1 to n, use i=1; i<=n; i++
to run from 0 to n-1, use i=0; i<n; i++ (nearly all loops, especially on zero-based array indizes).
Other loops than those two never occur in normal programs. You should get suspicious if you have loops from 0 to n or from 1 to n-1...
The update expression needs to update the running variable. i++ is a shortcut for i+=1 is a shortcut for i=i+1. Your expression, i + a, did not assign the new value (apart from the a=0 problem)!

Categories

Resources