Shuffle array and prevent more than 2 in a row - javascript

I have an array being built like this:
var entries = ['L','L','L','L','L','L','L','L','L','L','R','R','R','R','R','R','R','R','R','R','M','M','M','M','M']
This means the array is always filled with 10 times L, 10 times R and 5 times M
The output I want to achieve is a randomly generated array, so I came up with the simple solution to just shuffle it with
function shuffle(o){
for(var j, x, i = o.length; i; j = Math.floor(Math.random() * i), x = o[--i], o[i] = o[j], o[j] = x);
return o;
}
The problem I have now is that there is a rule for the outcome, never have one of these letters more than 2 times in a row. So I thought I just use a do/while loop to shuffle until that criteria is met. But in my test runs this totally fails with long loops.
So my question is - what is the best way to build this array without depending on luck. My full program that fails is something like this
function shuffle(o){
for(var j, x, i = o.length; i; j = Math.floor(Math.random() * i), x = o[--i], o[i] = o[j], o[j] = x);
return o;
}
function createProgram(numShooters){
var programs = [];
for(var s = 0; s < numShooters; s++){
//Build array with L/R/M
for( d=0; d < 10; d++ ){
program.push('L');
program.push('R');
if(d < 5){
program.push('M');
}
}
// This will run way too long and is not reliable
//do{
// program = shuffle(program);
//}while(!checkProgram(program))
if(!checkProgram(program)){
console.log('invalid program at ' + s);
}
programs[s] = program;
}
return programs;
}
function checkProgram(program){
var len = program.length;
var last = null;
var dups = 0;
for(var i=0; i<len; len++){
if(program[i] == last){
dups++;
}else{
dups = 0;
}
if(dups == 2){
return false;
}
last = program[i];
}
return true;
}
createProgram(5);

Instead of just shuffle arrays and hope for one without duplicates, you can create them by picking characters by random and specifically avoid to pick the character that was picked previously.
If you keep track of how many there are left to pick of each character, you can control the odds for the character to pick so that the distribution is correct. If for example the first two characters are L, then there are 10 R and 5 M left to pick from (and 8 L, but they are excluded for the next pick), so there should be a 2 in 3 chance to pick an R and a 1 in 3 chance to pick and M.
This approach can run into a dead end, where the array can't be completed, so it has to start over. Running it a few hundred times I have seen something like a 10% overhead, so if you create five arrays you should by average see a retry every other time.
function createProgram(numShooters){
var programs = [];
for(var s = 0; s < numShooters; s++){
var chars = [ 'L', 'R', 'M' ];
var program;
do {
program = [];
var cnt = [ 10, 10, 5, 0 ]; // picks left
var prev = 3; // previous pick
var tot = 25; // total picks left
while (program.length < 25) {
// check for duplicates
var x = program.length >= 2 && program[program.length - 2] == program[program.length - 1] ? prev : 3;
// check if more picks are possible
if (tot - cnt[x] <= 0) {
console.log('invalid program ' + program);
break;
}
// pick from the possible
var r = Math.floor(Math.random() * (tot - cnt[x]));
// determine what character was picked
var c = 0;
while (c == x || r >= cnt[c]) {
if (c != x) r -= cnt[c];
c++;
}
program.push(chars[c]);
cnt[c]--;
tot--;
prev = c;
}
} while (program.length < 25);
programs[s] = program;
}
return programs;
}
console.log(createProgram(1).toString());

So this is the final solution I came up with, as commented Guffas solution also works nice and smooth. But after doing some tests mine is about 30% faster and more readable, but way longer so I'll accept Guffas - thanks to everybody for their input!
function createProgram(numShooters){
var programs = [];
for(var s = 0; s < numShooters; s++){
var program = buildProgram();
programs[s] = program;
}
return programs;
}
function buildProgram(){
var program = [];
var ls = fillArray('L',10);
var rs = fillArray('R',10);
var ms = fillArray('M',5);
//Use either L or R to mix into M - adds variation
var side = Math.random() > 0.5 ? ls : rs;
var otherSide = side == ls ? rs : ls;
var initProg = side.concat(ms);
initProg = shuffle(initProg);
var program = [];
//Correcting invalid positions as suggested
for(var p1 = 0; p1 < initProg.length; p1++){
if(p1 > 1 && initProg[p1-1] == initProg[p1-2] && initProg[p1-1] == initProg[p1]){
if(otherSide.length > 0){
program.push(otherSide.pop());
}else{
return buildProgram(); //impossible state, redo...
}
}
program.push(initProg[p1]);
}
//Fill into remaining other pos
for(var p2 = 0; p2 < otherSide.length; p2++){
program = addAtRandomPos(program,otherSide[p2]);
}
return program;
}
function addAtRandomPos(arr,chr){
var pos = getRandomInt( 0, arr.length - 1 );
var charCur = arr[pos];
var pprev = pos > 1 ? arr[pos-2] : null;
var prev = pos > 0 ? arr[pos-1] : null;
var next = pos < arr.length - 1 ? arr[pos] : null;
var nnext = pos < arr.length - 2 ? arr[pos+1] : null;
var str = pprev + prev + chr + next + nnext;
if(str.indexOf('MMM') !== -1 || str.indexOf('RRR') !== -1 || str.indexOf('LLL') !== -1){
return addAtRandomPos(arr,chr);
}else{
arr.splice(pos,0,chr);
}
return arr;
}
function getRandomInt (min, max) {
return Math.floor(Math.random() * (max - min + 1)) + min;
}
function fillArray(chr,num){
var arr = [];
for(i = 0; i < num; i++){
arr.push(chr);
}
return arr;
}

Related

Javascript while loop (Card deck simulation)

I am having an issue with the following code that simulates a card deck.
The deck is created properly (1 array containing 4 arrays (suits) containing 13 elements each (face values)) and when I use the G.test(); function it is correctly pulling 13 random cards but then returns 39x "Empty" (A total of 52).
I hate to ask for help, but I have left the problem overnight and then some and I still cannot find the reason that this is happening. I appreciate any and all insight that can be offered.
var G = {};
G.cards = [[], [], [], []];
G.newCard = function(v) { //currently a useless function, tried a few things
return v;
};
G.deck = {
n: function() { //new deck
var x; var list = [];
list.push(G.newCard("A"));
for (x = 2; x <= 10; x += 1) {
list.push(G.newCard(x.toString()));
}
list.push(G.newCard("J"), G.newCard("Q"), G.newCard("K"));
for (x = 0; x < G.cards.length; x += 1) {
G.cards[x] = list;
}
},
d: function() { //random card - returns suit & value
var s; var c; var v; var drawn = false; var n;
s = random(0, G.cards.length);
c = random(0, G.cards[s].length);
n = 0;
while (!drawn) {
if (G.cards[s].length > 0) {
if (G.cards[s][c]) {
v = G.cards[s].splice(c, 1);
drawn = true;
} else {
c = random(0, G.cards[s].length);
}
} else {
s = (s + 1 >= G.cards.length) ? 0 : s + 1;
n += 1;
console.log(s);
if (n >= G.cards.length) {
console.log(n);
return "Empty";
}
}
}
return {s: s, v: v[0]};
},
}; //G.deck
G.test = function() {
var x; var v;
G.deck.n();
for (x = 0; x < 52; x += 1) {
v = G.deck.d();
console.log(v);
}
};
Replace
for (x = 0; x < G.cards.length; x += 1) {
G.cards[x] = list;
}
with
for (x = 0; x < G.cards.length; x += 1) {
G.cards[x] = list.slice();
}
as this prevents all elements of G.cards[x] binding to the same (single) array instance.
If all elements bind to the same instance, mutating one element equals mutating all elements. list.slice() creates a new copy of list and thus a new array instance to prevent the aforementioned issue.
I won't go through your code, but I built a code that will do what you wanted. I only built this for one deck and not multiple deck play. There are two functions, one will generate the deck, and the other will drawn cards from the deck, bases on how many hands you need and how many cards you wanted for each hand. One a card is drawn, it will not be re-drawn. I might publish a short article for how a card dealing program work or similar in the short future at http://kevinhng86.iblog.website.
function random(min, max){
return Math.floor(Math.random() * (max - min)) + min;
}
function deckGenerate(){
var output = [];
var face = {1: "A", 11: "J", 12: "Q", 13: "K"};
// Heart Space Diamond & Club;
var suit = ["H", "S", "D", "C"];
// Delimiter between card identification and suit identification.
var d = "-";
for(i = 0; i < 4; i++){
output[i] = [];
for(ind = 0; ind < 13; ind++ ){
card = (ind + 1);
output[i][ind] = (card > 10) || (card === 1)? face[card] + d + suit[i] : card.toString() + d + suit[i];
}
}
return output;
}
function randomCard(deck, hand, card){
var output = [];
var randS = 0;
var randC = 0;
if( hand * card > 52 ) throw("Too many card, I built this for one deck only");
for(i = 0; i < hand; i++){
output[i] = [];
for(ind = 0; ind < card; ind++){
randS = random(0, deck.length);
randC = random(0, deck[randS].length);
output[i][ind] = deck[randS][randC];
deck[randS].splice(randC,1);
if(deck[randS].length === 0) deck.splice(randS,1);
}
}
document.write( JSON.stringify(deck, null, 2) );
return output;
}
var deck = deckGenerate()
document.write( JSON.stringify(deck, null, 2) );
document.write("<br><br>");
var randomhands = randomCard(deck, 5, 8);
document.write("<br><br>");
document.write("<br><br>");
document.write( JSON.stringify(randomhands, null, 2) );

Generating alphanumerical sequence javascript

I have written a terribly slow function for generating codes that go from AA000 to ZZ999 (in sequence not random). And I have concluded that there has to be a better way to do this. Any suggestions on how to make this faster?
function generateAlphaNumeric(){
theAlphabet = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'];
resultArrray = [];
resultArrray2 = [];
teller = 0;
for(i in theAlphabet){
for(x in theAlphabet){
resultArrray[teller] = theAlphabet[i] + theAlphabet[x];
teller++;
}
}
teller = 0;
for(x = 0; x<10; x++){
for(y = 0; y<10; y++){
for(z = 0; z<10; z++){
resultArrray2[teller] = x.toString() + y.toString() +z.toString();
teller++;
}
}
}
teller = 0;
finalArray = [];
for(index in resultArrray){
for(i in resultArrray2){
finalArray[teller] = resultArrray[index] + resultArrray2[i];
teller++;
}
}
//console.log(resultArrray);
//console.log(resultArrray2);
console.log(finalArray);
}
This should be considerably faster:
var theAlphabet = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
'P','Q','R','S','T','U','V','W','X','Y','Z'];
var theDigits = ['0','1','2','3','4','5','6','7','8','9'];
var result = [];
for (var i=0 ; i<26 ; i++) {
var prefix1 = theAlphabet[i];
for (var j=0 ; j<26; j++) {
var prefix2 = prefix1 + theAlphabet[j];
for(var x = 0; x<10; x++){
var prefix3 = prefix2 + theDigits[x];
for(var y = 0; y<10; y++){
var prefix4 = prefix3 + theDigits[y];
for(var z = 0; z<10; z++){
result.push(prefix4 + theDigits[z]);
}
}
}
}
}
Key ideas:
Generate everything in one run
Reuse partial strings as much as possible
However, I don't see how such an exhaustive list is useful. There are exactly 26 * 26 * 1000 different codes. So instead of maintaining an array with all codes it could make sense to simply build a function that generates the specific code requested:
function getCode(number) {
var z = number % 10;
number -= z; number /= 10;
var y = number % 10;
number -= y; number /= 10;
var x = number % 10;
number -= x; number /= 10;
var a = number % 26;
number -= a; number /= 26;
var b = number;
return theAlphabet[a] + theAlphabet[b] + theDigits[x] + theDigits[y] + theDigits[z];
}
function generate() {
var str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
array = [];
for (var i = 0; i < str.length; i++) {
for (var j = 0; j < str.length; j++) {
for (var k = 0; k < 10; k++) {
for (var l = 0; l < 10; l++) {
for (var m = 0; m < 10; m++) {
ar.push(str[i] + str[j] + k + l + m);
}
}
}
}
}
return array;
}
console.log(generate());
This will generate a array of all the codes .. U can save that array and parse it easily using a loop.
Try this solution:
function generate() {
var str = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
ar = [];
for (var index1 = 0; index1 < str.length; index1++) {
for (var index2 = 0; index2 < str.length; index2++) {
for (var index3 = 0; index3 < 1000; index3++) {
ar.push(str[index1] + str[index2] + ('000' + index3).slice(-3));
}
}
}
return ar;
}
console.log(generate());
I didn't test it, but it should do the trick
function generateAlphaNumeric()
{
var theAlphabet = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z'];
var result = [];
// Will take a random letter inside theAlphabet
// Math.floor(Math.random() * theAlphabet.length) will generate a random number between 0 and 25
var i = 0;
while(i<2)
{
var letter = theAlphabet[Math.floor(Math.random() * theAlphabet.length)];
result.push(letter);
i++;
}
i = 0;
while(i<3)
{
// Adds a random number between 0 and 9
result.push(Math.floor(Math.random() * 10));
i++;
}
return result;
}
From a computational complexity perspective, unfortunately this is the best you can do. From a sheer number of instructions perspective, you can do a bit better (as others have pointed out), but it's still going to be the same order of complexity (remember that constants / multipliers are irrelevant in big-O complexity). You can also optimize the storage a bit.
Think about it. Your array needs to have 26 * 26 * 10 * 10 * 10 members. This means you need to at least touch that many elements.
Let N = number of elements in the alphabet
Let M = number of elements in your digit queue
Best Case Order Complexity = O(N * N * M * M * M) (if all you had to do was assign values)
Best case storage complexity = same as above (you have to store all the codes)
Right now you are using the following operations:
for(i in theAlphabet){ // *O(N)*
for(x in theAlphabet){ // *O(N)*
resultArrray[teller] = theAlphabet[i] + theAlphabet[x];// *(O(1))*
}
}
for(x = 0; x<10; x++){ // O(M)
for(y = 0; y<10; y++){ // O(M)
for(z = 0; z<10; z++){ // O(M)
resultArrray2[teller] = x.toString() + y.toString() +z.toString(); // O(1) (technically this is O(length of x + y + z)
teller++;
}
}
}
for(index in resultArrray){ // O(N * N)
for(i in resultArrray2){ // O(M * M * M(
finalArray[teller] = resultArrray[index] + resultArrray2[i]; //O(1)
teller++;
}
}
So at the end of the day your order complexity is O(N * N * M * M * M), which is the best you can do.
The bigger question is why you want to generate all the codes at all. If all you want is to create a unique code per order number or something, you can make a state machine like:
function getNextCode(previousCode) {
// in here, just increment the previous code
}
If all you want is a random identifier, consider using a hash of the timestamp + something about the request instead.
If you don't care about uniqueness, you can always just generate a random code.
All of the above are O(1).

Generate a random 4 digit number with no repeating digits

I have this function to create a random 4 digit number:
generateCode = function(){
var fullNumber = [];
var digit1 = Math.floor(Math.random() * 9) + 1;
var digit2 = Math.floor(Math.random() * 9) + 1;
var digit3 = Math.floor(Math.random() * 9) + 1;
var digit4 = Math.floor(Math.random() * 9) + 1;
fullNumber.push(digit1, digit2, digit3, digit4);
this.theCode(fullNumber.join("")) ;
}
But I need to create a 4 digit random number with no repeating digits.
So "1234" or "9673". But not "1145" or "5668"
Is there an efficient way to do this?
You can use this handy shuffle function to shuffle an array containing the numbers, and pick the first 4.
function random4Digit(){
return shuffle( "0123456789".split('') ).join('').substring(0,4);
}
function shuffle(o){
for(var j, x, i = o.length; i; j = Math.floor(Math.random() * i), x = o[--i], o[i] = o[j], o[j] = x);
return o;
}
alert( random4Digit() );
Edit: If you actually want a Number not starting with 0:
function random4DigitNumberNotStartingWithZero(){
// I did not include the zero, for the first digit
var digits = "123456789".split(''),
first = shuffle(digits).pop();
// Add "0" to the array
digits.push('0');
return parseInt( first + shuffle(digits).join('').substring(0,3), 10);
}
function shuffle(o){
for(var j, x, i = o.length; i; j = Math.floor(Math.random() * i), x = o[--i], o[i] = o[j], o[j] = x);
return o;
}
alert( random4DigitNumberNotStartingWithZero() );
Create an array of numbers, randomize it, then slice out the first 4 and join them together.
var numbers = [0,1,2,3,4,5,6,7,8,9];
function shuffleArray(array) {
for (var i = array.length - 1; i > 0; i--) {
var j = Math.floor(Math.random() * (i + 1));
var temp = array[i];
array[i] = array[j];
array[j] = temp;
}
return array;
}
shuffleArray(numbers).slice(0,4).join('');
It really depends on how you'd like to approach the problem. One solution is to store the digits you've used already in an array, then search the array when you generate a number. If the generated number is contained in the array, then you try generating another number. Doing a few different steps like this is also easier to follow if you have a few different functions. See my included example below:
var checkRandomUsed = function(array, number){
return array.indexOf(number) > -1;
}
var getCheckedRandom = function(array){
var random = getRandomNum();
while (checkRandomUsed(array, random)){
random = getRandomNum();
}
return random;
}
var getRandomNum = function(){
return Math.floor(Math.random() * 9) + 1;
}
var generateCode = function(){
var usedNumbers = [];
for (var i = 0; i < 4; i++){
usedNumbers.push(getCheckedRandom(usedNumbers));
}
return usedNumbers.join("");
}
As an update, try this...
const value = (new Array(4))
.map(() => (Math.floor(Math.random() * 9) + 1).toString()).reduce((p, c) => p + c);
This will do it if you're ok with potentially having 0 as the first digit:
var getNum = () => randoSequence(0, 9).slice(-4).join("");
console.log(getNum());
<script src="https://randojs.com/1.0.0.js"></script>
This will do it without 0 as the first digit:
function getNum(){
var sequenece = randoSequence(0, 9).slice(-4);
return (sequenece[6] ? sequenece.slice(-4) : sequenece.slice(0, 4)).join("");
}
console.log(getNum());
<script src="https://randojs.com/1.0.0.js"></script>
Both of these codes use rando.js to simplify the randomness and make it more readable, so don't forget to add the script tag to the head of your HTML document if you want to use either of them.

A while loop to add the digits of a multi-digit number together? (Javascript)

I need to add the digits of a number together (e.g. 21 is 2+1) so that the number is reduced to only one digit (3). I figured out how to do that part.
However,
1) I may need to call the function more than once on the same variable (e.g. 99 is 9+9 = 18, which is still >= 10) and
2) I need to exclude the numbers 11 and 22 from this function's ambit.
Where am I going wrong below?
var x = 123;
var y = 456;
var z = 789;
var numberMagic = function (num) {
var proc = num.toString().split("");
var total = 0;
for (var i=0; i<proc.length; i++) {
total += +proc[i];
};
};
while(x > 9 && x != 11 && x != 22) {
numberMagic(x);
};
} else {
xResult = x;
};
console.log(xResult);
//repeat while loop for y and z
Here are the problems with your code
var x = 123;
var y = 456;
var z = 789;
var numberMagic = function (num) {
var proc = num.toString().split("");
var total = 0;
for (var i=0; i<proc.length; i++) {
total += +proc[i]; // indentation want awry
}; // don't need this ; - not a show stopper
// you're not returning anything!!!!
};
while(x > 9 && x != 11 && x != 22) {
numberMagic(x);
}; // ; not needed
// because x never changes, the above while loop would go on forever
} else { // this else has no if
xResult = x; // even if code was right, x remains unchanged
};
console.log(xResult);
Hope that helps in some way
Now - here's a solution that works
var x = 123;
var y = 456;
var z = 789;
var numberMagic = function (num) {
while (num > 9) {
if (num == 11 || num == 22) {
return num;
}
var proc = num.toString().split("");
num = proc.reduce(function(previousInt, thisValueString) {
return previousInt + parseInt(thisValueString);
}, 0);
}
return num;
}
console.log(numberMagic(x));
console.log(numberMagic(y));
console.log(numberMagic(z));
I'm not sure to understand what you want..
with this function you reduce any number to one single digit
while(num > 9){
if(num == 11 || num == 22) return;
var proc = num.toString();
var sum = 0;
for(var i=0; i<proc.length; i++) {
sum += parseInt(proc[i]);
}
num = sum;
}
is it what you are looking at?
I wrote an example at Jsfiddle that you can turn any given number into a single digit:
Example input: 551
array of [5, 5, 1] - add last 2 digits
array of [5, 6] - add last 2 digits
array of [1, 1] - add last 2 digits
array of [2] - output
Here is the actual code:
var number = 1768;
var newNumber = convertToOneDigit(number);
console.log("New Number: " + newNumber);
function convertToOneDigit(number) {
var stringNumber = number.toString();
var stringNumberArray = stringNumber.split("");
var stringNumberLength = stringNumberArray.length;
var tmp;
var tmp2;
var tmp3;
console.log("Array: " + stringNumberArray);
console.log("Array Length: " + stringNumberLength);
while (stringNumberLength > 1) {
tmp = parseInt(stringNumberArray[stringNumberLength - 1]) + parseInt(stringNumberArray[stringNumberLength - 2]);
stringNumberArray.pop();
stringNumberArray.pop();
tmp2 = tmp.toString();
if (tmp2.length > 1) {
tmp3 = tmp2.split("");
for (var i = 0; i < tmp3.length; i++) {
stringNumberArray.push(tmp3[i]);
}
} else {
stringNumberArray.push(tmp2);
}
stringNumberLength = stringNumberArray.length;
console.log("Array: " + stringNumberArray);
console.log("Array Length: " + stringNumberLength);
}
return stringNumberArray[0];
}
function addDigits(n) {
let str = n.toString().split('');
let len = str.length;
let add,
acc = 0;
for (i=0; i<=len-1; i++) {
acc += Number(str[i]);
}
return acc;
}
console.log( addDigits(123456789) ); //Output: 45
Just make it a While loop, remember a While loops it's just the same as a For loop, only you add the counter variable at the end of the code, the same way you can do with a Do{code}while(condition) Only need to add a counter variable at the end and its gonna be the same. Only that the variable its global to the loop, I mean comes from the outside.
Ej.
let i = 0; //it's global to the loop, ( wider scope )
while (i<=x) {
//Code line;
//Code line;
//Code line;
//Code line;
i++
}
Now this is working with an outside variable and it's NOT recommended.. unless that var its local to a Function.
Please look at the this solution also
var x = 123;
var y = 456;
var z = 789;
var numberMagic = function (num) {
var total = 0;
while (num != 0) {
total += num % 10;
num = parseInt(num / 10);
}
console.log(total);
if (total > 9)
numberMagic(total);
else
return total;
}
//Call first time function
numberMagic(z);

Compare Strings Javascript Return %of Likely

I am looking for a JavaScript function that can compare two strings and return the likeliness that they are alike. I have looked at soundex but that's not really great for multi-word strings or non-names. I am looking for a function like:
function compare(strA,strB){
}
compare("Apples","apple") = Some X Percentage.
The function would work with all types of strings, including numbers, multi-word values, and names. Perhaps there's a simple algorithm I could use?
Ultimately none of these served my purpose so I used this:
function compare(c, u) {
var incept = false;
var ca = c.split(",");
u = clean(u);
//ca = correct answer array (Collection of all correct answer)
//caa = a single correct answer word array (collection of words of a single correct answer)
//u = array of user answer words cleaned using custom clean function
for (var z = 0; z < ca.length; z++) {
caa = $.trim(ca[z]).split(" ");
var pc = 0;
for (var x = 0; x < caa.length; x++) {
for (var y = 0; y < u.length; y++) {
if (soundex(u[y]) != null && soundex(caa[x]) != null) {
if (soundex(u[y]) == soundex(caa[x])) {
pc = pc + 1;
}
}
else {
if (u[y].indexOf(caa[x]) > -1) {
pc = pc + 1;
}
}
}
}
if ((pc / caa.length) > 0.5) {
return true;
}
}
return false;
}
// create object listing the SOUNDEX values for each letter
// -1 indicates that the letter is not coded, but is used for coding
// 0 indicates that the letter is omitted for modern census archives
// but acts like -1 for older census archives
// 1 is for BFPV
// 2 is for CGJKQSXZ
// 3 is for DT
// 4 is for L
// 5 is for MN my home state
// 6 is for R
function makesoundex() {
this.a = -1
this.b = 1
this.c = 2
this.d = 3
this.e = -1
this.f = 1
this.g = 2
this.h = 0
this.i = -1
this.j = 2
this.k = 2
this.l = 4
this.m = 5
this.n = 5
this.o = -1
this.p = 1
this.q = 2
this.r = 6
this.s = 2
this.t = 3
this.u = -1
this.v = 1
this.w = 0
this.x = 2
this.y = -1
this.z = 2
}
var sndx = new makesoundex()
// check to see that the input is valid
function isSurname(name) {
if (name == "" || name == null) {
return false
} else {
for (var i = 0; i < name.length; i++) {
var letter = name.charAt(i)
if (!(letter >= 'a' && letter <= 'z' || letter >= 'A' && letter <= 'Z')) {
return false
}
}
}
return true
}
// Collapse out directly adjacent sounds
// 1. Assume that surname.length>=1
// 2. Assume that surname contains only lowercase letters
function collapse(surname) {
if (surname.length == 1) {
return surname
}
var right = collapse(surname.substring(1, surname.length))
if (sndx[surname.charAt(0)] == sndx[right.charAt(0)]) {
return surname.charAt(0) + right.substring(1, right.length)
}
return surname.charAt(0) + right
}
// Collapse out directly adjacent sounds using the new National Archives method
// 1. Assume that surname.length>=1
// 2. Assume that surname contains only lowercase letters
// 3. H and W are completely ignored
function omit(surname) {
if (surname.length == 1) {
return surname
}
var right = omit(surname.substring(1, surname.length))
if (!sndx[right.charAt(0)]) {
return surname.charAt(0) + right.substring(1, right.length)
}
return surname.charAt(0) + right
}
// Output the coded sequence
function output_sequence(seq) {
var output = seq.charAt(0).toUpperCase() // Retain first letter
output += "-" // Separate letter with a dash
var stage2 = seq.substring(1, seq.length)
var count = 0
for (var i = 0; i < stage2.length && count < 3; i++) {
if (sndx[stage2.charAt(i)] > 0) {
output += sndx[stage2.charAt(i)]
count++
}
}
for (; count < 3; count++) {
output += "0"
}
return output
}
// Compute the SOUNDEX code for the surname
function soundex(value) {
if (!isSurname(value)) {
return null
}
var stage1 = collapse(value.toLowerCase())
//form.result.value=output_sequence(stage1);
var stage1 = omit(value.toLowerCase())
var stage2 = collapse(stage1)
return output_sequence(stage2);
}
function clean(u) {
var u = u.replace(/\,/g, "");
u = u.toLowerCase().split(" ");
var cw = ["ARRAY OF WORDS TO BE EXCLUDED FROM COMPARISON"];
var n = [];
for (var y = 0; y < u.length; y++) {
var test = false;
for (var z = 0; z < cw.length; z++) {
if (u[y] != "" && u[y] != cw[z]) {
test = true;
break;
}
}
if (test) {
//Don't use & or $ in comparison
var val = u[y].replace("$", "").replace("&", "");
n.push(val);
}
}
return n;
}
Here's an answer based on Levenshtein distance https://en.wikipedia.org/wiki/Levenshtein_distance
function similarity(s1, s2) {
var longer = s1;
var shorter = s2;
if (s1.length < s2.length) {
longer = s2;
shorter = s1;
}
var longerLength = longer.length;
if (longerLength == 0) {
return 1.0;
}
return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength);
}
For calculating edit distance
function editDistance(s1, s2) {
s1 = s1.toLowerCase();
s2 = s2.toLowerCase();
var costs = new Array();
for (var i = 0; i <= s1.length; i++) {
var lastValue = i;
for (var j = 0; j <= s2.length; j++) {
if (i == 0)
costs[j] = j;
else {
if (j > 0) {
var newValue = costs[j - 1];
if (s1.charAt(i - 1) != s2.charAt(j - 1))
newValue = Math.min(Math.min(newValue, lastValue),
costs[j]) + 1;
costs[j - 1] = lastValue;
lastValue = newValue;
}
}
}
if (i > 0)
costs[s2.length] = lastValue;
}
return costs[s2.length];
}
Usage
similarity('Stack Overflow','Stack Ovrflw')
returns 0.8571428571428571
You can play with it below:
function checkSimilarity(){
var str1 = document.getElementById("lhsInput").value;
var str2 = document.getElementById("rhsInput").value;
document.getElementById("output").innerHTML = similarity(str1, str2);
}
function similarity(s1, s2) {
var longer = s1;
var shorter = s2;
if (s1.length < s2.length) {
longer = s2;
shorter = s1;
}
var longerLength = longer.length;
if (longerLength == 0) {
return 1.0;
}
return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength);
}
function editDistance(s1, s2) {
s1 = s1.toLowerCase();
s2 = s2.toLowerCase();
var costs = new Array();
for (var i = 0; i <= s1.length; i++) {
var lastValue = i;
for (var j = 0; j <= s2.length; j++) {
if (i == 0)
costs[j] = j;
else {
if (j > 0) {
var newValue = costs[j - 1];
if (s1.charAt(i - 1) != s2.charAt(j - 1))
newValue = Math.min(Math.min(newValue, lastValue),
costs[j]) + 1;
costs[j - 1] = lastValue;
lastValue = newValue;
}
}
}
if (i > 0)
costs[s2.length] = lastValue;
}
return costs[s2.length];
}
<div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div>
<div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div>
<div>Match: <span id="output">No Input</span></div>
Using this library for string similarity worked like a charm for me!
Here's the Example -
var similarity = stringSimilarity.compareTwoStrings("Apples","apple"); // => 0.88
Here is a very simple function that does a comparison and returns a percentage based on equivalency. While it has not been tested for all possible scenarios, it may help you get started.
function similar(a,b) {
var equivalency = 0;
var minLength = (a.length > b.length) ? b.length : a.length;
var maxLength = (a.length < b.length) ? b.length : a.length;
for(var i = 0; i < minLength; i++) {
if(a[i] == b[i]) {
equivalency++;
}
}
var weight = equivalency / maxLength;
return (weight * 100) + "%";
}
alert(similar("test","tes")); // 75%
alert(similar("test","test")); // 100%
alert(similar("test","testt")); // 80%
alert(similar("test","tess")); // 75%
To Find degree of similarity between two strings; we can use more than one or two methods but I am mostly inclined towards the usage of 'Dice's Coefficient' . which is better! well in my knowledge than using 'Levenshtein distance'
Using this 'string-similarity' package from npm you will be able to work on what I said above.
some easy usage examples are
var stringSimilarity = require('string-similarity');
var similarity = stringSimilarity.compareTwoStrings('healed', 'sealed');
var matches = stringSimilarity.findBestMatch('healed', ['edward', 'sealed', 'theatre']);
for more please visit the link given above. Thankyou.
Just one I quickly wrote that might be good enough for your purposes:
function Compare(strA,strB){
for(var result = 0, i = strA.length; i--;){
if(typeof strB[i] == 'undefined' || strA[i] == strB[i]);
else if(strA[i].toLowerCase() == strB[i].toLowerCase())
result++;
else
result += 4;
}
return 1 - (result + 4*Math.abs(strA.length - strB.length))/(2*(strA.length+strB.length));
}
This weighs characters that are the same but different case 1 quarter as heavily as characters that are completely different or missing. It returns a number between 0 and 1, 1 meaning the strings are identical. 0 meaning they have no similarities. Examples:
Compare("Apple", "Apple") // 1
Compare("Apples", "Apple") // 0.8181818181818181
Compare("Apples", "apple") // 0.7727272727272727
Compare("a", "A") // 0.75
Compare("Apples", "appppp") // 0.45833333333333337
Compare("a", "b") // 0
How about function similar_text from PHP.js library?
It is based on a PHP function with the same name.
function similar_text (first, second) {
// Calculates the similarity between two strings
// discuss at: http://phpjs.org/functions/similar_text
if (first === null || second === null || typeof first === 'undefined' || typeof second === 'undefined') {
return 0;
}
first += '';
second += '';
var pos1 = 0,
pos2 = 0,
max = 0,
firstLength = first.length,
secondLength = second.length,
p, q, l, sum;
max = 0;
for (p = 0; p < firstLength; p++) {
for (q = 0; q < secondLength; q++) {
for (l = 0;
(p + l < firstLength) && (q + l < secondLength) && (first.charAt(p + l) === second.charAt(q + l)); l++);
if (l > max) {
max = l;
pos1 = p;
pos2 = q;
}
}
}
sum = max;
if (sum) {
if (pos1 && pos2) {
sum += this.similar_text(first.substr(0, pos2), second.substr(0, pos2));
}
if ((pos1 + max < firstLength) && (pos2 + max < secondLength)) {
sum += this.similar_text(first.substr(pos1 + max, firstLength - pos1 - max), second.substr(pos2 + max, secondLength - pos2 - max));
}
}
return sum;
}
fuzzyset - A fuzzy string set for javascript.
fuzzyset is a data structure that performs something akin to fulltext search against data to determine likely mispellings and approximate string matching. Note that this is a javascript port of a python library.
To some extent, I like the ideas of Dice's coefficient embedded in the string-similarity module. But I feel that considering the bigrams only and not taking into account their multiplicities is missing some important data. Below is a version that also handles multiplicities, and I think is a simpler implementation overall. I don't try to use their API, offering only a function which compares two strings after some manipulation (removing non-alphanumeric characters, lower-casing everything, and compressing but not removing whitespace), built atop one which compares them without that manipulation. It would be easy enough to wrap this back in their API, but I see little need.
const stringSimilarity = (a, b) =>
_stringSimilarity (prep (a), prep (b))
const _stringSimilarity = (a, b) => {
const bg1 = bigrams (a)
const bg2 = bigrams (b)
const c1 = count (bg1)
const c2 = count (bg2)
const combined = uniq ([... bg1, ... bg2])
.reduce ((t, k) => t + (Math .min (c1 [k] || 0, c2 [k] || 0)), 0)
return 2 * combined / (bg1 .length + bg2 .length)
}
const prep = (str) => // TODO: unicode support?
str .toLowerCase () .replace (/[^\w\s]/g, ' ') .replace (/\s+/g, ' ')
const bigrams = (str) =>
[...str] .slice (0, -1) .map ((c, i) => c + str [i + 1])
const count = (xs) =>
xs .reduce ((a, x) => ((a [x] = (a [x] || 0) + 1), a), {})
const uniq = (xs) =>
[... new Set (xs)]
console .log (stringSimilarity (
'foobar',
'Foobar'
)) //=> 1
console .log (stringSimilarity (
"healed",
"sealed"
))//=> 0.8
console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"For sale: table in very good condition, olive green in colour."
)) //=> 0.7787610619469026
console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"For sale: green Subaru Impreza, 210,000 miles"
)) //=> 0.38636363636363635
console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"Wanted: mountain bike with at least 21 gears."
)) //=> 0.1702127659574468
console .log (stringSimilarity (
"The rain in Spain falls mainly on the plain.",
"The run in Spun falls munly on the plun.",
)) //=> 0.7560975609756098
console .log (stringSimilarity (
"Fa la la la la, la la la la",
"Fa la la la la, la la",
)) //=> 0.8636363636363636
console .log (stringSimilarity (
"car crash",
"carcrash",
)) //=> 0.8
console .log (stringSimilarity (
"Now is the time for all good men to come to the aid of their party.",
"Huh?",
)) //=> 0
.as-console-wrapper {max-height: 100% !important; top: 0}
Some of the test cases are from string-similarity, others are my own. They show some significant differences from that package, but nothing untoward. The only one I would call out is the difference between "car crash" and "carcrash", which string-similarity sees as identical and I report with a similarity of 0.8. My version finds more similarity in all the olive-green test-cases than does string-similarity, but as these are in any case fairly arbitrary numbers, I'm not sure how much difference it makes; they certainly position them in the same relative order.
string-similarity lib vs Top answer (by #overloard1234) performance comparation you can find below
Based on #Tushar Walzade's advice to use string-similarity library, you can find, that for example
stringSimilatityLib.findBestMatch('KIA','Kia').bestMatch.rating
will return 0.0
So, looks like better to compare it in lowerCase.
Better base usage (for arrays) :
findBestMatch(str, strArr) {
const lowerCaseArr = strArr.map(element => element.toLowerCase());//creating lower case array
const match = stringSimilatityLib.findBestMatch(str.toLowerCase(), lowerCaseArr).bestMatch; //trying to find bestMatch
if (match.rating > 0) {
const foundIndex = lowerCaseArr.findIndex(x => x === match.target); //finding the index of found best case
return strArr[foundIndex]; //returning initial value from array
}
return null;
},
Performance
Also, i compared top answer here (made by #overloard1234) and string-similarity lib (v4.0.4).
The results you can find here : https://jsbench.me/szkzojoskq/1
Result : string-similarity is ~ twice faster
Just for fun : v2.0 of string-similarity library slower, than latest 4.0.4 about 2.2 times. So update it, if you are still using < 3.0 :)
const str1 = " pARTH PARmar r ";
const str2 = " parmar r par ";
function calculateSimilarity(str1 = "", str2 = "") {
let longer = str1.trim();
let shorter = str2.trim();
let a1 = longer.toLowerCase().split(" ");
let b1 = shorter.toLowerCase().split(" ");
let result = a1.every((aa, i) => aa[0] === b1[i][0]);
if (longer.length < shorter.length) [longer,shorter] = [shorter,longer];
var arr = [];
let count = 0;
for(var i = 0;i<longer.length;i++){
if(shorter && shorter.includes(longer[i])) {
shorter = shorter.replace(longer[i],"")
count++
};
}
return {
score : (count*100)/longer.length,
result
}
}
console.log(calculateSimilarity(str1, str2));
I used #overlord1234 function, but corrected ь: '', cuz English words don't have this letter, and next need return a[char] ?? char instead of return a[char] || char

Categories

Resources