Pythonize Javascript for loop - javascript

I am working on my bachelors work, and I am building something like interpreter. I want to convert the Javascript code into Python. I came to a point that I want to convert For Loop into Python which looks like this:
for(var x = 0; x < some_length; x++) {
}
in python it would look like this:
for x in range(0, some_length):
the problem is I want to cover all the cases that can happen, like for example going backwards like this:
for(var x = some_length - 1; x >= 0; x--) {
}
can somebody help me to write a function that will transpile this into python?
IMPORTANT EDIT!!!
as input to the function that will transpile the for loop into python are:
function pythonize(firstAction, condition, action)
where
firstAction = "x = 0",
condition = "x < some_length"
action = "x++"

It will be like, you can check range function for more of its detailed usage:
for x in range(some_length - 1, -1, -1):
Notice I change the step from x++ to decrement, otherwise the for loop won't terminate.

range accepts start, end, step, so you can do
for x in range(some_length-1, -1, -1)
Another option is
for x in reversed(range(some_length))
note: I assume you mean x-- in your second example

I've created this long function to transpile javascript for loop into python for loop:
function pyt(c) {
var cc = c.split('(')[1];
cc = cc.substr(0,cc.length-1);
pp = cc.split(';');
var r1 = /[A-Za-z\$\_]{1}[A-Za-z0-9\_]* ?= ?(.*)/.exec(pp[0])[1];
var r2 = '';
if(pp[1].indexOf('<') > -1) {
r2 = /[A-Za-z\$\_]{1}[A-Za-z0-9\_]* ?<=? ?(.*)/.exec(pp[1])[1]
} else if(pp[1].indexOf('>') > -1) {
r2 = /[A-Za-z\$\_]{1}[A-Za-z0-9\_]* ?>=? ?(.*)/.exec(pp[1])[1]
}
var delta = 0;
if(pp[1].indexOf('>=')) {
delta = -1;
}
if(pp[1].indexOf('<=')) {
delta = 1;
}
var e2 = 0;
if(!isNaN(r2)) {
e2 = Number(r2) + delta;
} else {
if(delta !== 0) {
e2 = r2 + (delta > 0 ? '+' : '-') + delta;
} else {
e2 = r2;
}
}
var r3 = '';
if(/[A-Za-z\$\_]{1}[A-Za-z0-9\_]*\+\+/.test(pp[2])) {
r3 = '1';
} else if(/[A-Za-z\$\_]{1}[A-Za-z0-9\_]*\-\-/.test(pp[2])) {
r3 = '-1';
} else {
if(pp[2].indexOf('+=') > -1) {
r3 = /[A-Za-z\$\_]{1}[A-Za-z0-9\_]* ?\+= ?(.*)/.exec(pp[2])[1];
} else if(pp[2].indexOf('-=') > -1) {
r3 = /[A-Za-z\$\_]{1}[A-Za-z0-9\_]* ?\-= ?(.*)/.exec(pp[2])[1];
} else {
if(/([A-Za-z\$\_]{1}[A-Za-z0-9\_]*) ?= ?\1 ?\+ ?(.*)/.test(pp[2])) {
r3 = /([A-Za-z\$\_]{1}[A-Za-z0-9\_]*) ?= ?\1 ?\+ ?(.*)/.exec(pp[2])[2];
} else {
r3 = /([A-Za-z\$\_]{1}[A-Za-z0-9\_]*) ?= ?\1 ?\- ?(.*)/.exec(pp[2])[2];
}
}
}
p = 'for i in range(' + r1 + ', ' + e2 + ', ' + r3 + ')';
return p;
}
var c = "for(i = 30 + y;i>=10 - 1; i = i + 0.1)";
console.log(pyt(c));
https://jsfiddle.net/x7uhxbua/12/
It's basically using regex to do the magic. It covers most of the cases, but not all.

Related

Javascript while loop (Card deck simulation)

I am having an issue with the following code that simulates a card deck.
The deck is created properly (1 array containing 4 arrays (suits) containing 13 elements each (face values)) and when I use the G.test(); function it is correctly pulling 13 random cards but then returns 39x "Empty" (A total of 52).
I hate to ask for help, but I have left the problem overnight and then some and I still cannot find the reason that this is happening. I appreciate any and all insight that can be offered.
var G = {};
G.cards = [[], [], [], []];
G.newCard = function(v) { //currently a useless function, tried a few things
return v;
};
G.deck = {
n: function() { //new deck
var x; var list = [];
list.push(G.newCard("A"));
for (x = 2; x <= 10; x += 1) {
list.push(G.newCard(x.toString()));
}
list.push(G.newCard("J"), G.newCard("Q"), G.newCard("K"));
for (x = 0; x < G.cards.length; x += 1) {
G.cards[x] = list;
}
},
d: function() { //random card - returns suit & value
var s; var c; var v; var drawn = false; var n;
s = random(0, G.cards.length);
c = random(0, G.cards[s].length);
n = 0;
while (!drawn) {
if (G.cards[s].length > 0) {
if (G.cards[s][c]) {
v = G.cards[s].splice(c, 1);
drawn = true;
} else {
c = random(0, G.cards[s].length);
}
} else {
s = (s + 1 >= G.cards.length) ? 0 : s + 1;
n += 1;
console.log(s);
if (n >= G.cards.length) {
console.log(n);
return "Empty";
}
}
}
return {s: s, v: v[0]};
},
}; //G.deck
G.test = function() {
var x; var v;
G.deck.n();
for (x = 0; x < 52; x += 1) {
v = G.deck.d();
console.log(v);
}
};
Replace
for (x = 0; x < G.cards.length; x += 1) {
G.cards[x] = list;
}
with
for (x = 0; x < G.cards.length; x += 1) {
G.cards[x] = list.slice();
}
as this prevents all elements of G.cards[x] binding to the same (single) array instance.
If all elements bind to the same instance, mutating one element equals mutating all elements. list.slice() creates a new copy of list and thus a new array instance to prevent the aforementioned issue.
I won't go through your code, but I built a code that will do what you wanted. I only built this for one deck and not multiple deck play. There are two functions, one will generate the deck, and the other will drawn cards from the deck, bases on how many hands you need and how many cards you wanted for each hand. One a card is drawn, it will not be re-drawn. I might publish a short article for how a card dealing program work or similar in the short future at http://kevinhng86.iblog.website.
function random(min, max){
return Math.floor(Math.random() * (max - min)) + min;
}
function deckGenerate(){
var output = [];
var face = {1: "A", 11: "J", 12: "Q", 13: "K"};
// Heart Space Diamond & Club;
var suit = ["H", "S", "D", "C"];
// Delimiter between card identification and suit identification.
var d = "-";
for(i = 0; i < 4; i++){
output[i] = [];
for(ind = 0; ind < 13; ind++ ){
card = (ind + 1);
output[i][ind] = (card > 10) || (card === 1)? face[card] + d + suit[i] : card.toString() + d + suit[i];
}
}
return output;
}
function randomCard(deck, hand, card){
var output = [];
var randS = 0;
var randC = 0;
if( hand * card > 52 ) throw("Too many card, I built this for one deck only");
for(i = 0; i < hand; i++){
output[i] = [];
for(ind = 0; ind < card; ind++){
randS = random(0, deck.length);
randC = random(0, deck[randS].length);
output[i][ind] = deck[randS][randC];
deck[randS].splice(randC,1);
if(deck[randS].length === 0) deck.splice(randS,1);
}
}
document.write( JSON.stringify(deck, null, 2) );
return output;
}
var deck = deckGenerate()
document.write( JSON.stringify(deck, null, 2) );
document.write("<br><br>");
var randomhands = randomCard(deck, 5, 8);
document.write("<br><br>");
document.write("<br><br>");
document.write( JSON.stringify(randomhands, null, 2) );

How to optimize levenshtein distance for checking for a distance of 1?

I'm working on a game where I only need to check if there's a distance of 0 or 1 between two words and return true if that's the case. I found a general purpose levenshtein distance algorithm:
function levenshtein(s, t) {
if (s === t) { return 0; }
var n = s.length, m = t.length;
if (n === 0 || m === 0) { return n + m; }
var x = 0, y, a, b, c, d, g, h, k;
var p = new Array(n);
for (y = 0; y < n;) { p[y] = ++y; }
for (;
(x + 3) < m; x += 4) {
var e1 = t.charCodeAt(x);
var e2 = t.charCodeAt(x + 1);
var e3 = t.charCodeAt(x + 2);
var e4 = t.charCodeAt(x + 3);
c = x; b = x + 1; d = x + 2; g = x + 3; h = x + 4;
for (y = 0; y < n; y++) {
k = s.charCodeAt(y);
a = p[y];
if (a < c || b < c) { c = (a > b ? b + 1 : a + 1); }
else { if (e1 !== k) { c++; } }
if (c < b || d < b) { b = (c > d ? d + 1 : c + 1); }
else { if (e2 !== k) { b++; } }
if (b < d || g < d) { d = (b > g ? g + 1 : b + 1); }
else { if (e3 !== k) { d++; } }
if (d < g || h < g) { g = (d > h ? h + 1 : d + 1); }
else { if (e4 !== k) { g++; } }
p[y] = h = g; g = d; d = b; b = c; c = a;
}
}
for (; x < m;) {
var e = t.charCodeAt(x);
c = x;
d = ++x;
for (y = 0; y < n; y++) {
a = p[y];
if (a < c || d < c) { d = (a > d ? d + 1 : a + 1); }
else {
if (e !== s.charCodeAt(y)) { d = c + 1; }
else { d = c; }
}
p[y] = d;
c = a;
}
h = d;
}
return h;
}
Which works, but this spot is going to be a hotspot and be run potentially hundreds of thousands of times a second and I want to optimize it because I don't need a general purpose algorithm, just one that checks if there's a distance of 0 or 1.
I tried writing it and came up with this:
function closeGuess(guess, word) {
if (Math.abs(word.length - guess.length) > 1) { return false; }
var errors = 0, guessIndex = 0, wordIndex = 0;
while (guessIndex < guess.length || wordIndex < word.length) {
if (errors > 1) { return false; }
if (guess[guessIndex] !== word[wordIndex]) {
if (guess.length < word.length) { wordIndex++; }
else { guessIndex++; }
errors++;
} else {
wordIndex++;
guessIndex++;
}
}
return true;
}
But after profiling it I found that my code was twice as slow, which surprised me because I think the general purpose algorithm is O(n*m) and I think mine is O(n).
I've been testing the performance difference on this fiddle: https://jsfiddle.net/aubtze2L/3/
Are there any better algorithms I can use or any way I can optimize my code to be faster?
I don't see a more elegant way which is at the same time faster than the good old for-loop:
function lev01(a, b) {
let la = a.length;
let lb = b.length;
let d = 0;
switch (la - lb) {
case 0: // mutation
for (let i = 0; i < la; ++i) {
if (a.charAt(i) != b.charAt(i) && ++d > 1) {
return false;
}
}
return true;
case -1: // insertion
for (let i = 0; i < la + d; ++i) {
if (a.charAt(i - d) != b.charAt(i) && ++d > 1) {
return false;
}
}
return true;
case +1: // deletion
for (let i = 0; i < lb + d; ++i) {
if (a.charAt(i) != b.charAt(i - d) && ++d > 1) {
return false;
}
}
return true;
}
return false;
}
console.log(lev01("abc", "abc"));
console.log(lev01("abc", "abd"));
console.log(lev01("abc", "ab"));
console.log(lev01("abc", "abcd"));
console.log(lev01("abc", "cba"));
Performance comparison (Chrome):
80.33ms - lev01 (this answer)
234.84ms - lev
708.12ms - close
Consider the following cases:
If the difference in lengths of the terms is greater than 1, then
the Levenshtein distance between them will be greater than 1.
If the difference in lengths is exactly 1, then the shortest string must be equal to the longest string, with a single deletion (or insertion).
If the strings are the same length then you should
consider a modified version of Hamming distance which returns false
if two, different characters are found:
Here is a sample implementation:
var areSimilar;
areSimilar = function(guess, word) {
var charIndex, foundDiff, guessLength, lengthDiff, substring, wordLength, shortest, longest, shortestLength, offset;
guessLength = guess.length;
wordLength = word.length;
lengthDiff = guessLength - wordLength;
if (lengthDiff < -1 || lengthDiff > 1) {
return false;
}
if (lengthDiff !== 0) {
if (guessLength < wordLength) {
shortest = guess;
longest = word;
shortestLength = guessLength;
} else {
shortest = word;
longest = guess;
shortestLength = wordLength;
}
offset = 0;
for (charIndex = 0; charIndex < shortestLength; charIndex += 1) {
if (shortest[charIndex] !== longest[offset + charIndex]) {
if (offset > 0) {
return false; // second error
}
offset = 1;
if (shortest[charIndex] !== longest[offset + charIndex]) {
return false; // second error
}
}
}
return true; // only one error
}
foundDiff = false;
for (charIndex = 0; charIndex < guessLength; charIndex += 1) {
if (guess[charIndex] !== word[charIndex]) {
if (foundDiff) {
return false;
}
foundDiff = true;
}
}
return true;
};
I've updated your fiddle to include this method. Here are the results on my machine:
close: 154.61
lev: 176.72500000000002
sim: 32.48000000000013
Fiddle: https://jsfiddle.net/dylon/aubtze2L/11/
If you know that you are looking for distance 0 and 1, then the general purpose DP algorithm does not make sense (and by the way the algorithm you showed looks convoluted, take a look at a better explanation here).
To check that the distance is 0, all you need is to check whether 2 strings are the same. Now if the distance is one, it means that either insertion, deletion or substitution should have happened. So generate all possible deletion from the original string and check whether it is equal to second string. So you will get something like this:
for (var i = 0; i < s_1.length; i++) {
if s_2 == s_1.slice(0, i) + s_1.slice(i + 1) {
return true
}
}
For insertions and substitution you will need to know the alphabet of all characters. You can define it as a big string var alphabet = "abcde....". Now you do a similar thing, but when you introduce substitution or insertion, you also iterate over all elements in your alphabet. I am not planning to write the whole code here.
A couple of additional things. You can make a lot of micro-optimizations here. For example if the length of two strings are different by more than 1, they clearly can't have a distance 1. Another one relates to the frequencies of underlying characters in the string.

Javascript autoincrement index

I need to create a function or use if is possible an already made library to auto increment an index. For example if it starts with 'A' it has to be incremented to 'Z' and after 'Z' it has to start from 'A1' and as soon as . . .'B1','C1', ... 'Z1', 'A2','B2',... . Does exist something like this already made ?
My idea is this, but start from 'A' and don't add number . . .
function nextChar(cont,letter) {
if (cont === 0){return letter;}
else {
letter=letter.charCodeAt(0) + 1;
return String.fromCharCode(letter);
}
}
One of many options:
function nextIndex(idx) {
var m = idx.match(/^([A-Z])(\d*)$/)
if(!m)
return 'A';
if(m[1] == 'Z')
return 'A' + (Number(m[2] || 0) + 1);
return String.fromCharCode(m[1].charCodeAt(0) + 1) + m[2];
}
var a = "";
for(i = 0; i < 100; i++) {
a = nextIndex(a)
document.write(a + ", ")
}
This one's less efficient than georg's but maybe easier to understand at first glance:
for (var count = 0, countlen = 5; count < countlen; count++) {
for (var i = 65, l = i + 26; i < l; i++) {
console.log(String.fromCharCode(i) + (count !== 0 ? count : ''));
}
}
DEMO
Allow me to propose a solution more object-oriented:
function Index(start_with) {
this.reset = function(reset_to) {
reset_to = reset_to || 'A';
this.i = reset_to.length > 1 ? reset_to[1] : 0; // needs more input checking
this.c = reset_to[0].toUpperCase(); // needs more input checking
return this;
};
this.inc = function(steps) {
steps = steps || 1;
while(steps--) {
if (this.c === 'Z') {
this.i++;
this.c = 'A';
} else {
this.c = String.fromCharCode(this.c.charCodeAt(0) + 1);
}
}
return this;
};
this.toString = function() {
if (this.i === 0) return this.c;
return this.c + '' + this.i;
};
this.reset(start_with);
}
var a = new Index(); // A
console.log('a = ' + a.inc(24).inc().inc()); // Y, Z, A1
var b = new Index('B8'); // B8
console.log('a = ' + a.reset('Y').inc()); // Y, Z
console.log('b = ' + b); // B8
Another way to think about this is that your "A1" index is just the custom rendering of an integer: 0='A',1='B',26='A1',etc.
So you can also overload the Number object to render your index. The big bonus is that all the math operations still work since your are always dealing with numbers:
Number.prototype.asIndex = function() {
var n = this;
var i = Math.floor(n / 26);
var c = String.fromCharCode('A'.charCodeAt(0) + n % 26);
return '' + c + (i ? i : '');
}
Number.parseIndex = function(index) {
var m;
if (!index) return 0;
m = index.toUpperCase().match(/^([A-Z])(\d*)$/);
if (!m || !m[1]) return 0;
return Number((m[1].charCodeAt(0) - 'A'.charCodeAt(0)) + 26 * (m[2] ? m[2] : 0));
};
var c = 52;
var ic = c.asIndex();
var nc = Number.parseIndex(ic);
console.log(c+' = '+ic+' = '+nc); // 52 = A2 = 52
If you go this way I would try to check if the new methods don't already exist first...

jQuery Counter, animating to a total, but how do I include a comma?

I have a found a number of solutions on here for a counter, animating from one total to another.
Here's my what I'm using now:
jQuery.fn.extend({
ts : function (from, to, time) {
var steps = 1,
self = this,
counter;
if (from - to > 0) {
steps = -1;
};
from -= steps;
function step() {
self.text(from += steps);
if ((steps < 0 && to >= from) || (steps > 0 && from >= to)) {
clearInterval(counter);
};
};
counter = setInterval(step, time || 5);
}
});
var total = $('.total').ts(56000,56941);
It works well. However, I would like to add a comma to the total, something like 56,941. Is this possible?
I guess this will do it:
jQuery.fn.extend({
ts: function(from, to, time) {
var steps = 1, self = this, counter;
if (from - to > 0) steps = -1;
from -= steps;
function step() {
var x = (from += steps).toString().replace(/(\d)(?=(\d\d\d)+(?!\d))/g, "$1,");
self.text(x);
if ((steps < 0 && to >= from) || (steps > 0 && from >= to)) {
clearInterval(counter);
};
};
counter = setInterval(step, time || 5);
}
});
FIDDLE
From somewhere on the web...
function formatComma(x){
return (x+'').replace( /\B(?=(\d{3})+(?!\d))/g, ',');
}
I like my own solution of reversing the string better as it is easier to understand the logic...
function formatComma(x){
// prepare the input as a string ready for manipulating and returning
return (x+'')
// reverse the string (by converting to array)
.split("").reverse().join("")
// replace 3 digits in a row, with themselves and a comma
// so long as the digits are followed by a non-word boundary
.replace(/(\d{3})\B/g,'$1,')
// reverse it all again
.split("").reverse().join("")
}
This will work. The function is taken from http://ntt.cc/2008/04/25/6-very-basic-but-very-useful-javascript-number-format-functions-for-web-developers.html. Very handy
jQuery.fn.extend({
ts : function (from, to, time) {
var steps = 1,
self = this,
counter;
if (from - to > 0) {
steps = -1;
};
from -= steps;
function step() {
self.text(addCommas(from += steps));
if ((steps < 0 && to >= from) || (steps > 0 && from >= to)) {
clearInterval(counter);
};
};
counter = setInterval(step, time || 5);
}
});
var total = $('.total').ts(56000,56941);
function addCommas(nStr)
{
nStr += '';
x = nStr.split('.');
x1 = x[0];
x2 = x.length > 1 ? '.' + x[1] : '';
var rgx = /(\d+)(\d{3})/;
while (rgx.test(x1)) {
x1 = x1.replace(rgx, '$1' + ',' + '$2');
}
return x1 + x2;
}
​
Sure it is! You can check this plugin (http://code.google.com/p/jquery-numberformatter/) and implement it in yours :-)

Compare Strings Javascript Return %of Likely

I am looking for a JavaScript function that can compare two strings and return the likeliness that they are alike. I have looked at soundex but that's not really great for multi-word strings or non-names. I am looking for a function like:
function compare(strA,strB){
}
compare("Apples","apple") = Some X Percentage.
The function would work with all types of strings, including numbers, multi-word values, and names. Perhaps there's a simple algorithm I could use?
Ultimately none of these served my purpose so I used this:
function compare(c, u) {
var incept = false;
var ca = c.split(",");
u = clean(u);
//ca = correct answer array (Collection of all correct answer)
//caa = a single correct answer word array (collection of words of a single correct answer)
//u = array of user answer words cleaned using custom clean function
for (var z = 0; z < ca.length; z++) {
caa = $.trim(ca[z]).split(" ");
var pc = 0;
for (var x = 0; x < caa.length; x++) {
for (var y = 0; y < u.length; y++) {
if (soundex(u[y]) != null && soundex(caa[x]) != null) {
if (soundex(u[y]) == soundex(caa[x])) {
pc = pc + 1;
}
}
else {
if (u[y].indexOf(caa[x]) > -1) {
pc = pc + 1;
}
}
}
}
if ((pc / caa.length) > 0.5) {
return true;
}
}
return false;
}
// create object listing the SOUNDEX values for each letter
// -1 indicates that the letter is not coded, but is used for coding
// 0 indicates that the letter is omitted for modern census archives
// but acts like -1 for older census archives
// 1 is for BFPV
// 2 is for CGJKQSXZ
// 3 is for DT
// 4 is for L
// 5 is for MN my home state
// 6 is for R
function makesoundex() {
this.a = -1
this.b = 1
this.c = 2
this.d = 3
this.e = -1
this.f = 1
this.g = 2
this.h = 0
this.i = -1
this.j = 2
this.k = 2
this.l = 4
this.m = 5
this.n = 5
this.o = -1
this.p = 1
this.q = 2
this.r = 6
this.s = 2
this.t = 3
this.u = -1
this.v = 1
this.w = 0
this.x = 2
this.y = -1
this.z = 2
}
var sndx = new makesoundex()
// check to see that the input is valid
function isSurname(name) {
if (name == "" || name == null) {
return false
} else {
for (var i = 0; i < name.length; i++) {
var letter = name.charAt(i)
if (!(letter >= 'a' && letter <= 'z' || letter >= 'A' && letter <= 'Z')) {
return false
}
}
}
return true
}
// Collapse out directly adjacent sounds
// 1. Assume that surname.length>=1
// 2. Assume that surname contains only lowercase letters
function collapse(surname) {
if (surname.length == 1) {
return surname
}
var right = collapse(surname.substring(1, surname.length))
if (sndx[surname.charAt(0)] == sndx[right.charAt(0)]) {
return surname.charAt(0) + right.substring(1, right.length)
}
return surname.charAt(0) + right
}
// Collapse out directly adjacent sounds using the new National Archives method
// 1. Assume that surname.length>=1
// 2. Assume that surname contains only lowercase letters
// 3. H and W are completely ignored
function omit(surname) {
if (surname.length == 1) {
return surname
}
var right = omit(surname.substring(1, surname.length))
if (!sndx[right.charAt(0)]) {
return surname.charAt(0) + right.substring(1, right.length)
}
return surname.charAt(0) + right
}
// Output the coded sequence
function output_sequence(seq) {
var output = seq.charAt(0).toUpperCase() // Retain first letter
output += "-" // Separate letter with a dash
var stage2 = seq.substring(1, seq.length)
var count = 0
for (var i = 0; i < stage2.length && count < 3; i++) {
if (sndx[stage2.charAt(i)] > 0) {
output += sndx[stage2.charAt(i)]
count++
}
}
for (; count < 3; count++) {
output += "0"
}
return output
}
// Compute the SOUNDEX code for the surname
function soundex(value) {
if (!isSurname(value)) {
return null
}
var stage1 = collapse(value.toLowerCase())
//form.result.value=output_sequence(stage1);
var stage1 = omit(value.toLowerCase())
var stage2 = collapse(stage1)
return output_sequence(stage2);
}
function clean(u) {
var u = u.replace(/\,/g, "");
u = u.toLowerCase().split(" ");
var cw = ["ARRAY OF WORDS TO BE EXCLUDED FROM COMPARISON"];
var n = [];
for (var y = 0; y < u.length; y++) {
var test = false;
for (var z = 0; z < cw.length; z++) {
if (u[y] != "" && u[y] != cw[z]) {
test = true;
break;
}
}
if (test) {
//Don't use & or $ in comparison
var val = u[y].replace("$", "").replace("&", "");
n.push(val);
}
}
return n;
}
Here's an answer based on Levenshtein distance https://en.wikipedia.org/wiki/Levenshtein_distance
function similarity(s1, s2) {
var longer = s1;
var shorter = s2;
if (s1.length < s2.length) {
longer = s2;
shorter = s1;
}
var longerLength = longer.length;
if (longerLength == 0) {
return 1.0;
}
return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength);
}
For calculating edit distance
function editDistance(s1, s2) {
s1 = s1.toLowerCase();
s2 = s2.toLowerCase();
var costs = new Array();
for (var i = 0; i <= s1.length; i++) {
var lastValue = i;
for (var j = 0; j <= s2.length; j++) {
if (i == 0)
costs[j] = j;
else {
if (j > 0) {
var newValue = costs[j - 1];
if (s1.charAt(i - 1) != s2.charAt(j - 1))
newValue = Math.min(Math.min(newValue, lastValue),
costs[j]) + 1;
costs[j - 1] = lastValue;
lastValue = newValue;
}
}
}
if (i > 0)
costs[s2.length] = lastValue;
}
return costs[s2.length];
}
Usage
similarity('Stack Overflow','Stack Ovrflw')
returns 0.8571428571428571
You can play with it below:
function checkSimilarity(){
var str1 = document.getElementById("lhsInput").value;
var str2 = document.getElementById("rhsInput").value;
document.getElementById("output").innerHTML = similarity(str1, str2);
}
function similarity(s1, s2) {
var longer = s1;
var shorter = s2;
if (s1.length < s2.length) {
longer = s2;
shorter = s1;
}
var longerLength = longer.length;
if (longerLength == 0) {
return 1.0;
}
return (longerLength - editDistance(longer, shorter)) / parseFloat(longerLength);
}
function editDistance(s1, s2) {
s1 = s1.toLowerCase();
s2 = s2.toLowerCase();
var costs = new Array();
for (var i = 0; i <= s1.length; i++) {
var lastValue = i;
for (var j = 0; j <= s2.length; j++) {
if (i == 0)
costs[j] = j;
else {
if (j > 0) {
var newValue = costs[j - 1];
if (s1.charAt(i - 1) != s2.charAt(j - 1))
newValue = Math.min(Math.min(newValue, lastValue),
costs[j]) + 1;
costs[j - 1] = lastValue;
lastValue = newValue;
}
}
}
if (i > 0)
costs[s2.length] = lastValue;
}
return costs[s2.length];
}
<div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div>
<div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div>
<div>Match: <span id="output">No Input</span></div>
Using this library for string similarity worked like a charm for me!
Here's the Example -
var similarity = stringSimilarity.compareTwoStrings("Apples","apple"); // => 0.88
Here is a very simple function that does a comparison and returns a percentage based on equivalency. While it has not been tested for all possible scenarios, it may help you get started.
function similar(a,b) {
var equivalency = 0;
var minLength = (a.length > b.length) ? b.length : a.length;
var maxLength = (a.length < b.length) ? b.length : a.length;
for(var i = 0; i < minLength; i++) {
if(a[i] == b[i]) {
equivalency++;
}
}
var weight = equivalency / maxLength;
return (weight * 100) + "%";
}
alert(similar("test","tes")); // 75%
alert(similar("test","test")); // 100%
alert(similar("test","testt")); // 80%
alert(similar("test","tess")); // 75%
To Find degree of similarity between two strings; we can use more than one or two methods but I am mostly inclined towards the usage of 'Dice's Coefficient' . which is better! well in my knowledge than using 'Levenshtein distance'
Using this 'string-similarity' package from npm you will be able to work on what I said above.
some easy usage examples are
var stringSimilarity = require('string-similarity');
var similarity = stringSimilarity.compareTwoStrings('healed', 'sealed');
var matches = stringSimilarity.findBestMatch('healed', ['edward', 'sealed', 'theatre']);
for more please visit the link given above. Thankyou.
Just one I quickly wrote that might be good enough for your purposes:
function Compare(strA,strB){
for(var result = 0, i = strA.length; i--;){
if(typeof strB[i] == 'undefined' || strA[i] == strB[i]);
else if(strA[i].toLowerCase() == strB[i].toLowerCase())
result++;
else
result += 4;
}
return 1 - (result + 4*Math.abs(strA.length - strB.length))/(2*(strA.length+strB.length));
}
This weighs characters that are the same but different case 1 quarter as heavily as characters that are completely different or missing. It returns a number between 0 and 1, 1 meaning the strings are identical. 0 meaning they have no similarities. Examples:
Compare("Apple", "Apple") // 1
Compare("Apples", "Apple") // 0.8181818181818181
Compare("Apples", "apple") // 0.7727272727272727
Compare("a", "A") // 0.75
Compare("Apples", "appppp") // 0.45833333333333337
Compare("a", "b") // 0
How about function similar_text from PHP.js library?
It is based on a PHP function with the same name.
function similar_text (first, second) {
// Calculates the similarity between two strings
// discuss at: http://phpjs.org/functions/similar_text
if (first === null || second === null || typeof first === 'undefined' || typeof second === 'undefined') {
return 0;
}
first += '';
second += '';
var pos1 = 0,
pos2 = 0,
max = 0,
firstLength = first.length,
secondLength = second.length,
p, q, l, sum;
max = 0;
for (p = 0; p < firstLength; p++) {
for (q = 0; q < secondLength; q++) {
for (l = 0;
(p + l < firstLength) && (q + l < secondLength) && (first.charAt(p + l) === second.charAt(q + l)); l++);
if (l > max) {
max = l;
pos1 = p;
pos2 = q;
}
}
}
sum = max;
if (sum) {
if (pos1 && pos2) {
sum += this.similar_text(first.substr(0, pos2), second.substr(0, pos2));
}
if ((pos1 + max < firstLength) && (pos2 + max < secondLength)) {
sum += this.similar_text(first.substr(pos1 + max, firstLength - pos1 - max), second.substr(pos2 + max, secondLength - pos2 - max));
}
}
return sum;
}
fuzzyset - A fuzzy string set for javascript.
fuzzyset is a data structure that performs something akin to fulltext search against data to determine likely mispellings and approximate string matching. Note that this is a javascript port of a python library.
To some extent, I like the ideas of Dice's coefficient embedded in the string-similarity module. But I feel that considering the bigrams only and not taking into account their multiplicities is missing some important data. Below is a version that also handles multiplicities, and I think is a simpler implementation overall. I don't try to use their API, offering only a function which compares two strings after some manipulation (removing non-alphanumeric characters, lower-casing everything, and compressing but not removing whitespace), built atop one which compares them without that manipulation. It would be easy enough to wrap this back in their API, but I see little need.
const stringSimilarity = (a, b) =>
_stringSimilarity (prep (a), prep (b))
const _stringSimilarity = (a, b) => {
const bg1 = bigrams (a)
const bg2 = bigrams (b)
const c1 = count (bg1)
const c2 = count (bg2)
const combined = uniq ([... bg1, ... bg2])
.reduce ((t, k) => t + (Math .min (c1 [k] || 0, c2 [k] || 0)), 0)
return 2 * combined / (bg1 .length + bg2 .length)
}
const prep = (str) => // TODO: unicode support?
str .toLowerCase () .replace (/[^\w\s]/g, ' ') .replace (/\s+/g, ' ')
const bigrams = (str) =>
[...str] .slice (0, -1) .map ((c, i) => c + str [i + 1])
const count = (xs) =>
xs .reduce ((a, x) => ((a [x] = (a [x] || 0) + 1), a), {})
const uniq = (xs) =>
[... new Set (xs)]
console .log (stringSimilarity (
'foobar',
'Foobar'
)) //=> 1
console .log (stringSimilarity (
"healed",
"sealed"
))//=> 0.8
console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"For sale: table in very good condition, olive green in colour."
)) //=> 0.7787610619469026
console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"For sale: green Subaru Impreza, 210,000 miles"
)) //=> 0.38636363636363635
console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"Wanted: mountain bike with at least 21 gears."
)) //=> 0.1702127659574468
console .log (stringSimilarity (
"The rain in Spain falls mainly on the plain.",
"The run in Spun falls munly on the plun.",
)) //=> 0.7560975609756098
console .log (stringSimilarity (
"Fa la la la la, la la la la",
"Fa la la la la, la la",
)) //=> 0.8636363636363636
console .log (stringSimilarity (
"car crash",
"carcrash",
)) //=> 0.8
console .log (stringSimilarity (
"Now is the time for all good men to come to the aid of their party.",
"Huh?",
)) //=> 0
.as-console-wrapper {max-height: 100% !important; top: 0}
Some of the test cases are from string-similarity, others are my own. They show some significant differences from that package, but nothing untoward. The only one I would call out is the difference between "car crash" and "carcrash", which string-similarity sees as identical and I report with a similarity of 0.8. My version finds more similarity in all the olive-green test-cases than does string-similarity, but as these are in any case fairly arbitrary numbers, I'm not sure how much difference it makes; they certainly position them in the same relative order.
string-similarity lib vs Top answer (by #overloard1234) performance comparation you can find below
Based on #Tushar Walzade's advice to use string-similarity library, you can find, that for example
stringSimilatityLib.findBestMatch('KIA','Kia').bestMatch.rating
will return 0.0
So, looks like better to compare it in lowerCase.
Better base usage (for arrays) :
findBestMatch(str, strArr) {
const lowerCaseArr = strArr.map(element => element.toLowerCase());//creating lower case array
const match = stringSimilatityLib.findBestMatch(str.toLowerCase(), lowerCaseArr).bestMatch; //trying to find bestMatch
if (match.rating > 0) {
const foundIndex = lowerCaseArr.findIndex(x => x === match.target); //finding the index of found best case
return strArr[foundIndex]; //returning initial value from array
}
return null;
},
Performance
Also, i compared top answer here (made by #overloard1234) and string-similarity lib (v4.0.4).
The results you can find here : https://jsbench.me/szkzojoskq/1
Result : string-similarity is ~ twice faster
Just for fun : v2.0 of string-similarity library slower, than latest 4.0.4 about 2.2 times. So update it, if you are still using < 3.0 :)
const str1 = " pARTH PARmar r ";
const str2 = " parmar r par ";
function calculateSimilarity(str1 = "", str2 = "") {
let longer = str1.trim();
let shorter = str2.trim();
let a1 = longer.toLowerCase().split(" ");
let b1 = shorter.toLowerCase().split(" ");
let result = a1.every((aa, i) => aa[0] === b1[i][0]);
if (longer.length < shorter.length) [longer,shorter] = [shorter,longer];
var arr = [];
let count = 0;
for(var i = 0;i<longer.length;i++){
if(shorter && shorter.includes(longer[i])) {
shorter = shorter.replace(longer[i],"")
count++
};
}
return {
score : (count*100)/longer.length,
result
}
}
console.log(calculateSimilarity(str1, str2));
I used #overlord1234 function, but corrected ь: '', cuz English words don't have this letter, and next need return a[char] ?? char instead of return a[char] || char

Categories

Resources