Sort empty or null to bottom always - javascript

The following is a natural sort function I pulled from somewhere I forget exactly. I'm looking to modify it so that empty or null values always sort to the bottom regardless of asc/desc.
Here is what I have right now:
/**
 * Natural-order comparator for two grid rows, keyed on the column named by
 * `sortcol` (a variable that is not declared inside this function).
 *
 * Cell text has markup tags removed and is cut to 15 characters, then compared
 * as hex numbers, as dates, or chunk by chunk with digit runs compared numerically.
 * Set `gridNaturalSorter.insensitive` to a truthy value for case-insensitive comparison.
 *
 * @param a first row
 * @param b second row
 * @returns -1 when a sorts first, 1 when b sorts first, 0 when no difference is found
 */
function gridNaturalSorter(a, b) {
// Swap each row for its cell text with tags stripped. When the cell is falsy
// (null, undefined, '') the guard is skipped and a / b still hold the row itself.
if(a[sortcol])
a = a[sortcol].replace(/<(?:.|\n)*?>/gm, '');
if(b[sortcol])
b = b[sortcol].replace(/<(?:.|\n)*?>/gm, '');
// Only the first 15 characters take part in the comparison.
if(b)
b = b.toString().substr(0, 15);
if(a)
a = a.toString().substr(0, 15);
// re: splits out numeric chunks; sre: leading/trailing spaces; dre: date-like text;
// hre: hex literal; ore: leading zero.
var re = /(^([+\-]?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?)?$|^0x[0-9a-f]+$|\d+)/gi,
sre = /(^[ ]*|[ ]*$)/g,
dre = /(^([\w ]+,?[\w ]+)?[\w ]+,?[\w ]+\d+:\d+(:\d+)?[\w ]?|^\d{1,4}[\/\-]\d{1,4}[\/\-]\d{1,4}|^\w+, \w+ \d+, \d{4})/,
hre = /^0x[0-9a-f]+$/i,
ore = /^0/,
// Stringify, lower-casing only when the `insensitive` flag is set on the function.
i = function(s) {
return gridNaturalSorter.insensitive && (''+s).toLowerCase() || ''+s
},
// convert all to strings strip whitespace
x = i(a).replace(sre, '') || '',
y = i(b).replace(sre, '') || '',
// chunk/tokenize
xN = x.replace(re, '\0$1\0').replace(/\0$/,'').replace(/^\0/,'').split('\0'),
yN = y.replace(re, '\0$1\0').replace(/\0$/,'').replace(/^\0/,'').split('\0'),
// numeric, hex or date detection
// yD is only treated as a date when xD was already truthy.
xD = parseInt(x.match(hre)) || (xN.length != 1 && x.match(dre) && Date.parse(x)),
yD = parseInt(y.match(hre)) || xD && y.match(dre) && Date.parse(y) || null,
oFxNcL, oFyNcL;
// first try and sort Hex codes or Dates
// Equal xD / yD falls through to the chunk comparison below.
if (yD)
if ( xD < yD ) return -1;
else if ( xD > yD ) return 1;
// natural sorting through split numeric strings and default strings
for(var cLoc=0, numS=Math.max(xN.length, yN.length); cLoc < numS; cLoc++) {
// find floats not starting with '0', string or 0 if not defined (Clint Priest)
oFxNcL = !(xN[cLoc] || '').match(ore) && parseFloat(xN[cLoc]) || xN[cLoc] || 0;
oFyNcL = !(yN[cLoc] || '').match(ore) && parseFloat(yN[cLoc]) || yN[cLoc] || 0;
// handle numeric vs string comparison - number < string - (Kyle Adams)
if (isNaN(oFxNcL) !== isNaN(oFyNcL)) {
return (isNaN(oFxNcL)) ? 1 : -1;
}
// rely on string comparison if different types - i.e. '02' < 2 != '02' < '2'
else if (typeof oFxNcL !== typeof oFyNcL) {
oFxNcL += '';
oFyNcL += '';
}
if (oFxNcL < oFyNcL)
return -1;
if (oFxNcL > oFyNcL)
return 1;
}
// Every chunk compared equal.
return 0;
}

If you know how to implement multiple comparators and a comparator that sorts null to bottom that's quite easy.
To implement multiple comparators, you just have to return the result of the first comparator that doesn't return 0.
Here I also created a withComparators helper function that allows to compose multiple comparators together. If you understand this code you will be able to easily come up with your own solution for your specific problem.
Note that your gridNaturalSorter function is a comparator just like nullsToBottom is in my example.
E.g.
var items = ['test', null, 'test1', 'test3', null, 'test4'];
items.sort(withComparators(nullsToBottom, textAsc));
//["test", "test1", "test3", "test4", null, null]
/**
 * Comparator that places `null` entries after every non-null entry.
 *
 * Returns 0 when both values are null or both are non-null, so that the next
 * comparator in a chain (see withComparators) decides their relative order.
 *
 * @param {*} a
 * @param {*} b
 * @returns {number} 1 when only a is null, -1 when only b is null, otherwise 0
 */
function nullsToBottom(a, b) {
  // Booleans coerce to 0/1, so the difference is exactly -1, 0 or 1.
  return (a === null) - (b === null);
}
/**
 * Ascending comparator built on the relational operators.
 * @returns {number} -1 when a < b, 1 when a > b, otherwise 0
 */
function textAsc(a, b) {
  if (a < b) {
    return -1;
  }
  return a > b ? 1 : 0;
}
/**
 * Combines any number of comparators into one.
 * The combined comparator runs them in the order given and returns the first
 * truthy (non-zero) result; it returns 0 when every comparator reports a tie.
 */
function withComparators() {
  var chain = arguments;
  return function (a, b) {
    var total = chain.length;
    var position = 0;
    var verdict;
    while (position < total) {
      verdict = chain[position](a, b);
      if (verdict) {
        return verdict;
      }
      position++;
    }
    return 0;
  };
}

Related

How to get the max occurrences of a number in an array

As the answer to an exercise, I had to create a function that, given an array of numbers, returns the number with the most occurrences; if more than one number has the maximum number of occurrences, it should return the smallest one. This is the implementation I made, but I'm pulling my hair out trying to figure out why it returns 10 instead of 9 in the example.
It appears to be evaluating 10 < 9 as true. What's wrong?
// Question's original attempt: meant to return the most frequent number in arr,
// preferring the smallest number on a tie. Kept as posted, including the bug
// that the discussion below is about.
function maxOccurencies(arr) {
var aux = [], max = 0, final = null;
// Count occurrences by using each value as an index into aux, tracking the highest count.
for (var i=0,t=arr.length; i<t; i++) {
aux[arr[i]] = (aux[arr[i]] || 0) + 1;
if (aux[arr[i]] > max) max = aux[arr[i]];
}
// for...in yields the indices of aux as strings, so `x < final` compares
// strings ("10" < "9" is true) and the wrong candidate wins.
// Note that x is never declared with var.
for (x in aux) {
if ( aux[x] == max && (x < final || final == null)) {
final = x;
}
}
return final;
}
document.write(maxOccurencies([10,10,10,9,9,9,8,7,4,5,1]));
Putting typeof(x) in your second loop reveals that some of your variables are being cast as type string! Still looking into exactly where this is occurring. You can replace
if ( aux[x] == max && (x < final || final == null)) {
with
if ( aux[x] == max && (parseInt(x) < parseInt(final) || final == null)) {
to return the correct value of 9.
Edit:
Very interesting, I was unaware of Javascript's exact handling of arrays in for...in loops. See the following other questions for more information:
JavaScript For-each/For-in loop changing element types
Why is using “for…in” with array iteration such a bad idea?
Also note that you can use arr.forEach(function(element){...}); and the elements are returned with their types intact.
I think the problem is just that the x in aux is not a number so the if statement isn't evaluating correctly. when converted to a number then it returns 9 (below).
(3 == 3 && ("10" < "9" || "9" == null)) evaluates to true
/**
 * Returns the number that occurs most often in `arr`.
 * When several numbers share the highest count, the smallest of them wins.
 *
 * @param {number[]} arr numbers to inspect
 * @returns {number|null} the winning number, or null for an empty array
 */
function maxOccurencies(arr) {
  var counts = {};
  var max = 0;
  var final = null;
  arr.forEach(function (value) {
    counts[value] = (counts[value] || 0) + 1;
    if (counts[value] > max) {
      max = counts[value];
    }
  });
  // Property names are always strings, so each key is converted back to a
  // number before comparing; otherwise "10" < "9" would be true.
  Object.keys(counts).forEach(function (key) {
    var candidate = Number(key);
    if (counts[key] === max && (final === null || candidate < final)) {
      final = candidate;
    }
  });
  return final;
}
document.write(maxOccurencies([10,10,10,9,9,9,8,7,4,5,1]));
"I'm pulling my hair figuring out why it return 10 instead of 9 in the example."
That's because in this sort of (string) comparison, "10" is smaller than "9", "8", "7", "6", "5", "4", "3" and "2", but a bit greater than "1".
:)
This small type correction will fix it:
/**
 * Returns the number that occurs most often in `arr`; ties go to the smallest number.
 *
 * @param {number[]} arr numbers to inspect
 * @returns {number|null} the winning number, or null for an empty array
 */
function maxOccurences(arr) {
  // All working variables are local; the undeclared originals leaked into the global scope.
  var aux = {};
  var max = 0;
  var final = null;
  var x;
  for (var i = 0, t = arr.length; i < t; i++) {
    aux[arr[i]] = (aux[arr[i]] || 0) + 1;
    if (aux[arr[i]] > max) max = aux[arr[i]];
  }
  for (x in aux) {
    // Keys come back as strings: unary plus turns them into numbers again, both
    // for the comparison and for the value that is finally returned.
    if (aux[x] === max && (final === null || +x < final)) {
      final = +x;
    }
  }
  return final;
}

a regex or function to extract all the parameters passed to a function

I am looking to find the passed parameter to a function
say i already have hello as function and i have a STRING as following
hello(1,'434','hello,word',"h,g",{a:'b,u', l : { "sk" : "list", bk : 'u,93' }, c : 9},true)
Then upon that regex or function i should be able to find following 6 strings
'1'
'"434"'
'"hello,world"'
'"h,g"'
'{"a":"b,u","l":{"sk":"list","bk": "u,93"},"c":9}'
'true'
As per your question, you can do it like this:
x =Hello(1,'434','hello,word',"h,g",{a:'b,u', l : { "sk" : "list", bk : 'u,93' }, c : 9},true);
/**
 * Logs every argument it was called with, one console.log call per argument.
 */
function Hello() {
  // The counter is declared locally; without `var` it would be an implicit global
  // and a ReferenceError in strict mode.
  for (var i = 0; i < arguments.length; i++) {
    console.log(arguments[i]);
  }
}
You can take help of argument object which is an Array-like object corresponding to the arguments passed to a function.
If that's a string then you might have to escape the double quotes first to result like
var x = "hello(1,'434','hello,word',\"h,g\",{a:'b,u', l : { \"sk\" : \"list\", bk : 'u,93' }, c : 9},true)";
and then you may invoke it like
Function(x)();
and in the hello function you should iterate over the arguments object's properties like
// Prints each argument passed to the call, in order, with one console.log per argument.
function hello(){
  for (var position = 0; position < arguments.length; position += 1) {
    console.log(arguments[position]);
  }
}
This is my workaround. It may be error-prone, but it should be faster than the eval solutions.
/**
 * Splits a comma-separated argument list (given as a string) into parsed values.
 *
 * The string is first split on every comma; pieces that were split inside a
 * quoted string or an object are glued back together until they look complete,
 * then handed to JSON.parse.
 *
 * @param str the argument list text
 * @returns array of parsed values; a piece that JSON.parse rejects becomes undefined.
 *          Returns an empty array when str is not a non-empty string.
 */
var extractParameters = function(str){
var ar = [];
if(typeof str === 'string' && str.length){
var chars = str.split(','), cl = chars.length;
// Parse piece n as JSON and append it; parse failures are recorded as undefined.
var pushInto = function(n){
try {
ar.push(JSON.parse(chars[n]));
} catch(er){
ar.push(undefined);
}
};
// eg / fg: first / last character of the current piece; di / si: position of the
// first double / single quote in it.
for(var di, si, eg, fg, n = 0; n < cl; n++){
eg = chars[n].charAt(0);
fg = chars[n].charAt(chars[n].length - 1);
// A piece wrapped in matching quotes is rewritten with double quotes so JSON.parse accepts it.
if(eg === fg && (eg === '"' || eg === "'")){
chars[n] = "\"" + chars[n].substring(1, chars[n].length - 1) + "\"";
}
di = chars[n].indexOf('"');
si = chars[n].indexOf("'");
// Complete when: no quotes at all, or fully quoted, or it starts with "{", ends
// with "}" and has as many "{" as "}".
// NOTE(review): a quote-free fragment of an object (e.g. "{a:1") also satisfies the
// first test and is parsed on its own — confirm this is acceptable.
if(((si === -1) && (di === -1)) || (eg === fg && (eg === '"' || eg === "'")) ||
(chars[n].charAt(0) === "{" && chars[n].charAt(chars[n].length-1) === "}" && (chars[n].match(/\{/g).length === chars[n].match(/\}/g).length))){
pushInto(n);
} else if(n < (cl-1)) {
// Incomplete: merge with the next piece (restoring the comma) and retry the same index.
chars[n] = chars[n] + ','+ chars[n+1];
chars.splice(n+1,1);
n--;
cl--;
continue;
}
// An incomplete final piece reaches neither branch and is dropped.
}
}
return ar;
};
fiddle : https://jsfiddle.net/jv0328tp/16/

splitting a string based on AND OR logic in javascript

My problem is to split a string which contains a logical operation.
For example, here is my sample string:
var rule = "device2.temperature > 20 || device2.humidity>68 && device3.temperature >10"
I need to parse that string in a way that I can easily operate my logic and I am not sure which approach would be better.
PS: Please keep in mind that those rule strings can have 10 or more different condition combinations, like 4 ANDs and 6 ORs.
Assuming no parentheses, I might go with something like this (JavaScript code):
/**
 * Applies the binary operator named by `op` to `v` and `w`.
 *
 * Supported operators: '>', '<', '||', '&&', '=='. Both operands are already
 * evaluated when they arrive here, so '||' and '&&' do not short-circuit.
 *
 * @param {*} v left operand
 * @param {string} op operator symbol
 * @param {*} w right operand
 * @returns {*} the result of the operation; undefined (after an alert) when
 *          the operator is not recognised
 */
function f(v,op,w){
  var ops = {
    '>': function(a,b){ return a > b; },
    '<': function(a,b){ return a < b; },
    '||': function(a,b){ return a || b; },
    '&&': function(a,b){ return a && b; },
    '==': function(a,b){ return a == b;}
  };
  // Own-property check: a plain `ops[op]` lookup would also find inherited
  // members such as "toString" or "constructor" and call them as operators.
  if (Object.prototype.hasOwnProperty.call(ops, op)){
    return ops[op](v,w);
  } else alert('Could not recognize the operator, "' + op + '".');
}
Now if you can manage to get a list of expressions, you can evaluate them in series:
// Operands (as [left, operator, right] triples or plain values) alternate with
// operator symbols. The list is folded strictly left to right through f, so
// there is no operator precedence: the sample evaluates as ((6>7) || (12<22)) && (5==5).
var exps = [[6,'>',7],'||',[12,'<',22], '&&', [5,'==',5]];
var i = 0;
var result = typeof exps[i] == 'object' ? f(exps[i][0],exps[i][1],exps[i][2]) : exps[i];
for (i = 1; exps[i] !== undefined; i++) {
  // exps[i] is the operator; the entry after it is the next operand.
  var op = exps[i];
  i++;
  var b = typeof exps[i] == 'object' ? f(exps[i][0],exps[i][1],exps[i][2]) : exps[i];
  result = f(result,op,b);
}
console.log(result);
If you are absolutely sure that the input is always going to be valid JavaScript
var rule = "device2.temperature > 20 || device2.humidity>68 && device3.temperature >10"
var rulePassed = eval(rule);
Keep in mind that in most cases "eval" is "evil" and has the potential to introduce more problems than it solves.
/**
 * Compiles a rule expression into a function of a context object.
 * Every identifier path in the rule (e.g. `device2.temperature`) is looked up
 * on the `ctx` argument instead of the global scope.
 *
 * String literals, numeric literals and the keywords true / false / null /
 * undefined are left untouched; only real identifier paths get the `ctx.` prefix.
 *
 * SECURITY NOTE: the rule text is compiled with the Function constructor, which
 * executes arbitrary code just like eval. Only pass rules from a trusted source.
 *
 * @param {string} rule JavaScript expression text
 * @returns {Function} function (ctx) returning the value of the expression
 */
function parse(rule){
  var keywords = ["true", "false", "null", "undefined"];
  // Group 1 captures literals (quoted strings, numbers incl. exponent/hex forms);
  // the second alternative matches identifier paths.
  var token = /("(?:\\[\s\S]|[^"\\])*"|'(?:\\[\s\S]|[^'\\])*'|\d[\w.]*)|[a-z$_][a-z0-9$_.]*/gi;
  var body = rule.replace(token, function (match, literal) {
    if (literal !== undefined || keywords.indexOf(match) !== -1) {
      return match;
    }
    return "ctx." + match;
  });
  return Function("ctx", "return(" + body + ")");
}
This is a little bit better than eval, since it will most likely throw an error when somebody tries to inject some code.
Because it will try to access these properties on the ctx-object instead of the window-object.
var rule = parse("device2.temperature > 20 || device2.humidity>68 && device3.temperature >10");
var data = {
device2: {
temperature: 18,
humidity: 70
},
device3: {
temperature: 15,
humidity: 75
}
};
console.log( rule.toString() );
console.log( rule(data) );
Overkill:
beware, not fully tested. may still contain errors
And, code doesn't check wether syntax is valid, only throws on a few obvious errors.
/**
 * Expression compiler. `parse(...parts)` accepts strings, numbers, booleans and
 * already-compiled functions, tokenizes the strings, and combines everything
 * into a single function `(ctx) => value`.
 *
 * Operator implementations are attached to `parse` itself (parse["&&"],
 * parse.plus, parse.ternary, parse.fetch, ...) so compiled rules can also be
 * combined by hand.
 */
var parse = (function(){
function parse(){
var cache = {};
//this may be as evil as eval, so take care how you use it.
// Turns a literal's source text into a function returning that literal (memoized per call to parse).
function raw(v){ return cache[v] || (cache[v] = Function("return " + v)) }
//parses Strings and converts them to operator-tokens or functions
// Invoked by String.prototype.replace: v is the whole match, the next four
// arguments are the capture groups of matchTokens (identifier path, operator
// symbol, number, quoted string).
function parseStrings(v, prop, symbol, number, string){
if(!prop && !symbol && !number && !string){
// Only the catch-all \S alternative matched: an unsupported character.
throw new Error("unexpected/unhandled symbol", v);
}else{
var w;
switch(prop){
//keywords
case "true":
case "false":
case "null":
w = raw( v );
break;
}
// Push, in order of preference: keyword literal, unary operator name (kept as a
// string token), property fetcher, number literal, string literal, operator symbol.
tokens.push(
w ||
~unary.indexOf(prop) && v ||
prop && parse.fetch(v) ||
number && raw( number ) ||
string && raw( string ) ||
symbol
);
}
}
var tokens = [];
for(var i = 0; i < arguments.length; ++i){
var arg = arguments[i];
switch(typeof arg){
case "number":
case "boolean":
tokens.push(raw( arg ));
break;
case "function":
tokens.push( arg );
break;
case "string":
//abusing str.replace() as kind of a RegEx.forEach()
arg.replace(matchTokens, parseStrings);
break;
}
}
// Resolve parentheses innermost-first: take the last "(", find the first ")" after
// it, and replace the whole group by the function built from its contents.
for(var i = tokens.lastIndexOf("("), j; i>=0; i = tokens.lastIndexOf("(")){
j = tokens.indexOf(")", i);
if(j > 0){
tokens.splice(i, j+1-i, process( tokens.slice( i+1, j ) ));
}else{
throw new Error("mismatching parantheses")
}
}
if(tokens.indexOf(")") >= 0) throw new Error("mismatching parantheses");
return process(tokens);
}
//combines tokens and functions until a single function is left
function process(tokens){
//unary operators like
unary.forEach(o => {
var i = -1;
while((i = tokens.indexOf(o, i+1)) >= 0){
// A "+" or "-" directly after an operand (a function token) is the binary form; skip it here.
if((o === "+" || o === "-") && typeof tokens[i-1] === "function") continue;
tokens.splice( i, 2, parse[ unaryMapping[o] || o ]( tokens[i+1] ));
}
})
//binary operators
// Operators are reduced one at a time in the order listed in `binary`, and for each
// operator the rightmost occurrence is reduced first.
// NOTE(review): that makes a chain such as `a - b - c` group as `a - (b - c)`, and
// `a / b * c` as `a / (b * c)` — confirm whether this is intended.
binary.forEach(o => {
for(var i = tokens.lastIndexOf(o); i >= 0; i = tokens.lastIndexOf(o)){
tokens.splice( i-1, 3, parse[ o ]( tokens[i-1], tokens[i+1] ));
}
})
//ternary operator
// Expects exactly: condition ? thenValue : elseValue, with the ":" two tokens after the "?".
for(var i = tokens.lastIndexOf("?"), j; i >= 0; i = tokens.lastIndexOf("?")){
if(tokens[i+2] === ":"){
tokens.splice(i-1, 5, parse.ternary(tokens[i-1], tokens[i+1], tokens[i+3] ));
}else{
throw new Error("unexpected symbol")
}
}
if(tokens.length !== 1){
throw new Error("unparsed tokens left");
}
return tokens[0];
}
var unary = "!,~,+,-,typeof".split(",");
var unaryMapping = { //to avoid collisions with the binary operators
"+": "plus",
"-": "minus"
}
var binary = "**,*,/,%,+,-,<<,>>,>>>,<,<=,>,>=,==,!=,===,!==,&,^,|,&&,||".split(",");
// Capture groups: 1 identifier path, 2 operator symbol, 3 number, 4 quoted string;
// the trailing \S catches anything else so parseStrings can reject it.
var matchTokens = /([a-z$_][\.a-z0-9$_]*)|([+\-*/!~^]=*|[\(\)?:]|[<>&|=]+)|(\d+(?:\.\d*)?|\.\d+)|(["](?:\\[\s\S]|[^"])+["]|['](?:\\[\s\S]|[^'])+['])|\S/gi;
// Attach the operator implementations to `parse`.
(function(){
var def = { value: null };
// Defines parse[k] = v through Object.defineProperty, reusing one value-only descriptor object.
var odp = (k,v) => { def.value = v; Object.defineProperty(parse, k, def) };
unary.forEach(o => {
var k = unaryMapping[o] || o;
k in parse || odp(k, Function("a", "return function(ctx){ return " + o + "(a(ctx)) }"));
})
//most browsers don't support this syntax yet, so I implement this manually
odp("**", (a,b) => (ctx) => Math.pow(a(ctx), b(ctx)));
binary.forEach(o => {
o in parse || odp(o, Function("a,b", "return function(ctx){ return a(ctx) "+o+" b(ctx) }"));
});
odp("ternary", (c,t,e) => ctx => c(ctx)? t(ctx): e(ctx));
odp("fetch", key => {
var a = key.split(".");
return ctx => {
//fetches a path, like devices.2.temperature
//does ctx["devices"][2]["temperature"];
for(var i=0, v = ctx /*|| window*/; i<a.length; ++i){
// A null/undefined step ends the walk with undefined instead of throwing.
if(v == null) return void 0;
v = v[a[i]];
}
return v;
}
});
/* some sugar */
var aliases = {
"or": "||",
"and": "&&",
"not": "!"
}
for(var name in aliases) odp(name, parse[aliases[name]]);
})();
return parse;
})();
and your code:
var data = {
device2: {
temperature: 18,
humidity: 70
},
device3: {
temperature: 15,
humidity: 75
}
};
//you get back a function, that expects the context to work on (optional).
//aka. (in wich context/object is `device2` defined?)
var rule = parse("device2.temperature > 20 || device2.humidity>68 && device3.temperature >10");
console.log("your rule resolved:", rule(data));
sugar:
var rule1 = parse("device2.temperature > 20");
var rule2 = parse("device2.humidity>68 && device3.temperature >10");
//partials/combining rules to new ones
//only `and` (a && b), `or` (a || b), `plus` (+value), `minus` (-value) and 'not', (!value) have named aliases
var rule3 = parse.or(rule1, rule2);
//but you can access all operators like this
var rule3 = parse['||'](rule1, rule2);
//or you can combine functions and strings
var rule3 = parse(rule1, "||", rule2);
console.log( "(", rule1(data), "||", rule2(data), ") =", rule3(data) );
//ternary operator and Strings (' and " supported)
var example = parse(rule1, "? 'device2: ' + device2.temperature : 'device3: ' + device3.temperature");
console.log( example(data) )
What else to know:
Code handles operator precedence and supports round brackets
If a Path can't be fetched, it the particular function returns undefined (no Errors thrown here)
Access to Array-keys in the paths: parse("devices.2.temperature") fetches devices[2].temperature
not implemented:
parsing Arrays and parsing function-calls and everything around value modification. This engine does some computation, it expects some Value in, and gives you a value out. No more, no less.

IsNan() function considers certain kind of strings as number - node js

I'm checking for integer values in node.js using IsNaN function.
Unexpectedly, this function validates the strings like 1E267146, 1E656716 , 914E6583 to be numbers, as these strings are exponential values. Any way to work around this? In actual scenario i wont get any exponential values.
ECMA6 defines Number.isInteger as follows:
Javascript
/**
 * Reports whether nVal is a finite number primitive with no fractional part,
 * strictly between -9007199254740992 and 9007199254740992.
 */
function isInteger(nVal) {
  if (typeof nVal !== "number" || !isFinite(nVal)) {
    return false;
  }
  if (nVal <= -9007199254740992 || nVal >= 9007199254740992) {
    return false;
  }
  return Math.floor(nVal) === nVal;
}
but this will also accept scientific notation
console.log(isInteger(1e6));
console.log(isInteger(+"1e6"));
jsfiddle
You need to be clear as to what your definitions/expectations are.
My guess is that you may want something like this, if you are testing strings and have no limits on the max or min integer.
Javascript
/**
 * Reports whether testValue is a string holding a plain decimal integer:
 * an optional sign followed either by a single 0 or by digits with no leading zero.
 * No exponent, fraction, hex prefix or whitespace is accepted, and there is no size limit.
 *
 * @param {*} testValue value to test
 * @returns {boolean}
 */
function isStringNumericalInteger(testValue) {
  // \d* (not \d+) after the first digit, so single-digit values such as "5" are accepted too.
  return typeof testValue === "string" && /^[\-+]?[1-9]\d*$|^[\-+]?0$/.test(testValue);
}
console.log(isStringNumericalInteger("9007199254740991"));
console.log(isStringNumericalInteger("-123216848516878975616587987846516879844651654847"));
console.log(isStringNumericalInteger("1.1"));
console.log(isStringNumericalInteger("-1.1"));
console.log(isStringNumericalInteger("1e10"));
console.log(isStringNumericalInteger("010"));
console.log(isStringNumericalInteger("0x9"));
console.log(isStringNumericalInteger(""));
console.log(isStringNumericalInteger(" "));
console.log(isStringNumericalInteger());
console.log(isStringNumericalInteger(null));
console.log(isStringNumericalInteger([]));
console.log(isStringNumericalInteger({}));
Output
true
true
false
false
false
false
false
false
false
false
false
false
false
jsfiddle
If you want to bound the range to what javascript can represent numerically as an integer then you will need to add a test for && +testValue > -9007199254740992 && +testValue < 9007199254740992
If you don't like using RegExs, you can also accomplish this with a parser. Something like this:
Javascript
/**
 * Reports whether the first character of testCharacter is an ASCII digit (0-9).
 *
 * @param {string} testCharacter string whose first character is inspected
 * @returns {boolean} false for an empty string
 */
function isCharacterDigit(testCharacter) {
  var charCode = testCharacter.charCodeAt(0);
  // Both bounds test the character code (48 is "0", 57 is "9"); the upper bound
  // previously compared the string itself and only worked through numeric coercion.
  return charCode >= 48 && charCode <= 57;
}
/**
 * Character-by-character check that testValue is a string holding a plain
 * decimal integer: optional sign, then digits, with a leading zero allowed only
 * when the zero is the whole number. Digit tests are delegated to isCharacterDigit.
 */
function isStringNumericalInteger(testValue) {
  if (typeof testValue !== "string") {
    return false;
  }
  var signed = testValue.charAt(0) === "+" || testValue.charAt(0) === "-";
  var firstDigitAt = signed ? 1 : 0;
  var firstDigit = testValue.charAt(firstDigitAt);
  var restFrom = firstDigitAt + 1;
  var total = testValue.length;
  // A leading zero followed by anything else is rejected outright.
  if (firstDigit === "0" && total > restFrom) {
    return false;
  }
  if (!isCharacterDigit(firstDigit)) {
    return false;
  }
  for (var position = restFrom; position < total; position += 1) {
    if (!isCharacterDigit(testValue.charAt(position))) {
      return false;
    }
  }
  return true;
}
jsfiddle
I would use something like below code to validate number input. First I parse the given value to float and then check isNaN().
// True when obj both starts with something parseFloat can read and, taken as a
// whole, coerces to a finite number.
var isNumber = function (obj) {
  if (isNaN(parseFloat(obj))) {
    return false;
  }
  return isFinite(obj);
};
I think this is what you need in your case (i hate regex because this is not very good for the performance but..)
http://jsbin.com/EQiBada/1/
var NMAX = Math.pow(2, 53);
/**
 * True when n, after a negative value has been flipped to positive, is written
 * with decimal digits only and its parsed value lies between -NMAX + 1 and NMAX.
 */
function isNumeric(n) {
  var magnitude = n;
  if (n < 0) {
    magnitude = n * -1;
  }
  if (!/^\d+$/.test(magnitude)) {
    return false;
  }
  var parsed = parseInt(magnitude, 10);
  return parsed >= (NMAX * -1) + 1 && parsed <= NMAX;
}
Minified
var NMAX = Math.pow(2, 53);
// Compact variant: flips a negative n to positive, requires digits only, then
// range-checks the parsed value against -NMAX + 1 .. NMAX.
function isNumericMin(n) {
  var magnitude = n < 0 ? n * -1 : n;
  if (/^\d+$/.test(magnitude) !== true) {
    return false;
  }
  var lowerOk = parseInt(magnitude, 10) >= (NMAX * -1) + 1;
  return lowerOk && parseInt(magnitude, 10) <= NMAX;
}
var i = '1E267146'
// Treat the value as "not a usable number" when it is not numeric at all, when it
// overflows to Infinity (as this exponent does), or when it is the empty string.
// The three tests are joined with || (the original `!!` was a syntax error).
if(isNaN(i) || !isFinite(i) || i=="")
{
// do stuff
}
else
{
// do stuff
}

Compare Strings Javascript Return %of Likely

I am looking for a JavaScript function that can compare two strings and return the likeliness that they are alike. I have looked at soundex but that's not really great for multi-word strings or non-names. I am looking for a function like:
function compare(strA,strB){
}
compare("Apples","apple") = Some X Percentage.
The function would work with all types of strings, including numbers, multi-word values, and names. Perhaps there's a simple algorithm I could use?
Ultimately none of these served my purpose so I used this:
/**
 * Decides whether a user's answer matches any of the accepted answers.
 *
 * Each accepted answer is split into words and every word is compared with
 * every user word: by SOUNDEX code when both words have one, otherwise by
 * substring containment. An accepted answer matches when the number of hits
 * exceeds half of its word count.
 *
 * @param {string} c accepted answers, separated by commas
 * @param {string} u the user's answer (normalised through clean())
 * @returns {boolean} true as soon as one accepted answer matches
 */
function compare(c, u) {
  var answers = c.split(",");
  var userWords = clean(u);
  // SOUNDEX codes of the user's words do not change per answer, so compute them once.
  var userCodes = userWords.map(function (word) {
    return soundex(word);
  });
  for (var z = 0; z < answers.length; z++) {
    // Native trim replaces jQuery's $.trim; the word list is a local, not a leaked global.
    var answerWords = answers[z].trim().split(" ");
    var hits = 0;
    for (var x = 0; x < answerWords.length; x++) {
      var answerCode = soundex(answerWords[x]);
      for (var y = 0; y < userWords.length; y++) {
        if (userCodes[y] != null && answerCode != null) {
          if (userCodes[y] == answerCode) {
            hits++;
          }
        } else if (userWords[y].indexOf(answerWords[x]) > -1) {
          // No SOUNDEX code for one of the words (e.g. it contains digits): fall back to containment.
          hits++;
        }
      }
    }
    if ((hits / answerWords.length) > 0.5) {
      return true;
    }
  }
  return false;
}
// create object listing the SOUNDEX values for each letter
// -1 indicates that the letter is not coded, but is used for coding
// 0 indicates that the letter is omitted for modern census archives
// but acts like -1 for older census archives
// 1 is for BFPV
// 2 is for CGJKQSXZ
// 3 is for DT
// 4 is for L
// 5 is for MN my home state
// 6 is for R
// Constructor for the letter -> SOUNDEX code table: assigns one property per
// lowercase letter, a through z, using the codes described in the comment above.
function makesoundex() {
  var letters = "abcdefghijklmnopqrstuvwxyz";
  var codes = [
    -1, 1, 2, 3, -1, 1, 2, 0, -1, 2, 2, 4, 5,
    5, -1, 1, 2, 6, 2, 3, -1, 1, 0, 2, -1, 2
  ];
  for (var k = 0; k < letters.length; k++) {
    this[letters.charAt(k)] = codes[k];
  }
}
var sndx = new makesoundex();
// check to see that the input is valid
// Input validation: rejects empty / null input and any value containing a
// character outside a-z and A-Z.
function isSurname(name) {
  if (name == "" || name == null) {
    return false;
  }
  var position = 0;
  while (position < name.length) {
    var letter = name.charAt(position);
    var isLower = letter >= 'a' && letter <= 'z';
    var isUpper = letter >= 'A' && letter <= 'Z';
    if (!isLower && !isUpper) {
      return false;
    }
    position++;
  }
  return true;
}
// Collapse out directly adjacent sounds
// 1. Assume that surname.length>=1
// 2. Assume that surname contains only lowercase letters
// Removes a letter when the letter directly before it has the same sndx code.
// Works recursively from the end of the name towards the front.
function collapse(surname) {
  if (surname.length == 1) {
    return surname;
  }
  var head = surname.charAt(0);
  var tail = collapse(surname.substring(1, surname.length));
  var sameSound = sndx[head] == sndx[tail.charAt(0)];
  // On a repeated sound keep the head and drop the first letter of the tail.
  return sameSound ? head + tail.substring(1, tail.length) : head + tail;
}
// Collapse out directly adjacent sounds using the new National Archives method
// 1. Assume that surname.length>=1
// 2. Assume that surname contains only lowercase letters
// 3. H and W are completely ignored
// Drops every letter after the first whose sndx entry is falsy (0 for h and w,
// or missing). Works recursively from the end of the name towards the front.
function omit(surname) {
  if (surname.length == 1) {
    return surname;
  }
  var head = surname.charAt(0);
  var tail = omit(surname.substring(1, surname.length));
  if (sndx[tail.charAt(0)]) {
    return head + tail;
  }
  // The tail starts with an ignored letter: skip it.
  return head + tail.substring(1, tail.length);
}
// Output the coded sequence
// Formats the final code: the first letter upper-cased, a dash, then the first
// three positive sndx codes of the remaining letters, padded with "0" to three places.
function output_sequence(seq) {
  var digits = "";
  var taken = 0;
  var rest = seq.substring(1, seq.length);
  var position = 0;
  while (position < rest.length && taken < 3) {
    var code = sndx[rest.charAt(position)];
    if (code > 0) {
      digits += code;
      taken++;
    }
    position++;
  }
  while (taken < 3) {
    digits += "0";
    taken++;
  }
  return seq.charAt(0).toUpperCase() + "-" + digits;
}
// Compute the SOUNDEX code for the surname
/**
 * Computes the SOUNDEX code for a surname.
 *
 * Pipeline: lower-case the value, drop the ignored letters with omit(), merge
 * adjacent equal sounds with collapse(), then format with output_sequence().
 *
 * @param {string} value the surname
 * @returns {string|null} the formatted code, or null when isSurname() rejects the value
 */
function soundex(value) {
  if (!isSurname(value)) {
    return null;
  }
  // The earlier extra collapse() pass was discarded before use, so it is gone;
  // only the omit -> collapse pipeline contributes to the result.
  var withoutIgnored = omit(value.toLowerCase());
  var collapsed = collapse(withoutIgnored);
  return output_sequence(collapsed);
}
/**
 * Normalises a user's answer into a list of comparable words.
 *
 * Commas are removed, the text is lower-cased and split on spaces; empty
 * entries and excluded words are dropped, and every "$" and "&" is stripped.
 *
 * @param {string} u raw answer text
 * @returns {string[]} cleaned, non-empty words in their original order
 */
function clean(u) {
  var cw = ["ARRAY OF WORDS TO BE EXCLUDED FROM COMPARISON"];
  var words = u.replace(/,/g, "").toLowerCase().split(" ");
  var n = [];
  for (var y = 0; y < words.length; y++) {
    var word = words[y];
    // A word is excluded when it equals ANY entry of cw (the old loop let it
    // through as soon as it differed from a single entry).
    if (word === "" || cw.indexOf(word) !== -1) {
      continue;
    }
    //Don't use & or $ in comparison
    var val = word.replace(/[$&]/g, "");
    // Words made only of "$" / "&" end up empty and are skipped.
    if (val !== "") {
      n.push(val);
    }
  }
  return n;
}
Here's an answer based on Levenshtein distance https://en.wikipedia.org/wiki/Levenshtein_distance
// Similarity ratio: 1 minus (edit distance / length of the longer string).
// Two empty strings count as identical (1.0).
function similarity(s1, s2) {
  var secondIsLonger = s1.length < s2.length;
  var longer = secondIsLonger ? s2 : s1;
  var shorter = secondIsLonger ? s1 : s2;
  var longerLength = longer.length;
  if (longerLength == 0) {
    return 1.0;
  }
  var distance = editDistance(longer, shorter);
  return (longerLength - distance) / parseFloat(longerLength);
}
For calculating edit distance
// Case-insensitive Levenshtein distance, computed with a single rolling row.
function editDistance(s1, s2) {
  var a = s1.toLowerCase();
  var b = s2.toLowerCase();
  var row = [];
  var col;
  // Row 0: turning the empty prefix of a into the first col characters of b takes col insertions.
  for (col = 0; col <= b.length; col++) {
    row[col] = col;
  }
  for (var line = 1; line <= a.length; line++) {
    // `left` is the current row's value for the previous column; it starts as column 0.
    var left = line;
    for (col = 1; col <= b.length; col++) {
      // row[col - 1] still holds the previous row's value here, i.e. the diagonal cell.
      var cell = row[col - 1];
      if (a.charAt(line - 1) != b.charAt(col - 1)) {
        cell = Math.min(cell, left, row[col]) + 1;
      }
      row[col - 1] = left;
      left = cell;
    }
    row[b.length] = left;
  }
  return row[b.length];
}
Usage
similarity('Stack Overflow','Stack Ovrflw')
returns 0.8571428571428571
You can play with it below:
// Reads both text inputs and writes their similarity score into the #output element.
function checkSimilarity(){
  var left = document.getElementById("lhsInput").value;
  var right = document.getElementById("rhsInput").value;
  var target = document.getElementById("output");
  target.innerHTML = similarity(left, right);
}
// Similarity ratio: 1 minus (edit distance / length of the longer string).
// Two empty strings count as identical (1.0).
function similarity(s1, s2) {
  var secondIsLonger = s1.length < s2.length;
  var longer = secondIsLonger ? s2 : s1;
  var shorter = secondIsLonger ? s1 : s2;
  var longerLength = longer.length;
  if (longerLength == 0) {
    return 1.0;
  }
  var distance = editDistance(longer, shorter);
  return (longerLength - distance) / parseFloat(longerLength);
}
// Case-insensitive Levenshtein distance, computed with a single rolling row.
function editDistance(s1, s2) {
  var a = s1.toLowerCase();
  var b = s2.toLowerCase();
  var row = [];
  var col;
  // Row 0: turning the empty prefix of a into the first col characters of b takes col insertions.
  for (col = 0; col <= b.length; col++) {
    row[col] = col;
  }
  for (var line = 1; line <= a.length; line++) {
    // `left` is the current row's value for the previous column; it starts as column 0.
    var left = line;
    for (col = 1; col <= b.length; col++) {
      // row[col - 1] still holds the previous row's value here, i.e. the diagonal cell.
      var cell = row[col - 1];
      if (a.charAt(line - 1) != b.charAt(col - 1)) {
        cell = Math.min(cell, left, row[col]) + 1;
      }
      row[col - 1] = left;
      left = cell;
    }
    row[b.length] = left;
  }
  return row[b.length];
}
<div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div>
<div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div>
<div>Match: <span id="output">No Input</span></div>
Using this library for string similarity worked like a charm for me!
Here's the Example -
var similarity = stringSimilarity.compareTwoStrings("Apples","apple"); // => 0.88
Here is a very simple function that does a comparison and returns a percentage based on equivalency. While it has not been tested for all possible scenarios, it may help you get started.
// Position-by-position comparison: counts the indexes at which both strings hold
// an equal character and reports that count as a percentage of the longer length.
function similar(a,b) {
  var aIsLonger = a.length > b.length;
  var minLength = aIsLonger ? b.length : a.length;
  var maxLength = (a.length < b.length) ? b.length : a.length;
  var equivalency = 0;
  var position = 0;
  while (position < minLength) {
    if (a[position] == b[position]) {
      equivalency += 1;
    }
    position += 1;
  }
  var weight = equivalency / maxLength;
  return (weight * 100) + "%";
}
alert(similar("test","tes")); // 75%
alert(similar("test","test")); // 100%
alert(similar("test","testt")); // 80%
alert(similar("test","tess")); // 75%
To Find degree of similarity between two strings; we can use more than one or two methods but I am mostly inclined towards the usage of 'Dice's Coefficient' . which is better! well in my knowledge than using 'Levenshtein distance'
Using this 'string-similarity' package from npm you will be able to work on what I said above.
some easy usage examples are
var stringSimilarity = require('string-similarity');
var similarity = stringSimilarity.compareTwoStrings('healed', 'sealed');
var matches = stringSimilarity.findBestMatch('healed', ['edward', 'sealed', 'theatre']);
for more please visit the link given above. Thankyou.
Just one I quickly wrote that might be good enough for your purposes:
// Scores two strings between 0 and 1. Each position of strA costs nothing when the
// characters match (or strB has no character there), 1 when they differ only in
// case, and 4 otherwise; every character of length difference costs another 4.
function Compare(strA,strB){
  var penalty = 0;
  for (var position = 0; position < strA.length; position++) {
    var left = strA[position];
    var right = strB[position];
    if (typeof right == 'undefined' || left == right) {
      continue;
    }
    penalty += left.toLowerCase() == right.toLowerCase() ? 1 : 4;
  }
  return 1 - (penalty + 4*Math.abs(strA.length - strB.length))/(2*(strA.length+strB.length));
}
This weighs characters that are the same but different case 1 quarter as heavily as characters that are completely different or missing. It returns a number between 0 and 1, 1 meaning the strings are identical. 0 meaning they have no similarities. Examples:
Compare("Apple", "Apple") // 1
Compare("Apples", "Apple") // 0.8181818181818181
Compare("Apples", "apple") // 0.7727272727272727
Compare("a", "A") // 0.75
Compare("Apples", "appppp") // 0.45833333333333337
Compare("a", "b") // 0
How about function similar_text from PHP.js library?
It is based on a PHP function with the same name.
/**
 * Counts the characters two strings have in common, PHP `similar_text` style:
 * find the longest common substring, then apply the same search recursively to
 * the text left of it and the text right of it, and add the lengths up.
 *
 * @param {*} first first value (converted to a string)
 * @param {*} second second value (converted to a string)
 * @returns {number} number of matching characters; 0 when either value is null or undefined
 */
function similar_text (first, second) {
  // Calculates the similarity between two strings
  // discuss at: http://phpjs.org/functions/similar_text
  if (first === null || second === null || typeof first === 'undefined' || typeof second === 'undefined') {
    return 0;
  }
  first += '';
  second += '';
  var pos1 = 0,
    pos2 = 0,
    max = 0,
    firstLength = first.length,
    secondLength = second.length,
    p, q, l, sum;
  // Locate the first longest common substring: it starts at pos1 in `first`,
  // at pos2 in `second`, and is `max` characters long.
  for (p = 0; p < firstLength; p++) {
    for (q = 0; q < secondLength; q++) {
      for (l = 0;
        (p + l < firstLength) && (q + l < secondLength) && (first.charAt(p + l) === second.charAt(q + l)); l++);
      if (l > max) {
        max = l;
        pos1 = p;
        pos2 = q;
      }
    }
  }
  sum = max;
  if (sum) {
    // Recurse by name rather than through `this`, so the function also works when
    // called without a receiver (strict mode, modules, callbacks).
    if (pos1 && pos2) {
      // Left parts: each string is cut at its OWN match position.
      sum += similar_text(first.substring(0, pos1), second.substring(0, pos2));
    }
    if ((pos1 + max < firstLength) && (pos2 + max < secondLength)) {
      sum += similar_text(first.substring(pos1 + max), second.substring(pos2 + max));
    }
  }
  return sum;
}
fuzzyset - A fuzzy string set for javascript.
fuzzyset is a data structure that performs something akin to fulltext search against data to determine likely mispellings and approximate string matching. Note that this is a javascript port of a python library.
To some extent, I like the ideas of Dice's coefficient embedded in the string-similarity module. But I feel that considering the bigrams only and not taking into account their multiplicities is missing some important data. Below is a version that also handles multiplicities, and I think is a simpler implementation overall. I don't try to use their API, offering only a function which compares two strings after some manipulation (removing non-alphanumeric characters, lower-casing everything, and compressing but not removing whitespace), built atop one which compares them without that manipulation. It would be easy enough to wrap this back in their API, but I see little need.
// Dice-coefficient similarity on bigrams, counting repeated bigrams with their
// multiplicity. Inputs are normalised first (see prep); the result is in [0, 1].
const stringSimilarity = (a, b) =>
  _stringSimilarity (prep (a), prep (b))

// Same measure without normalisation: twice the number of shared bigrams
// divided by the total number of bigrams in both strings.
const _stringSimilarity = (a, b) => {
  const bg1 = bigrams (a)
  const bg2 = bigrams (b)
  // Strings shorter than two characters have no bigrams at all; without this
  // guard the ratio below would be 0 / 0 (NaN). Fall back to plain equality.
  if (bg1 .length + bg2 .length === 0) {
    return a === b ? 1 : 0
  }
  const c1 = count (bg1)
  const c2 = count (bg2)
  const combined = uniq ([... bg1, ... bg2])
    .reduce ((t, k) => t + (Math .min (c1 [k] || 0, c2 [k] || 0)), 0)
  return 2 * combined / (bg1 .length + bg2 .length)
}

// Lower-cases, turns every character that is neither a word character nor
// whitespace into a space, and squeezes whitespace runs into one space.
const prep = (str) => // TODO: unicode support?
  str .toLowerCase () .replace (/[^\w\s]/g, ' ') .replace (/\s+/g, ' ')

// All pairs of adjacent characters. Pairs are built from the spread array so
// that positions and characters stay in step for characters outside the BMP.
const bigrams = (str) => {
  const chars = [...str]
  return chars .slice (0, -1) .map ((c, i) => c + chars [i + 1])
}

// Occurrence count per distinct entry.
const count = (xs) =>
  xs .reduce ((a, x) => ((a [x] = (a [x] || 0) + 1), a), {})

// Distinct entries, in first-seen order.
const uniq = (xs) =>
  [... new Set (xs)]
console .log (stringSimilarity (
'foobar',
'Foobar'
)) //=> 1
console .log (stringSimilarity (
"healed",
"sealed"
))//=> 0.8
console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"For sale: table in very good condition, olive green in colour."
)) //=> 0.7787610619469026
console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"For sale: green Subaru Impreza, 210,000 miles"
)) //=> 0.38636363636363635
console .log (stringSimilarity (
"Olive-green table for sale, in extremely good condition.",
"Wanted: mountain bike with at least 21 gears."
)) //=> 0.1702127659574468
console .log (stringSimilarity (
"The rain in Spain falls mainly on the plain.",
"The run in Spun falls munly on the plun.",
)) //=> 0.7560975609756098
console .log (stringSimilarity (
"Fa la la la la, la la la la",
"Fa la la la la, la la",
)) //=> 0.8636363636363636
console .log (stringSimilarity (
"car crash",
"carcrash",
)) //=> 0.8
console .log (stringSimilarity (
"Now is the time for all good men to come to the aid of their party.",
"Huh?",
)) //=> 0
.as-console-wrapper {max-height: 100% !important; top: 0}
Some of the test cases are from string-similarity, others are my own. They show some significant differences from that package, but nothing untoward. The only one I would call out is the difference between "car crash" and "carcrash", which string-similarity sees as identical and I report with a similarity of 0.8. My version finds more similarity in all the olive-green test-cases than does string-similarity, but as these are in any case fairly arbitrary numbers, I'm not sure how much difference it makes; they certainly position them in the same relative order.
Below you can find a performance comparison of the string-similarity lib vs. the top answer (by #overloard1234).
Based on #Tushar Walzade's advice to use string-similarity library, you can find, that for example
stringSimilatityLib.findBestMatch('KIA','Kia').bestMatch.rating
will return 0.0
So, looks like better to compare it in lowerCase.
Better base usage (for arrays) :
findBestMatch(str, strArr) {
const lowerCaseArr = strArr.map(element => element.toLowerCase());//creating lower case array
const match = stringSimilatityLib.findBestMatch(str.toLowerCase(), lowerCaseArr).bestMatch; //trying to find bestMatch
if (match.rating > 0) {
const foundIndex = lowerCaseArr.findIndex(x => x === match.target); //finding the index of found best case
return strArr[foundIndex]; //returning initial value from array
}
return null;
},
Performance
Also, i compared top answer here (made by #overloard1234) and string-similarity lib (v4.0.4).
The results you can find here : https://jsbench.me/szkzojoskq/1
Result : string-similarity is ~ twice faster
Just for fun : v2.0 of string-similarity library slower, than latest 4.0.4 about 2.2 times. So update it, if you are still using < 3.0 :)
const str1 = " pARTH PARmar r ";
const str2 = " parmar r par ";
/**
 * Compares two strings in two ways.
 *
 * - `result`: true when every word of str1 starts with the same letter as the
 *   word at the same position in str2 (case-insensitive, after trimming).
 * - `score`: percentage of the longer string's characters that can be paired
 *   off, one for one, with characters of the shorter string (case-sensitive).
 *
 * @param {string} [str1=""]
 * @param {string} [str2=""]
 * @returns {{score: number, result: boolean}}
 */
function calculateSimilarity(str1 = "", str2 = "") {
  let longer = str1.trim();
  let shorter = str2.trim();
  const a1 = longer.toLowerCase().split(" ");
  const b1 = shorter.toLowerCase().split(" ");
  // Guard the index: when str1 has more words than str2 there is no partner
  // word to read a first letter from, so the words cannot all match.
  const result = a1.every((aa, i) => i < b1.length && aa[0] === b1[i][0]);
  if (longer.length < shorter.length) [longer, shorter] = [shorter, longer];
  let count = 0;
  for (let i = 0; i < longer.length; i++) {
    if (shorter && shorter.includes(longer[i])) {
      // Consume the matched character so it cannot be paired twice.
      shorter = shorter.replace(longer[i], "");
      count++;
    }
  }
  return {
    // Two empty strings are identical; avoid 0 / 0.
    score: longer.length === 0 ? 100 : (count * 100) / longer.length,
    result
  };
}
console.log(calculateSimilarity(str1, str2));
I used #overlord1234 function, but corrected ь: '', cuz English words don't have this letter, and next need return a[char] ?? char instead of return a[char] || char

Categories

Resources