How to find an unknown pattern, that is present in multiple strings? - javascript

I'm trying to find a part in multiple strings, that all strings share in common. For example:
const string1 = '.bold[_ngcontent="_kjhafh-asda-qw"] {background:black;}';
const string2 = '[_ngcontent="_kjhafh-asda-qw"] {background-color:hotpink;}';
const string3 = 'div > p > span[_ngcontent="_kjhafh-asda-qw"] {background:hotpink;}'
I don't know in advance what exactly the string is that I'm looking for, so I have to loop over the strings and find out. In the example above, the pattern would be [_ngcontent="_kjhafh-asda-qw"].
Is this even possible? Also, it would have to understand that maybe no such pattern exists. And are there methods for that or do I need to implement such an algorithm myself?
EDIT (context): We are building a validator that checks a micro-frontend for global CSS rules (rules that are not prefixed and live outside a shadow DOM). It loads the micro-frontend in isolation in a headless browser (within a Jenkins pipeline) and validates that its global rules cannot break anything else on the same page that lies outside the micro-frontend's context. Using a headless browser, we can rely on the document.styleSheets property and not miss any styles that are loaded. This finds <style> tags and their contents, as well as the contents of external stylesheets.

Leveraging the BLAST algorithm, the following code snippet seeks successively matching substrings.
//
// See https://stackoverflow.com/questions/13006556/check-if-two-strings-share-a-common-substring-in-javascript/13007065#13007065
// for the following function...
//
/**
 * Finds the longest substring of `needle` that also occurs in `haystack`,
 * ignoring letter case.
 *
 * Candidates are tried from the longest to the shortest, and from left to
 * right within each length, so the first hit is the longest match.
 *
 * @param {string} needle - String whose substrings are searched for.
 * @param {string} [haystack] - String to search in; defaults to the receiver.
 * @param {number} [minLength=5] - Shortest match worth reporting.
 * @returns {{found: number, substring?: string, needleIndex?: number, haystackIndex?: number}}
 *   `found` is 1 with the match details, or 0 when nothing long enough is shared.
 */
String.prototype.subCompare = function (needle, haystack, minLength) {
  // `||` keeps the original fallbacks: an empty haystack falls back to the
  // receiver, and a minLength of 0 falls back to 5.
  const target = String(haystack || this).toLowerCase();
  const shortest = minLength || 5;

  // Both sides are lower-cased. Previously only the haystack was, so a needle
  // containing upper-case letters could never match.
  const probe = needle.toLowerCase();
  // Report the match in the needle's own casing, unless lower-casing changed
  // the length (rare Unicode cases), where the indices would no longer line up.
  const source = probe.length === needle.length ? needle : probe;

  for (let length = probe.length; length >= shortest; length--) {
    for (let start = 0; start <= probe.length - length; start++) {
      const haystackIndex = target.indexOf(probe.slice(start, start + length));
      if (haystackIndex !== -1) {
        return {
          found: 1,
          substring: source.slice(start, start + length),
          needleIndex: start,
          haystackIndex,
        };
      }
    }
  }
  return { found: 0 };
};
//
// Iterate through the array of strings, seeking successive matching substrings...
//
// Sample selectors that all share the attribute selector we want to discover.
const strings = [
  '.bold[_ngcontent="_kjhafh-asda-qw"] {background:black;}',
  '[_ngcontent="_kjhafh-asda-qw"] {background-color:hotpink;}',
  'div > p > span[_ngcontent="_kjhafh-asda-qw"] {background:hotpink;}',
];

// Start with the whole first string as the candidate, then narrow it against
// each following string. The loop stops as soon as one comparison finds
// nothing in common.
let check = { found: 1, substring: strings[0] };
for (let i = 1; check.found && i < strings.length; i++) {
  check = check.substring.subCompare(strings[i]);
}
console.log(check);
Note that without seeing a larger sampling of string data, it's not clear whether this algorithm satisfies the objective...

Thanks to Trentium's answer, I was able to do it. I adapted the code a little bit, as it was doing too much and also, the substr didn't yield a consistent result (it depended on the order of input strings).
The code could obviously be further minified/simplified.
const findCommonPattern = (base, needle, minLength = 5) => {
const haystack = base.toLowerCase();
for (let i = needle.length; i >= minLength; i--) {
for (let j = 0; j <= needle.length - i; j++) {
let prefix = needle.substr(j, i);
let k = haystack.indexOf(prefix);
if (k !== -1) {
return {
found: true,
prefix,
};
}
}
}
return {
found: false,
};
};
const checkIfCssIsPrefixed = (strings) => {
let check = { found: true };
let matchingStrings = [];
for (let i = 1; check.found && i < strings.length; ++i) {
check = findCommonPattern(strings[0], strings[i]);
matchingStrings.push(check.prefix);
}
// Sort by length and take the shortest string, which will be the pattern that all of the strings share in common.
check.prefix = matchingStrings.sort((a, b) => a.length - b.length)[0];
return check;
};
// Selectors as emitted for one Angular component; all but the second carry
// the same [_ngcontent-...] attribute (the second is the [_nghost-...] rule).
const sampleSelectors = [
  ".spacer[_ngcontent-wdy-c0]",
  "[_nghost-wdy-c0]",
  ".toolbar[_ngcontent-wdy-c0]",
  "p[_ngcontent-wdy-c0]",
  ".spacer[_ngcontent-wdy-c0]",
  ".toolbar[_ngcontent-wdy-c0] img[_ngcontent-wdy-c0]",
  "h1[_ngcontent-wdy-c0], h2[_ngcontent-wdy-c0], h3[_ngcontent-wdy-c0], h4[_ngcontent-wdy-c0], h5[_ngcontent-wdy-c0], h6[_ngcontent-wdy-c0]",
  ".toolbar[_ngcontent-wdy-c0] #twitter-logo[_ngcontent-wdy-c0]",
  ".toolbar[_ngcontent-wdy-c0] #youtube-logo[_ngcontent-wdy-c0]",
  ".toolbar[_ngcontent-wdy-c0] #twitter-logo[_ngcontent-wdy-c0]:hover, .toolbar[_ngcontent-wdy-c0] #youtube-logo[_ngcontent-wdy-c0]:hover",
];
const sampleResult = checkIfCssIsPrefixed(sampleSelectors);
console.log(sampleResult);

Related

Swapping consecutive/adjacent characters of a string in JavaScript

Example string: astnbodei, the actual string must be santobedi. Here, my system starts reading a pair of two characters from the left side of a string, LSB first and then the MSB of the character pair. Therefore, santobedi is received as astnbodei. The strings can be a combination of letters and numbers and even/odd lengths of characters.
My attempt so far:
// Question's original attempt, kept as posted so the discussion below still applies.
// Name, Code, Firmware, Serial_Number, Label, msg, metadata and msgType are not
// defined in this snippet; presumably the host system provides them.
var attributes_ = [Name, Code,
Firmware, Serial_Number, Label
]; //the elements of 'attributes' are the strings
var attributes = [];
for (var i = 0; i < attributes_.length; i++) {
// Strip NUL characters, split into single characters, then pass the array to swap().
attributes.push(swap(attributes_[i].replace(/\0/g, '').split('')));
}
// Intended to swap every adjacent pair of elements in place.
function swap(array_attributes) {
// tmpArr is an alias, not a copy: the array passed in is modified directly.
var tmpArr = array_attributes;
for (var k = 0; k < tmpArr.length; k += 2) {
// NOTE(review): nothing inside this do/while changes k or tmpArr[k + 2], so once
// the condition is true it stays true and the loop never ends. The inner loop is
// not needed; the outer for loop already advances k by two.
do {
var tmp = tmpArr[k];
// NOTE(review): for an odd-length array the last k + 1 is past the end, so this
// reads undefined and the write below appends an extra element.
tmpArr[k] = tmpArr[k+1]
tmpArr[k+1] = tmp;
} while (tmpArr[k + 2] != null);
}
// Returns the array itself (not a joined string).
return tmpArr;
}
msg.Name = attributes; //its to check the code
// A return outside any function is only valid if the host wraps this script in a function.
return {msg: msg, metadata: metadata,msgType: msgType}; //its the part of system code
While running above code snippet, I received the following error:
Can't compile script: javax.script.ScriptException: :36:14 Expected : but found ( return {__if(); ^ in at line number 36 at column number 14
I'm not sure what the error says. Is my approach correct? Is there a direct way to do it?
Did you try going through the array in pairs and swapping using ES6 syntax?
You can swap variables like this in ES6:
[a, b] = [b, a]
Below is one way to do it. The code you have is not valid because return is not allowed outside a function.
// Swap every adjacent pair of characters: "astnbodei" -> "santobedi".
let string = "astnbodei";
let myArray = string.split('');
let outputArray = [];
// `let i` keeps the counter local; the undeclared `i` was an implicit global
// and throws a ReferenceError in strict mode / ES modules.
for (let i = 0; i < myArray.length; i += 2) {
  // For an odd-length string the last character has no partner, so keep it
  // as is instead of pushing `undefined`.
  if (i + 1 < myArray.length) {
    outputArray.push(myArray[i + 1]);
  }
  outputArray.push(myArray[i]);
}
console.log(outputArray.join(''));
Consecutive pairwise character swapping of/within a string very easily can be solved by a reduce task ...
/**
 * Swaps every adjacent pair of characters in a value's string form.
 * A trailing character without a partner (odd length) stays in place.
 *
 * @param {*} value - Converted with String() before swapping.
 * @returns {string} The pairwise-swapped string.
 */
function swapCharsPairwise(value) {
  const chars = String(value).split('');
  const swapped = [];
  for (let idx = 0; idx < chars.length; idx += 2) {
    // Only emit the partner when it exists. The previous version assigned an
    // undeclared `adjacent` (an implicit global) and pushed `undefined` for
    // odd-length input.
    if (idx + 1 < chars.length) {
      swapped.push(chars[idx + 1]);
    }
    swapped.push(chars[idx]);
  }
  return swapped.join('');
}
console.log(
  'swapCharsPairwise("astnbodei") ...',
  swapCharsPairwise("astnbodei")
);
The reason for the error in the question was the placement of the function swap. However, switching its placement gave me another error:
java.util.concurrent.ExecutionException: java.util.concurrent.ExecutionException: javax.script.ScriptException: delight.nashornsandbox.exceptions.ScriptCPUAbuseException: Script used more than the allowed [8000 ms] of CPU time.
@dikuw's answer helped me partially. The following code worked for me:
// Name, Code, Firmware, Serial_Number, Label, msg, metadata and msgType are not
// defined in this snippet; presumably the host system provides them.
var attributes_ = [Name, Code,
Firmware, Serial_Number, Label
]; //the elements of 'attributes' are the strings
var attributes = [];
for (var i = 0; i < attributes_.length; i++) {
// Strip NUL characters, split into single characters, then swap them pairwise.
// swap() is declared after the return below; function declarations are hoisted,
// so the call still resolves.
attributes.push(swap(attributes_[i].replace(/\0/g, '').split('')));
}
// A return outside any function is only valid if the host wraps this script in a function.
// NOTE(review): `attributes` is built above but is not attached to the returned
// object in this snippet - confirm that is intended.
return {msg: msg, metadata: metadata,msgType: msgType};
/**
 * Swaps every adjacent pair of elements and joins the result into a string.
 *
 * @param {string[]} array_attributes - Characters to swap pairwise.
 * @returns {string} The swapped characters joined together.
 */
function swap(array_attributes) {
  const tmpArr = [];
  for (let k = 0; k < array_attributes.length; k += 2) {
    if (k + 1 < array_attributes.length) {
      tmpArr.push(array_attributes[k + 1]);
    }
    // Always keep the current element. Previously the last character of an
    // odd-length input was dropped because it has no partner.
    tmpArr.push(array_attributes[k]);
  }
  return tmpArr.join('');
}

Find Longest Prefix That is Also a Suffix in String - Javascript

Link to codewars challenge
I need to return the length of the longest prefix that is also a suffix of a string in Javascript.
As far as I understand, the prefixes in "abcd" are:
['a', 'ab', 'abc']
And the suffixes in "abcd" are:
[ 'bcd', 'cd', 'd' ]
So the length of the longest prefix that is also a suffix in "abcd" in this case is 0, because there are no prefixes that are also suffixes in "abcd".
So far I've been able to figure out how to get the suffixes into an array for comparison, but not the prefixes.
// Question's work-in-progress, kept as posted. It only collects candidates and
// does not compute the longest prefix that is also a suffix yet.
function returnLongestPrefixAndSuffix(string) {
let prefixes = [];
let suffixes = [];
for (let i = 0; i < string.length -1; i++) {
// NOTE(review): slice(i) returns the text from index i to the END, so this loop
// collects suffixes (starting with the whole string at i = 0), not prefixes.
// A prefix of length i is string.slice(0, i).
prefixes.push(string.slice(i));
}
// Proper suffixes: everything from index i to the end, for i = 1 .. length - 1.
for (let i = 1; i < string.length; i++) {
suffixes.push(string.slice(i));
}
// Arrays are converted to comma-separated text by the + operator, so this
// returns one string of the form "<prefixes> <suffixes>".
return prefixes + " " + suffixes;
}
console.log(returnLongestPrefixAndSuffix("abcd"));
I'm not grasping the concept of how to start at the beginning of a string and add a larger element to the array each time by one character, excluding the element that would include the last one.
Please follow my current logic if possible.
EDIT: My code now looks like this:
// Question's second attempt, kept as posted. Returns the length of the longest
// proper prefix that is also a proper suffix, but does not exclude overlapping
// prefix/suffix pairs.
function solve(string) {
let prefixes = [];
let suffixes = [];
let includedList = [];
// Proper prefixes: lengths 1 .. length - 1.
for (let i = 1; i < string.length; i++) {
prefixes.push(string.slice(0, i));
}
// Proper suffixes: the last i characters, for i = 1 .. length - 1.
for (let i = 1; i < string.length; i++) {
suffixes.push(string.slice(-i));
}
console.log(prefixes);
console.log(suffixes);
// Keep every prefix that also appears among the suffixes.
for (let i = 0; i < prefixes.length; i++) {
let element = prefixes[i];
if (suffixes.includes(element) === true) {
includedList.push(element);
}
}
console.log(includedList);
if (includedList.length === 0) {
return 0;
}
else {
// `overlap` holds the same elements as includedList and is only logged.
let overlap = prefixes.filter(value => suffixes.includes(value));
console.log(overlap);
// NOTE(review): nothing limits a match to half the string length, so a prefix
// that overlaps its matching suffix is accepted. For "aaa" this returns 2
// (prefix "aa"), while the non-overlapping answer is 1.
let longest = includedList.sort(function (a, b) { return b.length - a.length; })[0];
return longest.length;
}
}
console.log(solve("abcdabc"));
This passes 10049 tests but fails 163 tests on Codewars. I still do not know what to do with the overlap variable or how to exclude overlaps from the includedList array.
/**
 * Returns the length of the longest prefix of `string` that is also a suffix,
 * where prefix and suffix may not overlap.
 *
 * @param {string} string - Text to inspect.
 * @returns {number} Length of the longest match, or 0 when there is none.
 */
function solve(string) {
  // Non-overlapping pieces can be at most half the string long, so start
  // there and shrink until the two ends agree.
  let size = Math.floor(string.length / 2);
  while (size > 0) {
    const head = string.slice(0, size);
    const tail = string.slice(-size);
    if (head === tail) {
      return size;
    }
    size -= 1;
  }
  return 0;
}
console.log(solve("abcdabc"));
To account for the overlap, initialize your for-loop like this:
let i = Math.floor(string.length / 2)
That will initialize the for-loop at the half-way point in your string, so that you can count down and compare whether or not the prefix == the suffix, starting with the longest.
You could return prefix.length, but that will be the same thing as i.
Also, be sure to return 0 outside of the for-loop. Because if you try:
if (prefix != suffix) {
return 0;
}
inside of the for-loop, it will stop counting right there.
To get the prefixes, you can use the second argument of .slice:
string.slice(0, i)
Note that to get the suffixes, you could also take the string from the end:
string.slice(-i)
There is no sense in collecting prefixes and suffixes in arrays, just search for the biggest i where the suffix equals the prefix.
Please see the documentation of the slice function, it may take a second argument: https://www.w3schools.com/jsref/jsref_slice_string.asp
So following your logic, one way to get prefixes would be to:
for (let i = 1; i <= string.length; i++) {
prefixes.push(string.slice(0, i));
}
EDIT:
Your newest code doesn't work because of two reasons:
You may end up with includedList being empty, but you still try to get first element out of it.
You don't take overlaps into consideration. For the input aaa the correct result is 1 (the prefix a), since the prefix aa overlaps with the corresponding suffix. In other words, the result can't be longer than half the length of the input.

Performance comparison with V8

I'm currently testing multiple cases for parsing lines.
Each line is formatted like that:
"dHdX5jOa7ww9cGsW7jQF=dHdX5jOa7ww9cGsW7jQF=dHdX5jOa7ww9cGsW7jQF=dHdX5jOa7ww9cGsW7jQF"
There are a lot of lines of course, and I need to extract the key, and the value.
The key is delimited by the first "=" found.
There is never a "=" char in the key.
The value is the rest of string next after the first "=" sign.
So for this example the result should be:
{
key: "dHdX5jOa7ww9cGsW7jQF",
value: "dHdX5jOa7ww9cGsW7jQF=dHdX5jOa7ww9cGsW7jQF=dHdX5jOa7ww9cGsW7jQF"
}
From here we can iterate on multiple solutions:
// Baseline: split on every "=", then glue everything after the first piece
// back together. Simple, but it builds an array and a second string.
function first(line) {
  const [key, ...rest] = line.split('=');
  const value = rest.join('=');
  return { key, value };
}
// Second version: find the first "=" once and cut the line around it, using
// only built-in String methods.
/**
 * @param {string} line - Text of the form "key=value".
 * @returns {{key: string, value: string}} Text before and after the first "=".
 */
function optimized(line) {
  const index = line.indexOf("=");
  if (index === -1) {
    // No delimiter: treat the whole line as the key, as first() and homemade()
    // do. Previously the whole line ended up in `value`.
    return { key: line, value: "" };
  }
  // slice() replaces the deprecated substr().
  return {
    key: line.slice(0, index),
    value: line.slice(index + 1),
  };
}
// Hand-written version: walk the line one character at a time. Characters go
// to the key until the first "=" is seen, and to the value afterwards.
function homemade(line) {
  let key = "";
  let value = "";
  let seenDelimiter = false;
  for (let pos = 0, total = line.length; pos < total; pos += 1) {
    const current = line[pos];
    if (seenDelimiter) {
      value += current;
      continue;
    }
    if (current === '=') {
      // The first "=" itself is dropped; later ones belong to the value.
      seenDelimiter = true;
    } else {
      key += current;
    }
  }
  return { key, value };
}
// Next, re-implement the built-ins by hand so that the same key/value function
// can be run on home-made substring and index-of helpers.
/**
 * Returns the characters from index `from` up to, but not including, `to`.
 * Note the slice-style (start, end) arguments, unlike substr(start, length).
 *
 * @param {number} from - Index of the first character to copy.
 * @param {number} to - Index just past the last character to copy.
 * @returns {string} The copied characters.
 */
String.prototype.substr2 = function (from, to) {
  const text = String(this);
  // Clamp to the string bounds. Reading outside them yields `undefined`,
  // which used to be appended as the literal text "undefined".
  const start = Math.max(from, 0);
  const end = Math.min(to, text.length);
  let str = "";
  for (let i = start; i < end; ++i) {
    str += text[i];
  }
  return str;
};
/**
 * Home-made indexOf for a single character.
 * Each position is compared with `occ` as a whole, so a multi-character
 * argument never matches.
 *
 * @param {string} occ - Character to look for.
 * @returns {number} Index of the first occurrence, or -1.
 */
String.prototype.indexOf2 = function (occ) {
  const total = this.length;
  let position = 0;
  while (position < total) {
    if (this[position] === occ) {
      return position;
    }
    position += 1;
  }
  return -1;
};
/**
 * Same key/value split as the built-in version, but using the home-made
 * indexOf2/substr2 helpers.
 *
 * @param {string} line - Text of the form "key=value".
 * @returns {{key: string, value: string}} Text before and after the first "=".
 */
function overload(line) {
  const index = line.indexOf2("=");
  if (index === -1) {
    // No delimiter: the whole line is the key, matching first() and homemade().
    // Previously the whole line ended up in `value`.
    return { key: line, value: "" };
  }
  return {
    key: line.substr2(0, index),
    value: line.substr2(index + 1, line.length),
  };
}
And voila the results with jsBench:
[I'm using Google Chrome Version 59.0.3071.104 (Official Build) (64-bit)]
You can checkout the results of these functions with your browser in this jsBench
I don't understand what is going on. I did not think this was possible, since I wrote only the code I needed, using a native for() loop and other basic constructs like it...
My questions are:
Why are the built-in string operations so much faster?
Why is this repeated string concatenation inefficient?
Is there an alternative to it?
Why the builtin string operations are obviously much faster ?
Because they are optimized, and use internal implementation tricks that are not available to JavaScript code. For example, they avoid repeated string concatenation by building the result in one go.
Why this repeated string concatenation is inefficient ?
Because it creates many strings as intermediate results.
Is there an alternative to it ?
Use the builtin string operations :-)

How to write a character matching algorithm in JavaScript?

Given this input s1 = "dadxx" s2 = "ddxx" I'd expect the output to contain a bunch of a,b pairs wherever each character in s1 matched a character in s2 and vice versa (duplicates allowed). Among those pairs would be 0,0 because s1[0] and s2[0] are both equal to d.
The problem is that my output doesn't contain 2,1 even though s1[2] and s2[1] are both equal to d.
Can someone fix my algorithm or make a better one?
Here's a JSFiddle if it helps.
Here's my code:
// For each char, see if other string contains it
// Question's original code, kept as posted so the answers below still apply.
s1 = 'dadxx'
s2 = 'ddxx'
// Run in both directions; matchChars prints pairs as (index in s1, index in s2).
matchChars(s1,s2)
matchChars(s2,s1)
function matchChars(a,b) {
for (i = 0; i < a.length; i++) {
// NOTE(review): indexOf returns only the FIRST position of a[i] in b, so later
// occurrences are never reported. This is why the pair 2,1 is missing.
found = b.indexOf(a[i])
if (found >= 0) {
// Keep the printed order as (s1 index, s2 index) regardless of call direction.
if (a===s1) console.log(i,found)
else console.log(found,i)
}
}
}
I believe the problem you're having is that you're only checking for a single match for s1[i] in s2 by using indexOf. That will find the first index of a matched value, not every index.
If you instead iterate through both strings and compare every character, you get the result I think you're trying to achieve.
// Define strings
const s1 = 'dadxx';
const s2 = 'ddxx';
matchChars(s1, s2);
matchChars(s2, s1);

/**
 * Logs every pair of positions at which `a` and `b` hold the same letter,
 * ignoring case. Pairs are always printed as (index in s1, index in s2).
 *
 * @param {string} a - First string to scan.
 * @param {string} b - Second string to scan.
 */
function matchChars(a, b) {
  // Decide the output order BEFORE lower-casing. Comparing the lower-cased
  // copy with s1 fails whenever s1 contains upper-case letters.
  const aIsFirstString = a === s1;
  // Lower-case copies give case-insensitive matching; compare `a` and `b`
  // directly instead if case-sensitive matching is required.
  const left = a.toLowerCase();
  const right = b.toLowerCase();
  // Compare every letter of one string with every letter of the other.
  // Counters are declared with `let` so they do not leak as globals.
  for (let i = 0; i < left.length; i++) {
    for (let j = 0; j < right.length; j++) {
      if (left[i] === right[j]) {
        if (aIsFirstString) {
          console.log(i, j);
        } else {
          console.log(j, i);
        }
      }
    }
  }
}
Working JSBin example: https://jsbin.com/wecijopohi/edit?js,console
You say duplicates are allowed but not required. I'm submitting this as a more modern approach, not as a correction to the accepted solution, which looks good to me. https://jsfiddle.net/avc705zr/3/
/**
 * Lists every position pair at which a character of `a` occurs in `b`.
 *
 * @param {string} a - String whose characters are looked up one by one.
 * @param {string} b - String that is searched.
 * @returns {Array<[number, number]>} Pairs of [index in a, index in b].
 */
const match = (a, b) => {
  const matches = [];
  a.split('').forEach((letter, i) => {
    // Plain indexOf instead of new RegExp(letter): characters such as "." or
    // "(" have a special meaning in a pattern and gave wrong results or threw.
    let index = b.indexOf(letter);
    while (index !== -1) {
      matches.push([i, index]);
      index = b.indexOf(letter, index + 1);
    }
  });
  return matches;
};
However, in my experience, when you actually need functionality like this, you only need one of the strings to be exhausted. In other words, you are looking for matches in string 2 of all instances in string 1 -- which is to say, of the unique characters in string 1. So a modification that might come up in the real world could instead look like this:
/**
 * Returns a new array with duplicates removed, keeping the first occurrence
 * of each value in its original order.
 *
 * @returns {Array} The de-duplicated copy; the receiver is not changed.
 */
Array.prototype.unique = function () {
  // A Set lookup is constant time, where indexOf rescanned the array for every
  // element. It also treats NaN as equal to itself, so NaN is kept once
  // instead of being dropped entirely.
  const seen = new Set();
  return this.filter(function (value) {
    if (seen.has(value)) {
      return false;
    }
    seen.add(value);
    return true;
  });
};
/**
 * For each distinct character of `a`, lists every position at which it
 * occurs in `b`.
 *
 * @param {string} a - String supplying the characters to look for.
 * @param {string} b - String that is searched.
 * @returns {Array<[string, number]>} Pairs of [character, index in b].
 */
const match = (a, b) => {
  const matches = [];
  // A Set keeps one entry per character, in order of first appearance, with
  // no need for a helper on Array.prototype.
  const letters = new Set(a.split(''));
  letters.forEach((letter) => {
    // Plain indexOf instead of new RegExp(letter): characters such as "." or
    // "(" have a special meaning in a pattern and gave wrong results or threw.
    let index = b.indexOf(letter);
    while (index !== -1) {
      matches.push([letter, index]);
      index = b.indexOf(letter, index + 1);
    }
  });
  return matches;
};

Multiple specials characters replacement optimization

I need to replace all the special characters in a string with JavaScript or jQuery.
I am sure there is a better way to do this.
But I currently have no clue.
Anyone got an idea?
// Plain replacement -> accented characters it stands for. The pairs are the
// ones from the original parallel arrays, except that "Õ" now maps to the
// letter "O" (it was the digit "0").
const UNACCENT_GROUPS = {
  A: 'ÀÁÂÃÄÅÆ',
  C: 'Ç',
  E: 'ÈÉÊË',
  I: 'ÌÍÎÏ',
  D: 'Ð',
  N: 'Ñ',
  O: 'ÒÓÔÕÖØ',
  U: 'ÙÚÛÜ',
  Y: 'Ý',
  b: 'Þþ',
  s: 'ß',
  a: 'àáâãäåæ',
  c: 'ç',
  e: 'èéêë',
  i: 'ìíîï',
  d: 'ð',
  n: 'ñ',
  o: 'òóôõöø',
  u: 'ùúûü',
  y: 'ýÿ',
};

// Built once: accented character -> plain replacement.
const UNACCENT_MAP = new Map();
Object.keys(UNACCENT_GROUPS).forEach((plain) => {
  UNACCENT_GROUPS[plain].split('').forEach((accented) => {
    UNACCENT_MAP.set(accented, plain);
  });
});

/**
 * Replaces accented Latin-1 letters with their unaccented counterparts.
 *
 * @param {string} str - Text to clean up.
 * @returns {string} The text with every mapped character replaced.
 */
function Unaccent(str) {
  // One pass over the string. Only U+00C0-U+00FF can be in the table, and
  // unmapped characters in that range (× and ÷) are returned unchanged.
  return str.replace(/[\u00C0-\u00FF]/g, (ch) => (UNACCENT_MAP.has(ch) ? UNACCENT_MAP.get(ch) : ch));
}
/**
 * Replaces every occurrence of `search` in `str` with `repl`, in one pass.
 *
 * @param {string} str - Text to search in.
 * @param {string} search - Literal text to look for.
 * @param {string} repl - Literal replacement text.
 * @returns {string} The text with all occurrences replaced.
 */
function replaceAll(str, search, repl) {
  // Nothing to look for. The old loop never terminated on an empty search.
  if (search === '') {
    return str;
  }
  // split/join replaces each occurrence exactly once and treats `repl`
  // literally. The old replace-until-gone loop never finished when `repl`
  // contained `search`, and it interpreted "$" sequences in `repl`.
  return str.split(search).join(repl);
}
Here's a version using a lookup map that works a little more efficiently than nested loops:
/**
 * Replaces each character that has an entry in Unaccent.map with its
 * replacement; every other character is copied unchanged.
 *
 * @param {string} str - Text to clean up.
 * @returns {string} The converted text.
 */
function Unaccent(str) {
  const table = Unaccent.map;
  // Borrow hasOwnProperty so that only the table's own keys count, never
  // anything inherited from Object.prototype.
  const owns = Object.prototype.hasOwnProperty;
  let result = "";
  for (let pos = 0; pos < str.length; pos += 1) {
    const ch = str.charAt(pos);
    result += owns.call(table, ch) ? table[ch] : ch;
  }
  return result;
}
// Stored on the function so the table is created only once.
Unaccent.map = {'À':'A','Á':'A','Â':'A'}; // you fill in the rest of the map
Working demo: http://jsfiddle.net/jfriend00/rRpcy/
FYI, a Google search for "accent folding" returns many other implementations (many similar, but also some using regex).
Here's a bit higher performance version (2.5x faster) that can do a direct indexed lookup of the accented characters rather than having to do an object lookup:
/**
 * Replaces accented Latin-1 letters (char codes 192-255) using a positional
 * lookup string: the replacement for code c is Unaccent.map.charAt(c - 192).
 *
 * @param {string} str - Text to clean up.
 * @returns {string} The converted text.
 */
function Unaccent(str) {
  let result = "";
  for (let i = 0, len = str.length; i < len; i++) {
    let replaceChar = str.charAt(i);
    const code = str.charCodeAt(i);
    // Only codes 192-255 are covered by the lookup string.
    if (code >= 192 && code <= 255) {
      const lookup = Unaccent.map.charAt(code - 192);
      // A blank means "no mapping". An empty result (map shorter than
      // expected) is treated the same way so the character is not deleted.
      if (lookup !== ' ' && lookup !== '') {
        replaceChar = lookup;
      }
    }
    result += replaceChar;
  }
  return result;
}
// One entry per char code from 192 to 255 (64 in total); a blank means the
// character has no mapping. The table is assembled from labelled pieces so
// the alignment can be checked. The single-string version was two characters
// short, which shifted every lookup from ß onward and deleted þ and ÿ.
Unaccent.map = [
  'AAAAAAA', // 192-198  À Á Â Ã Ä Å Æ
  'C',       // 199      Ç
  'EEEE',    // 200-203  È É Ê Ë
  'IIII',    // 204-207  Ì Í Î Ï
  'D',       // 208      Ð
  'N',       // 209      Ñ
  'OOOOO',   // 210-214  Ò Ó Ô Õ Ö
  ' ',       // 215      × (no mapping)
  'O',       // 216      Ø
  'UUUU',    // 217-220  Ù Ú Û Ü
  'Y',       // 221      Ý
  ' ',       // 222      Þ (no mapping)
  ' ',       // 223      ß (no mapping)
  'aaaaaaa', // 224-230  à á â ã ä å æ
  'c',       // 231      ç
  'eeee',    // 232-235  è é ê ë
  'iiii',    // 236-239  ì í î ï
  'd',       // 240      ð (mirrors Ð -> D)
  'n',       // 241      ñ
  'ooooo',   // 242-246  ò ó ô õ ö
  ' ',       // 247      ÷ (no mapping)
  'o',       // 248      ø
  'uuuu',    // 249-252  ù ú û ü
  'y',       // 253      ý
  ' ',       // 254      þ (no mapping)
  'y',       // 255      ÿ
].join('');
Working demo: http://jsfiddle.net/jfriend00/Jxr9u/
In this jsperf, the string lookup version (the 2nd example) is about 2.5x faster.
Using an object as a map is a good idea, but given the number of characters you're replacing, it's probably a good idea to pre-initialize the object so that it doesn't have to be re-initialized each time the function gets run (assuming you're running the function more than once):
// The lookup table lives in a closure, so it is built once when this
// statement runs rather than on every call.
var Unaccent = (function () {
  // Extend with the remaining accented characters as needed.
  var charMap = {'À':'A','Á':'A','Â':'A','Ã':'A','Ä':'A'};

  /**
   * Replaces every character found in charMap; all others are kept.
   * @param {string} str - Text to clean up.
   * @returns {string} The converted text.
   */
  return function (str) {
    var modified = "";
    var pos = 0;
    while (pos < str.length) {
      var cur = str.charAt(pos);
      var mapped = charMap[cur];
      // Fall back to the original character when there is no usable entry.
      modified += mapped ? mapped : cur;
      pos += 1;
    }
    return modified;
  };
}());
This will front-load the heavy lifting of the function to page load time (you can do some modifications to delay it until the first call to the function if you like). But it will take some of the processing time out of the actual function call.
It's possible some browsers will actually optimize this part anyway, so you might not see a benefit. But on older browsers (where performance is of greater concern), you'll probably see some benefit to pre-processing your character map.
You can prepare an object of key/value pairs and traverse it with jQuery's $.each.
Example :
/**
 * Replaces accented characters by running one global regex replacement per
 * table entry. Requires jQuery for $.each.
 *
 * @param {string} str - Text to clean up.
 * @returns {string} The converted text.
 */
function Unaccent(str) {
  var replaceString = {'À':'A','Á':'A','Â':'A'}; // add more
  $.each(replaceString, function(k, v) {
    // Keys are single letters, so they are safe to use as a pattern as is.
    var regX = new RegExp(k, 'g');
    str = str.replace(regX, v);
  });
  // Hand the result back; without this the function returned undefined.
  return str;
}
Working Demo
Good Luck !!

Categories

Resources