How to match overlapping keywords with regex

How to match overlapping keywords with regex - javascript

This example finds only sam. How to make it find both sam and samwise?
var regex = /sam|samwise|merry|pippin/g;
var string = 'samwise gamgee';
var match = string.match(regex);
console.log(match);
Note: this is simple example, but my real regexes are created by joining 500 keywords at time, so it's too cumbersome to search all overlapping and make a special case for them with something like /sam(wise)/. The other obvious solution I can think of, is to just iterate though all keywords individually, but I think it must be a fast and elegant, single-regex solution.

You can use lookahead regex with capturing group for this overlapping match:
var regex = /(?=(sam))(?=(samwise))/;
var string = 'samwise';
var match = string.match( regex ).filter(Boolean);
//=> ["sam", "samwise"]
It is important to not to use g (global) flag in the regex.
filter(Boolean) is used to remove first empty result from matched array.

Why not just map indexOf() on array substr:
var string = 'samwise gamgee';
var substr = ['sam', 'samwise', 'merry', 'pippin'];
var matches = substr.map(function(m) {
return (string.indexOf(m) < 0 ? false : m);
}).filter(Boolean);
See fiddle console.log(matches);
Array [ "sam", "samwise" ]
Probably of better performance than using regex. But if you need the regex functionality e.g. for caseless matching, word boundaries, returned matches... use with exec method:
var matches = substr.map(function(v) {
var re = new RegExp("\\b" + v, "i"); var m = re.exec(string);
return (m !== null ? m[0] : false);
}).filter(Boolean);
This one with i-flag (ignore case) returns each first match with initial \b word boundary.

I can't think of a simple and elegant solution, but I've got something that uses a single regex:
function quotemeta(s) {
return s.replace(/\W/g, '\\$&');
}
let keywords = ['samwise', 'sam'];
let subsumed_by = {};
keywords.sort();
for (let i = keywords.length; i--; ) {
let k = keywords[i];
for (let j = i - 1; j >= 0 && k.startsWith(keywords[j]); j--) {
(subsumed_by[k] = subsumed_by[k] || []).push(keywords[j]);
}
}
keywords.sort(function (a, b) b.length - a.length);
let re = new RegExp('(?=(' + keywords.map(quotemeta).join('|') + '))[\\s\\S]', 'g');
let string = 'samwise samgee';
let result = [];
let m;
while (m = re.exec(string)) {
result.push(m[1]);
result.push.apply(result, subsumed_by[m[1]] || []);
}
console.log(result);

How about:
var re = /((sam)(?:wise)?)/;
var m = 'samwise'.match(re); // gives ["samwise", "samwise", "sam"]
var m = 'sam'.match(re); // gives ["sam", "sam", "sam"]
You can use Unique values in an array to remove dupplicates.

If you don't want to create special cases, and if order doesn't matter, why not first match only full names with:
\b(sam|samwise|merry|pippin)\b
and then, filter if some of these doesn't contain shorter one? for example with:
(sam|samwise|merry|pippin)(?=\w+\b)
It is not one elegant regex, but I suppose it is simpler than iterating through all matches.

Related

How to remove word in string based on array in Javascript when word's character length in string is fewer than in array?

I want to remove some word in string based on array. But the word's character length in string is fewer than in array. Is it possible to match it using regex and then replace it with empty string? If not, what is the alternatives?
I tried using regex to match the word, but i can't achieve it. I don't know how to make regex match minimum 3 character from the array.
array = ['reading', 'books'];
string = 'If you want to read the book, just read it.';
desiredOutput = 'If you want to the , just it.';
// Desired match
'reading' -> match for 'rea', 'read', 'readi', 'readin', 'reading'
'books' -> match for 'boo', 'book', 'books'

One option is to match 3 or more word characters starting at a word boundary, then use a replacer function to return the empty string if any of the words startsWith the word in question:
const array = ['reading', 'books'];
const string = 'If you want to read the book, just read it.';
const output = string.replace(
/\b\w{3,}/g,
word => array.some(item => item.startsWith(word)) ? '' : word
);
console.log(output);

The answer from CertainPerformance is better - easier to implement and to maintain but it's worth noting that - you can also generate a regex from the array.
The idea is simple enough - if you want to match r, re, rea, read, readi, readin, reading the regex for that is reading|readin|readi|read|rea|re|r. The reason you want the longest variation first is because otherwise the regex engine will stop at the first match in finds:
let regex = /r|re|rea|read/g
// ↑_________________
console.log( // |
"read".replace(regex, "")// |
// ↑___________________________|
)
So you can take a word and break it out in a this pattern to generate a regex from it
function allSubstrings(word) {
let substrings = [];
for (let i = word.length; i > 0; i--) {
let sub = word.slice(0, i);
substrings.push(sub)
}
return substrings;
}
console.log(allSubstrings("reading"))
With that you can simply generate the regex you need.
function allSubstrings(word) {
let substrings = [];
for (let i = word.length; i > 0; i--) {
let sub = word.slice(0, i);
substrings.push(sub)
}
return substrings;
}
function toPattern(word) {
let substrings = allSubstrings(word);
let pattern = substrings.join("|");
return pattern;
}
console.log(toPattern("reading"))
The final thing is to take an array and convert it to a regex. Which requires treating each word and then combining each individual regex into one that matches any of the words:
const array = ['reading', 'books'];
const string = 'If you want to read the book, just read it.';
//generate the pattern
let pattern = array
.map(toPattern) //first, for each word
.join("|"); //join patterns for all words
//convert the pattern to a regex
let regex = new RegExp(pattern, "g");
let result = string.replace(regex, "");
//desiredOutput: 'If you want to the , just it.';
console.log(result);
function allSubstrings(word) {
let substrings = [];
for (let i = word.length; i > 0; i--) {
let sub = word.slice(0, i);
substrings.push(sub)
}
return substrings;
}
function toPattern(word) {
let substrings = allSubstrings(word);
let pattern = substrings.join("|");
return pattern;
}
So, this is how you can generate a regular expression from that array. In this case, that works, but it's not guaranteed to, because there is a danger it could match something you don't want. For example, r will match any character, it doesn't necessarily need to be in a word that matches this.
const array = ['reading'];
const string = 'The quick brown fox jumps over the lazy dog';
// ^ ^
let pattern = array
.map(word => allSubstrings(word).join("|"))
.join("|");
let regex = new RegExp(pattern, "g");
let result = string.replace(regex, "");
console.log(result);
function allSubstrings(word) {
let substrings = [];
for (let i = word.length; i > 0; i--) {
let sub = word.slice(0, i);
substrings.push(sub)
}
return substrings;
}
Which is when it becomes more complicated, as you want to generate a more complicated pattern for each word. You generally want to match words, so you can use the word boundary character \b which means that the pattern for "reading" can now look like this:
\breading\b|\breadin\b|\breadi\b|\bread\b|\brea\b|\bre\b|\br\b
↑↑ ↑↑ ↑↑ ↑↑ ↑↑ ↑↑ ↑↑ ↑↑ ↑↑ ↑↑ ↑↑ ↑↑ ↑↑ ↑↑
In the interest of keeping the output at least somewhat readable, it can instead be put in a group and the whole group made to match a single word:
\b(?:reading|readin|readi|read|rea|re|r)\b
↑↑
||____ non-capturing group
So, you have to generate this pattern
function toPattern(word) {
let substrings = allSubstrings(word);
//escape backslashes, because this is a string literal and we need \b as content
let pattern = "\\b(?:" + substrings.join("|") + ")\\b";
return pattern;
}
Which leads us to this
const array = ['reading', 'books'];
const string = 'The quick brown fox jumps over the lazy dog. If you want to read the book, just read it.';
let pattern = array
.map(toPattern)
.join("|");
let regex = new RegExp(pattern, "g");
let result = string.replace(regex, "");
console.log(result);
function allSubstrings(word) {
let substrings = [];
for (let i = word.length; i > 0; i--) {
let sub = word.slice(0, i);
substrings.push(sub)
}
return substrings;
}
function toPattern(word) {
let substrings = allSubstrings(word);
let pattern = "\\b(?:" + substrings.join("|") + ")\\b";
return pattern;
}
This will suffice to solve your task. So it's possible to generate a regex. The final one looks like this:
/\b(?:reading|readin|readi|read|rea|re|r)\b|\b(?:books|book|boo|bo|b)\b/g
But most of the generation of it is spent trying to generate something that works. It's not a necessarily complex solution but as mentioned, the one suggested by CertainPerformance is better because it's simpler which means less chance of it failing and it would be easier to maintain for the future.

I don't know of a straight way to do it, but you can create your own regexp pattern, like so:
// This function create a regex pattern string for each word in the array.
// The str is the string value (the word),
// min is the minimum required letters in eac h word
function getRegexWithMinChars(str, min) {
var charArr = str.split("");
var length = charArr.length;
var regexpStr = "";
for(var i = 0; i < length; i++){
regexpStr +="[" + charArr[i] + "]" + (i < min ? "" : "?");
}
return regexpStr;
}
// This function returns a regexp object with the patters of the words in the array
function getStrArrayRegExWithMinChars(strArr, min) {
var length = strArr.length;
var regexpStr = "";
for(var i = 0; i < length; i++) {
regexpStr += "(" + getRegexWithMinChars(strArr[i], min) + ")?";
}
return new RegExp(regexpStr, "gm");
}
var regexp = getStrArrayRegExWithMinChars(searchArr, 3);
// With the given regexp I was able to use string replace to
// find and replace all the words in the string
str.replace(regexp, "");
//The same can be done with one ES6 function
const getStrArrayRegExWithMinChars = (searchArr, min) => {
return searchArr.reduce((wordsPatt, word) => {
const patt = word.split("").reduce((wordPatt, letter, index) => {
return wordPatt + "[" + letter + "]" + (index < min ? "" : "?");
},"");
return wordsPatt + "(" + patt + ")?";
}, "");
}
var regexp = getStrArrayRegExWithMinChars(searchArr, 3);
// With the given regexp I was able to use string replace to
// find and replace all the words in the string
str.replace(regexp, "");

Replace capture group content in JavaScript

In my JavaScript code I have a regular expression with capture groups (that is configured by library user) and a source string which matches this regular expression. The regular expression matches whole string (i.e. it has ^ and $ characters at its start and end).
A silly example:
var regex = /^([a-zA-Z]{2})-([0-9]{3})_.*$/;
var sourceStr = "ab-123_foo";
I want to reassemble the source string, replacing values in the capture groups and leaving the rest of the string intact. Note that, while this example has most of the "rest of the string" at its end, it actually may be anywhere else.
For example:
var replacements = [ "ZX", "321" ];
var expectedString = "ZX-321_foo";
Is there a way to do this in JavaScript?
NB: The regular expression is configured by the library user via the legacy API. I can not ask user to provide a second regular expression to solve this problem.

Without changing the regex the best I can think of is a callback that replaces the matches
sourceStr = sourceStr.replace(regex, function(match, $1, $2, offset, str) {
return str.replace($1, replacements[0]).replace($2, replacements[1]);
});
That's not a very good solution, as it would fail on something like
var sourceStr = "ab_ab-123_foo";
as it would replace the first ab instead of the matched one etc. but works for the given example and any string that doesn't repeat the matched characters
var regex = /^([a-zA-Z]{2})-([0-9]{3})_.*$/;
var sourceStr = "ab-123_foo";
var replacements = [ "ZX", "321" ];
sourceStr = sourceStr.replace(regex, function(match, $1, $2, offset, str) {
return str.replace($1, replacements[0]).replace($2, replacements[1]);
});
document.body.innerHTML = sourceStr;

I think this is close. It satisfies the two test cases but I'm unsure about leading and trailing groupings.
function replacer (regex, sourceStr, replacements) {
// Make a new regex that adds groups to ungrouped items.
var groupAll = "";
var lastIndex = 0;
var src = regex.source;
var reGroup=/\(.*?\)/g;
var match;
while(match = reGroup.exec(src)){
groupAll += "(" + src.substring(lastIndex, match.index) + ")";
groupAll += match[0];
lastIndex = match.index + match[0].length;
}
var reGroupAll = new RegExp(groupAll);
// Replace the original groupings with the replacements
// and append what was previously ungrouped.
var rep = sourceStr.replace(reGroupAll, function(){
// (match, $1, $2, ..., index, source)
var len = arguments.length - 2;
var ret = "";
for (var i = 1,j=0; i < len; i+=2,j++) {
ret += arguments[i];
ret += replacements[j];
}
return ret;
});
return rep;
}
var regex = /^([a-zA-Z]{2})-([0-9]{3})_.*$/;
var sourceStr = "ab-123_foo";
var replacements = [ "ZX", "321" ];
var expectedString = "ZX-321_foo";
var replaced = replacer(regex, sourceStr, replacements);
console.log(replaced);
console.log(replaced === expectedString);
regex = /^.*_([a-zA-Z]{2})-([0-9]{3})$/;
sourceStr = "ab_ab-123";
expectedString = "ab_ZX-321";
var replaced = replacer(regex, sourceStr, replacements);
console.log(replaced);
console.log(replaced === expectedString);
Output:
ZX-321_foo
true
ab_ZX-321
true

javascript match returning word boundry

RegEx is not my strong suit. I hope one of you can help me.
I am trying to use javacript.match() to search for all hashed(# at the beginning) words.
and I get a returned white space.
string = "#foo #bar"
result = string.match(/(^|\W)(#[a-z\d][\w-]*)/ig);
console.log(result)
Results in:
["#foo", " #bar"]
Notice the whitespace in #bar.
What should I change to my RegEx to exclude the boundary in the return to .match?
Thanks for the help!!

You don't need (^|\W) before as # is considered non word character anyway. You are getting space because \W will match space also.
This regex would work better:
var re = /(?:^|\s)(#[a-z\d][\w-]*)/g,
matches = [],
input = "#foo #bar abc#baz";
while (match = re.exec(input)) matches.push(match[1].trim());
console.log(matches);
//=> ["#foo", "#bar"]
EDIT: To avoid loops:
var m = [];
var str = "#foo #bar abc#baz";
str.replace(/(^|\s)(#[a-z\d][\w-]*)/g, function($1) { m.push($1.trim()); return $1; } );
console.log(m);
//=> ["#foo", "#bar"]

You need to use this syntax to extract capture groups:
var str = '#foo #bar';
var myRegexp = new RegExp('(?:^|\\W)(#[^\\W_][\\w-]*)', 'g');
var matchResult = myRegexp.exec(str);
var result = Array();
while (matchResult != null) {
result.push(matchResult[1]);
matchResult = myRegexp.exec(str);
}
console.log(result);
If you don't want to loop through the match results, you can use this trick:
var str = '#foo #bar';
var result = Array();
str.replace(/(?:^|\W)(#[^\W_][\w-]*)/g, function (m, g1) { result.push(g1); } );
console.log(result);

Javascript: String search for regex, starting at the end of the string

Is there a javascript string function that search a regex and it will start the search at the end?
If not, what is the fastest and/or cleanest way to search the index of a regex starting from the end?
example of regex:
/<\/?([a-z][a-z0-9]*)\b[^>]*>?/gi

Maybe this can be useful and easier:
str.lastIndexOf(str.match(<your_regex_here>).pop());

Perhaps something like this is suitable for you?
Javascript
function lastIndexOfRx(string, regex) {
var match = string.match(regex);
return match ? string.lastIndexOf(match.slice(-1)) : -1;
}
var rx = /<\/?([a-z][a-z0-9]*)\b[^>]*>?/gi;
console.log(lastIndexOfRx("", rx));
console.log(lastIndexOfRx("<i>it</i><b>bo</b>", rx));
jsFiddle
And just for interest, this function vs the function that you choose to go with. jsperf
This requires that you format your regex correctly for matching exactly the pattern you want and globally (like given in your question), for example /.*(<\/?([a-z][a-z0-9]*)\b[^>]*>?)/i will not work with this function. But what you do get is a function that is clean and fast.

You may create a reverse function like:
function reverse (s) {
var o = '';
for (var i = s.length - 1; i >= 0; i--)
o += s[i];
return o;
}
and then use
var yourString = reverse("Your string goes here");
var regex = new Regex(your_expression);
var result = yourString.match(regex);
Another idea: if you want to search by word in reverse order then
function reverseWord(s) {
var o = '';
var split = s.split(' ');
for (var i = split.length - 1; i >= 0; i--)
o += split[i] + ' ';
return o;
}
var yourString = reverseWord("Your string goes here");
var regex = new Regex(your_expression);
var result = yourString.match(regex);

Andreas gave this from the comment:
https://stackoverflow.com/a/274094/402037
String.prototype.regexLastIndexOf = function(regex, startpos) {
regex = (regex.global) ? regex : new RegExp(regex.source, "g" + (regex.ignoreCase ? "i" : "") + (regex.multiLine ? "m" : ""));
if(typeof (startpos) == "undefined") {
startpos = this.length;
} else if(startpos < 0) {
startpos = 0;
}
var stringToWorkWith = this.substring(0, startpos + 1);
var lastIndexOf = -1;
var nextStop = 0;
while((result = regex.exec(stringToWorkWith)) != null) {
lastIndexOf = result.index;
regex.lastIndex = ++nextStop;
}
return lastIndexOf;
}
Which gives the functionality that I need, I tested my regex, and it is successful. So I'll use this

It depends what you exactly want to search for. You can use string.lastIndexOf or inside the regexp to use $ (end of the string).
Update:
try the regexp
/<\/?([a-z][a-z0-9]*)\b[^>]*>?[\w\W]*$/gi

var m = text.match(/.*(<\/?([a-z][a-z0-9]*)\b[^>]*>?)/i);
if (m) {
textFound = m[1];
position = text.lastIndexOf(textFound);
}
Use .* to skip as much text as posible, capture the text found and search it with lastIndexOf
EDIT:
Well, if text is found, no need to search with lastIndexOf. m[0] contains the full coincidence (including all the initial padding), and m[1] the searched text. So position of found text is m[0].length - m[1].length
var m = text.match(/.*(<\/?([a-z][a-z0-9]*)\b[^>]*>?)/i);
if (m) {
textFound = m[1];
position = m[0].length - m[1].length;
}

Assuming you're looking for a string 'token', then you need the position of 'token' that has no other 'token' following until the end of the string.
So you should compose your regex something like that:
$token = 'token';
$re = "/(?:$token)[^(?:$token)]*$/";
This will find your 'token' where no further 'token' can be found until string end. The "(?:" grouping simply makes the group non-storing, slightly speeding up performance and saving memory.

Return positions of a regex match() in Javascript?

Is there a way to retrieve the (starting) character positions inside a string of the results of a regex match() in Javascript?

exec returns an object with a index property:
var match = /bar/.exec("foobar");
if (match) {
console.log("match found at " + match.index);
}
And for multiple matches:
var re = /bar/g,
str = "foobarfoobar";
while ((match = re.exec(str)) != null) {
console.log("match found at " + match.index);
}

Here's what I came up with:
// Finds starting and ending positions of quoted text
// in double or single quotes with escape char support like \" \'
var str = "this is a \"quoted\" string as you can 'read'";
var patt = /'((?:\\.|[^'])*)'|"((?:\\.|[^"])*)"/igm;
while (match = patt.exec(str)) {
console.log(match.index + ' ' + patt.lastIndex);
}

In modern browsers, you can accomplish this with string.matchAll().
The benefit to this approach vs RegExp.exec() is that it does not rely on the regex being stateful, as in #Gumbo's answer.
let regexp = /bar/g;
let str = 'foobarfoobar';
let matches = [...str.matchAll(regexp)];
matches.forEach((match) => {
console.log("match found at " + match.index);
});

From developer.mozilla.org docs on the String .match() method:
The returned Array has an extra input property, which contains the
original string that was parsed. In addition, it has an index
property, which represents the zero-based index of the match in the
string.
When dealing with a non-global regex (i.e., no g flag on your regex), the value returned by .match() has an index property...all you have to do is access it.
var index = str.match(/regex/).index;
Here is an example showing it working as well:
var str = 'my string here';
var index = str.match(/here/).index;
console.log(index); // <- 10
I have successfully tested this all the way back to IE5.

You can use the search method of the String object. This will only work for the first match, but will otherwise do what you describe. For example:
"How are you?".search(/are/);
// 4

Here is a cool feature I discovered recently, I tried this on the console and it seems to work:
var text = "border-bottom-left-radius";
var newText = text.replace(/-/g,function(match, index){
return " " + index + " ";
});
Which returned: "border 6 bottom 13 left 18 radius"
So this seems to be what you are looking for.

I'm afraid the previous answers (based on exec) don't seem to work in case your regex matches width 0. For instance (Note: /\b/g is the regex that should find all word boundaries) :
var re = /\b/g,
str = "hello world";
var guard = 10;
while ((match = re.exec(str)) != null) {
console.log("match found at " + match.index);
if (guard-- < 0) {
console.error("Infinite loop detected")
break;
}
}
One can try to fix this by having the regex match at least 1 character, but this is far from ideal (and means you have to manually add the index at the end of the string)
var re = /\b./g,
str = "hello world";
var guard = 10;
while ((match = re.exec(str)) != null) {
console.log("match found at " + match.index);
if (guard-- < 0) {
console.error("Infinite loop detected")
break;
}
}
A better solution (which does only work on newer browsers / needs polyfills on older/IE versions) is to use String.prototype.matchAll()
var re = /\b/g,
str = "hello world";
console.log(Array.from(str.matchAll(re)).map(match => match.index))
Explanation:
String.prototype.matchAll() expects a global regex (one with g of global flag set). It then returns an iterator. In order to loop over and map() the iterator, it has to be turned into an array (which is exactly what Array.from() does). Like the result of RegExp.prototype.exec(), the resulting elements have an .index field according to the specification.
See the String.prototype.matchAll() and the Array.from() MDN pages for browser support and polyfill options.
Edit: digging a little deeper in search for a solution supported on all browsers
The problem with RegExp.prototype.exec() is that it updates the lastIndex pointer on the regex, and next time starts searching from the previously found lastIndex.
var re = /l/g,
str = "hello world";
console.log(re.lastIndex)
re.exec(str)
console.log(re.lastIndex)
re.exec(str)
console.log(re.lastIndex)
re.exec(str)
console.log(re.lastIndex)
This works great as long as the regex match actually has a width. If using a 0 width regex, this pointer does not increase, and so you get your infinite loop (note: /(?=l)/g is a lookahead for l -- it matches the 0-width string before an l. So it correctly goes to index 2 on the first call of exec(), and then stays there:
var re = /(?=l)/g,
str = "hello world";
console.log(re.lastIndex)
re.exec(str)
console.log(re.lastIndex)
re.exec(str)
console.log(re.lastIndex)
re.exec(str)
console.log(re.lastIndex)
The solution (that is less nice than matchAll(), but should work on all browsers) therefore is to manually increase the lastIndex if the match width is 0 (which may be checked in different ways)
var re = /\b/g,
str = "hello world";
while ((match = re.exec(str)) != null) {
console.log("match found at " + match.index);
// alternative: if (match.index == re.lastIndex) {
if (match[0].length == 0) {
// we need to increase lastIndex -- this location was already matched,
// we don't want to match it again (and get into an infinite loop)
re.lastIndex++
}
}

This member fn returns an array of 0-based positions, if any, of the input word inside the String object
String.prototype.matching_positions = function( _word, _case_sensitive, _whole_words, _multiline )
{
/*besides '_word' param, others are flags (0|1)*/
var _match_pattern = "g"+(_case_sensitive?"i":"")+(_multiline?"m":"") ;
var _bound = _whole_words ? "\\b" : "" ;
var _re = new RegExp( _bound+_word+_bound, _match_pattern );
var _pos = [], _chunk, _index = 0 ;
while( true )
{
_chunk = _re.exec( this ) ;
if ( _chunk == null ) break ;
_pos.push( _chunk['index'] ) ;
_re.lastIndex = _chunk['index']+1 ;
}
return _pos ;
}
Now try
var _sentence = "What do doers want ? What do doers need ?" ;
var _word = "do" ;
console.log( _sentence.matching_positions( _word, 1, 0, 0 ) );
console.log( _sentence.matching_positions( _word, 1, 1, 0 ) );
You can also input regular expressions:
var _second = "z^2+2z-1" ;
console.log( _second.matching_positions( "[0-9]\z+", 0, 0, 0 ) );
Here one gets the position index of linear term.

var str = "The rain in SPAIN stays mainly in the plain";
function searchIndex(str, searchValue, isCaseSensitive) {
var modifiers = isCaseSensitive ? 'gi' : 'g';
var regExpValue = new RegExp(searchValue, modifiers);
var matches = [];
var startIndex = 0;
var arr = str.match(regExpValue);
[].forEach.call(arr, function(element) {
startIndex = str.indexOf(element, startIndex);
matches.push(startIndex++);
});
return matches;
}
console.log(searchIndex(str, 'ain', true));

I had luck using this single-line solution based on matchAll (my use case needs an array of string positions)
let regexp = /bar/g;
let str = 'foobarfoobar';
let matchIndices = Array.from(str.matchAll(regexp)).map(x => x.index);
console.log(matchIndices)
output: [3, 9]

function trimRegex(str, regex){
return str.substr(str.match(regex).index).split('').reverse().join('').substr(str.match(regex).index).split('').reverse().join('');
}
let test = '||ab||cd||';
trimRegex(test, /[^|]/);
console.log(test); //output: ab||cd
or
function trimChar(str, trim, req){
let regex = new RegExp('[^'+trim+']');
return str.substr(str.match(regex).index).split('').reverse().join('').substr(str.match(regex).index).split('').reverse().join('');
}
let test = '||ab||cd||';
trimChar(test, '|');
console.log(test); //output: ab||cd

var str = 'my string here';
var index = str.match(/hre/).index;
alert(index); // <- 10

Develop Reference

JavaScript is the programming language of the Web.

How to match overlapping keywords with regex - javascript

How about: var re = /((sam)(?:wise)?)/; var m = 'samwise'.match(re); // gives ["samwise", "samwise", "sam"] var m = 'sam'.match(re); // gives ["sam", "sam", "sam"] You can use Unique values in an array to remove dupplicates.

Related

How to remove word in string based on array in Javascript when word's character length in string is fewer than in array?

Replace capture group content in JavaScript

javascript match returning word boundry

Javascript: String search for regex, starting at the end of the string

Return positions of a regex match() in Javascript?

Categories

Resources