javascript regex pattern with an array - javascript

I want to create a regex pattern which should be able to search through an array.
Let's say :
var arr = [ "first", "second", "third" ];
var match = text.match(/<arr>/);
which should be able to match only
<first> or <second> or <third> ......
but should ignore
<ffirst> or <dummy>
I need an efficient approach please .
Any help would be great .
Thanks

First you can do array.map to quote all special regex characters.
Then you can do array.join to join the array elements using | and create an instance of RegExp.
Code:
/**
 * Escape regex metacharacters in `str` so it can be embedded in a pattern
 * and matched literally.
 *
 * @param {string} str - Text to escape.
 * @returns {string} `str` with each metacharacter prefixed by a backslash.
 */
function quoteSpecial(str) {
  var metaChars = /([\[\]^$|()\\+*?{}=!.])/g;
  return str.replace(metaChars, function (_, ch) {
    return '\\' + ch;
  });
}
var arr = [ "first", "second", "third", "fo|ur" ];
var re = new RegExp('<(?:' + arr.map(quoteSpecial).join('|') + ')>');
//=> /<(?:first|second|third|fo\|ur)>/
then use this RegExp object:
'<first>'.match(re); // ["<first>"]
'<ffirst>'.match(re); // null
'<dummy>'.match(re); // null
'<second>'.match(re); // ["<second>"]
'<fo|ur>'.match(re); // ["<fo|ur>"]

You should search for a specific word from a list using (a|b|c).
The list is made from the arr by joining the values with | char as glue
var arr = [ "first", "second", "third" ];
var match = text.match(new RegExp("<(?:"+arr.join("|")+")>")); //matches <first> <second> and <third>
Note that if your source words may contain characters that are special in regular expressions (such as |, . or parentheses), the pattern will not match them literally, so you need to escape those characters before joining the array.
A good function for doing so can be found here:
/**
 * Escape regex metacharacters in a value so it can be used literally
 * inside a regular expression.
 *
 * @param {*} str - Value to escape; it is converted with String() first.
 * @param {string} [delimiter] - Optional extra character to escape as well
 *   (for example the "/" used to delimit a regex literal).
 * @returns {string} The escaped text.
 */
function regexpQuote(str, delimiter) {
  var text = String(str);
  var extra = delimiter || '';
  // The extra delimiter is placed inside the character class, right before the trailing "-".
  var specials = new RegExp('[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\' + extra + '-]', 'g');
  return text.replace(specials, '\\$&');
}
so in this case you'll have
/**
 * Escape every element of an array for literal use inside a regular expression.
 *
 * Uses Array.prototype.map rather than for...in, because for...in also
 * enumerates inherited enumerable properties (e.g. anything a library added
 * to Array.prototype), which would end up in the result.
 *
 * @param {Array} arr - Values to escape.
 * @returns {string[]} A new array holding the escaped values, in order.
 */
function escapeArray(arr) {
  // Wrap the call so map's extra (index, array) arguments are not passed
  // on as regexpQuote's `delimiter` parameter.
  return arr.map(function (item) {
    return regexpQuote(item);
  });
}
var arr = [ "first", "second", "third" ];
var pattern = new RegExp("<(?:"+escapeArray(arr).join("|")+")>");
var match = text.match(pattern); //matches <first> <second> and <third>

Related

How to find words followed by space and replace with tag?

I have some string values in the array
var anchors = [
"Can",
", please?"
];
I have string like the below example
<anchor>0</anchor> you repeat that<anchor>1</anchor>
I want to frame the resulting string in the below
Can <drop></drop> <drop></drop> <drop></drop>, please?
I want to replace each <anchor>N</anchor> with the string at index N of the given array, and replace every other word with a <drop></drop> tag.
Someone please help me to frame the resulting string using javascript string operations or regex
You can try something like below
// Sketch: substitute <anchor>N</anchor> placeholders with myarray[N], then
// collect the remaining words together with the array entries and show them.
var myarray =[ "Can",",please?" ];
var inputstring ="<anchor>0</anchor> you repeat that <anchor>1</anchor>"
// NOTE(review): `len` is not read anywhere in this snippet.
var len=inputstring.split(" ").length-1
// NOTE(review): declared without an initial value, so the first `+=` below
// concatenates onto undefined and the result starts with "undefined".
var inputstring1;
// NOTE(review): `<=` runs one extra iteration with i === myarray.length,
// where myarray[i] is undefined and no matching placeholder exists.
for(var i=0;i<=myarray.length;i++)
{
// Each pass appends a copy of the ORIGINAL string with only placeholder i
// replaced (a string pattern replaces just the first occurrence).
inputstring1 += inputstring.replace("<anchor>"+i+"</anchor>",myarray[i])
}
// Second pass: strip the placeholders cumulatively from a working copy.
var inputstring2=inputstring
for(var i=0;i<=myarray.length;i++)
{
inputstring2 = inputstring2.replace("<anchor>"+i+"</anchor>","")
}
// NOTE(review): assigned without var/let/const — an implicit global in
// sloppy mode and a ReferenceError in strict mode or ES modules.
myarrayremovedelemets=inputstring2.split(" ")
// Append all entries of myarray to the split words in place.
myarrayremovedelemets.push.apply(myarrayremovedelemets, myarray);
alert(myarrayremovedelemets);
then replace inputstring1 with <drop></drop>
You can use the following logic with callback function:
Output:
Can <drop></drop> <drop></drop> <drop></drop>, please?
Code:
var anchors = [
"Can",
", please?"
];
/**
 * Replacement callback for String.prototype.replace.
 *
 * @param {string} match - The full matched text.
 * @param {string} [g1] - Captured anchor index, when an anchor tag matched.
 * @param {string} [g2] - Captured plain word, when a word matched.
 * @returns {string} The anchor text, a drop placeholder, or the match itself.
 */
var replaceCallback = function (match, g1, g2) {
  // An anchor tag matched: g1 is the index to look up in `anchors`.
  if (g1 != null) {
    return anchors[g1];
  }
  // A plain word becomes an empty drop slot; anything else passes through unchanged.
  return g2 != null ? "<drop></drop>" : match;
};
var str = "<anchor>0</anchor> you repeat that<anchor>1</anchor>";
str = str.replace(/<anchor>(\d+)<\/anchor>|(\w+)/g, replaceCallback);
alert(str);
Edit: Updated regex:
/<anchor>(\d+)<\/anchor>|((?:(?!<anchor>)\S)+)/g
Edit: For single:
/<(?:anchor|single)>(\d+)<\/(?:anchor|single)>|((?:(?!<(?:anchor|single)>)\S)+)/g

Split a string at nth occurrence of a regex in javascript

I know split can get a second parameter as a limit, but it is not what I'm looking for. And I know it can be done by splitting and joining again with a solid string delimiter.
The problem is the delimiter is a regular expression and I don't know the exact length of the pattern that matches.
Consider this string:
this is a title
--------------------------
rest is body! even if there are some dashes!
--------
---------------------
it should not be counted as a separated part!
By using this:
str.split(/---*\n/);
I will get:
[
'this is a title',
'rest is body! even if there are some dashes.!',
'',
'it should not be counted as a separated part!'
]
And this is what I wanted to be: (if I want to split by the first occurrence)
[
'this is a title',
'rest is body! even if there are some dashes.!\n--------\n---------------------\nit should not be counted as a separated part!'
]
This solution is what I currently have, but it's just for the first occurrence.
/**
 * Split a string in two at the nth occurrence of a regular expression.
 *
 * @param {string} str - Text to split.
 * @param {RegExp} regex - Delimiter pattern; it may or may not carry the `g` flag.
 * @param {number} [n=1] - 1-based occurrence of the delimiter to split on.
 * @returns {string[]} `[before, after]` with the delimiter removed, or `[str]`
 *   when the string contains fewer than `n` occurrences (mirroring how
 *   String.prototype.split returns the whole string when nothing matches).
 */
function split(str, regex, n = 1) {
  // matchAll requires a global regex and works on its own clone, so the
  // caller's regex (and its lastIndex) is never touched.
  const flags = regex.flags.includes('g') ? regex.flags : regex.flags + 'g';
  const globalRegex = new RegExp(regex.source, flags);

  let seen = 0;
  for (const match of str.matchAll(globalRegex)) {
    seen += 1;
    if (seen === n) {
      const end = match.index + match[0].length;
      return [str.slice(0, match.index), str.slice(end)];
    }
  }
  return [str];
}
Any ideas on how to generalize the solution for any number n to split the string on the nth occurrence of regex?
var str= "this-----that---these------those";
var N= 2;
var regex= new RegExp( "^((?:[\\s\\S]*?---*){"+(N-1)+"}[\\s\\S]*?)---*([\\s\\S]*)$" );
var result= regex.exec(str).slice(1,3);
console.log(result);
Output:
["this-----that", "these------those"]
jsFiddle
Option with the function:
/**
 * Build a regex that splits a string at the Nth run of two or more dashes.
 * Group 1 captures everything before that run, group 2 everything after it.
 *
 * @param {number} N - 1-based index of the dash run to split on.
 * @returns {RegExp} The anchored splitting pattern.
 */
var generateRegExp = function (N) {
  // The first N-1 delimiters are consumed inside group 1.
  var skipped = N - 1;
  var head = "^((?:[\\s\\S]*?---*){";
  var tail = "}[\\s\\S]*?)---*([\\s\\S]*)$";
  return new RegExp(head + skipped + tail);
};
/**
 * Split `str` in two using the regex produced by `regexGenerator(N)`.
 *
 * @param {string} str - Text to split.
 * @param {function(number): RegExp} regexGenerator - Returns a regex whose
 *   groups 1 and 2 capture the text before and after the Nth delimiter.
 * @param {number} N - 1-based delimiter occurrence, passed to the generator.
 * @returns {?string[]} `[before, after]`, or null when the regex does not match
 *   (for example when `str` has fewer than N delimiters).
 */
var getSlice = function (str, regexGenerator, N) {
  var match = regexGenerator(N).exec(str);
  // exec() returns null on no match; guard it instead of throwing a TypeError.
  return match === null ? null : match.slice(1, 3);
};
var str= "this-----that---these------those";
var N= 2;
var result= getSlice(str, generateRegExp, N);
console.log(result);
jsFiddle
Option with the function 2:
/**
 * Split `str` in two at the Nth occurrence of a delimiter pattern.
 *
 * @param {string} str - Text to split.
 * @param {string} regex - Delimiter pattern source (not a RegExp object).
 * @param {number} N - 1-based occurrence of the delimiter to split on.
 * @returns {?string[]} `[before, after]`, or null when `str` contains fewer
 *   than N delimiters.
 */
var getSlice = function (str, regex, N) {
  // Group the delimiter so alternation such as "a|b" stays scoped to it
  // instead of splitting the whole pattern in two.
  var delimiter = "(?:" + regex + ")";
  var re = new RegExp(
    "^((?:[\\s\\S]*?" + delimiter + "){" + (N - 1) + "}[\\s\\S]*?)" + delimiter + "([\\s\\S]*)$"
  );
  var match = re.exec(str);
  if (match === null) {
    return null;
  }
  // The tail group is always the last one, even when the delimiter contains
  // capture groups of its own that shift the numbering.
  return [match[1], match[match.length - 1]];
};
var str= "this-----that---these------those";
var N= 3;
var result= getSlice(str, "---*", N);
console.log(result);
jsFiddle

Using Regex to pull out a part of a string

I can't figure out how to pull out multiple matches from the following example:
This code:
/prefix-(\w+)/g.exec('prefix-firstname prefix-lastname');
returns:
["prefix-firstname", "firstname"]
How do I get it to return:
[
["prefix-firstname", "firstname"],
["prefix-lastname", "lastname"]
]
Or
["prefix-firstname", "firstname", "prefix-lastname", "lastname"]
This will do what you want:
// Collect [fullMatch, capturedGroup] pairs for every "prefix-<word>" occurrence.
var str = "prefix-firstname prefix-lastname";
var out = [];
// replace() is used only to invoke the callback once per match; its return value is ignored.
str.replace(/prefix-(\w+)/g, function (whole, captured) {
  out.push([whole, captured]);
});
Probably a mis-use of .replace, but I don't think you can pass a function to .match...
_Pez
Using a loop:
// Walk every match of a global regex with exec(); each call resumes from
// re.lastIndex and returns null once the matches are exhausted.
var re = /prefix-(\w+)/g;
var str = 'prefix-firstname prefix-lastname';
var matches = [];
var match = re.exec(str);
while (match !== null) {
  // match[0] is the whole match, match[1] the first capture group.
  matches.push([match[0], match[1]]);
  match = re.exec(str);
}
You get each match one at a time.
Using match:
Here, the regex will have to be a bit different, because you cannot get sub-captures (or I don't know how to do it with multiple matches)...
re = /[^\s-]+(?=\s|$)/g;
str = 'prefix-firstname prefix-lastname';
match = str.match(re);
alert(match);
[^\s-]+ matches a run of characters other than whitespace and hyphens, but only if it is followed by whitespace or is at the end of the string, which is the condition imposed by (?=\s|$).
You can find the groups in two steps:
"prefix-firstname prefix-lastname".match(/prefix-\w+/g)
.map(function(s) { return s.match(/prefix-(\w+)/) })

Regexp with global flag only matching first match

Why does:
/(\[#([0-9]{8})\])/g.exec("[#12345678] [#87654321] [#56233001] [#36381069] [#23416459] [#56435355]")
return
["[#12345678]", "[#12345678]", "12345678"]
I want it to match all those numbers but it appears to be too greedy.
[#12345678] [#87654321] [#56233001] [#36381069] [#23416459] [#56435355] 12345678 87654321 56233001 36381069 23416459 56435355
That's how .exec() works. To get multiple results, run it in a loop.
var re = /(\[#([0-9]{8})\])/g,
str = "[#12345678] [#87654321] [#56233001] [#36381069] [#23416459] [#56435355]",
match;
while (match = re.exec(str)) {
console.log(match);
}
Also, the outer capture group seems extraneous. You should probably get rid of that.
/\[#([0-9]{8})\]/g,
Result:
[
"[#12345678]",
"12345678"
],
[
"[#87654321]",
"87654321"
],
[
"[#56233001]",
"56233001"
],
[
"[#36381069]",
"36381069"
],
[
"[#23416459]",
"23416459"
],
[
"[#56435355]",
"56435355"
]
You can use replace method of a string to collect all the matches:
var s = "[#12345678] [#87654321] [#56233001] [#36381069] [#23416459] [#56435355]";
var re = /\[#([0-9]{8})\]/g;
var l = [];
s.replace(re, function($0, $1) {l.push($1)});
// l == ["12345678", "87654321", "56233001", "36381069", "23416459", "56435355"]
regex.exec returns the groups in your regex (the things wrapped in parenthesis).
The function you're looking for is one you call on the string, match.
string.match(regex) returns all of the matches.
"[#12345678] [#87654321] [#56233001] [#36381069] [#23416459] [#56435355]".match(/(\[#([0-9]{8})\])/g)
// yields: ["[#12345678]", "[#87654321]", "[#56233001]", "[#36381069]", "[#23416459]", "[#56435355]"]
EDIT:
If you just want the numbers without the brackets and the #, just change the regex to /\d{8}/g
"[#12345678] [#87654321] [#56233001] [#36381069] [#23416459] [#56435355]".match(/[0-9]{8}/g)
// yields: ["12345678", "87654321", "56233001", "36381069", "23416459", "56435355"]
Try this:
var re = /\[#(\d{8})\]/g;
var sourcestring = "[#12345678] [#87654321] [#56233001] [#36381069] [#23416459] [#56435355]";
var results = [];
var i = 0;
var matches;
while (matches = re.exec(sourcestring)) {
results[i] = matches;
alert(results[i][1]);
i++;
}
ES2020 added String.prototype.matchAll, which does the job: it returns an iterator over all matches of a global regex, including their capture groups.

Javascript split string on space or on quotes to array

var str = 'single words "fixed string of words"';
var astr = str.split(" "); // need fix
I would like the array to be like this:
var astr = ["single", "words", "fixed string of words"];
The accepted answer is not entirely correct. It separates on non-space characters like . and - and leaves the quotes in the results. The better way to do this so that it excludes the quotes is with capturing groups, like such:
// The parentheses in the regex create a capture group holding the text inside the quotes.
var myRegexp = /[^\s"]+|"([^"]*)"/gi;
var myString = 'single words "fixed string of words"';

/**
 * Split text on whitespace while keeping double-quoted phrases together.
 * The surrounding quotes are removed from quoted phrases.
 *
 * @param {string} text - Text to tokenise.
 * @returns {string[]} Tokens in order of appearance; an empty quoted string
 *   ("") yields an empty-string token.
 */
function splitQuoted(text) {
  var tokens = [];
  var match;
  // A global regex keeps its position between calls; start from the beginning.
  myRegexp.lastIndex = 0;
  // Each call to exec returns the next match as an array, or null when done.
  while ((match = myRegexp.exec(text)) !== null) {
    // match[1] is undefined for a bare word and a (possibly empty) string for a
    // quoted phrase, so test against undefined: a truthiness test would treat
    // "" as "no capture" and keep the quotes.
    tokens.push(match[1] !== undefined ? match[1] : match[0]);
  }
  return tokens;
}

var myArray = splitQuoted(myString);
myArray will now contain exactly what the OP asked for:
single,words,fixed string of words
str.match(/\w+|"[^"]+"/g)
//single, words, "fixed string of words"
This uses a mix of split and regex matching.
var str = 'single words "fixed string of words"';
var matches = /".+?"/.exec(str);
str = str.replace(/".+?"/, "").replace(/^\s+|\s+$/g, "");
var astr = str.split(" ");
if (matches) {
for (var i = 0; i < matches.length; i++) {
astr.push(matches[i].replace(/"/g, ""));
}
}
This returns the expected result, although a single regexp should be able to do it all.
// ["single", "words", "fixed string of words"]
Update
And this is an improved version of the method proposed by S.Mark:
var str = 'single words "fixed string of words"';
var aStr = str.match(/\w+|"[^"]+"/g), i = aStr.length;
while(i--){
aStr[i] = aStr[i].replace(/"/g,"");
}
// ["single", "words", "fixed string of words"]
Here might be a complete solution:
https://github.com/elgs/splitargs
ES6 solution supporting:
Split by space except for inside quotes
Removing quotes but not for backslash escaped quotes
Escaped quote become quote
Can put quotes anywhere
Code:
str.match(/\\?.|^$/g).reduce((p, c) => {
if(c === '"'){
p.quote ^= 1;
}else if(!p.quote && c === ' '){
p.a.push('');
}else{
p.a[p.a.length-1] += c.replace(/\\(.)/,"$1");
}
return p;
}, {a: ['']}).a
Output:
[ 'single', 'words', 'fixed string of words' ]
This will split it into an array and strip off the surrounding quotes from any remaining string.
/**
 * Split a string on whitespace, keeping double-quoted phrases together and
 * stripping their surrounding quotes.
 *
 * @param {string} [words=''] - Text to tokenise.
 * @returns {string[]} The tokens; an empty array when nothing matches.
 */
const parseWords = (words = '') =>
  (words.match(/[^\s"]+|"([^"]*)"/gi) || []).map((word) =>
    // Use [^"]* (not .+) so that empty quotes "" and quoted text containing
    // newlines are unwrapped as well.
    word.replace(/^"([^"]*)"$/, '$1'));
This solution would work for both double (") and single (') quotes:
Code:
str.match(/[^\s"']+|"([^"]*)"/gmi)
// ["single", "words", "fixed string of words"]
Here it shows how this regular expression would work: https://regex101.com/r/qa3KxQ/2
Until I found @dallin's answer (this thread: https://stackoverflow.com/a/18647776/1904943), I was having difficulty processing strings with a mix of unquoted and quoted terms / phrases in JavaScript.
In researching that issue, I ran a number of tests.
As I found it difficult to find this information, I have collated the relevant information (below), which may be useful to others seeking answers on the processing in JavaScript of strings containing quoted words.
let q = 'apple banana "nova scotia" "british columbia"';
Extract [only] quoted words and phrases:
// https://stackoverflow.com/questions/12367126/how-can-i-get-a-substring-located-between-2-quotes
const r = q.match(/"([^']+)"/g);
console.log('r:', r)
// r: Array [ "\"nova scotia\" \"british columbia\"" ]
console.log('r:', r.toString())
// r: "nova scotia" "british columbia"
// ----------------------------------------
// [alternate regex] https://www.regextester.com/97161
const s = q.match(/"(.*?)"/g);
console.log('s:', s)
// s: Array [ "\"nova scotia\"", "\"british columbia\"" ]
console.log('s:', s.toString())
// s: "nova scotia","british columbia"
Extract [all] unquoted, quoted words and phrases:
// https://stackoverflow.com/questions/2817646/javascript-split-string-on-space-or-on-quotes-to-array
const t = q.match(/\w+|"[^"]+"/g);
console.log('t:', t)
// t: Array(4) [ "apple", "banana", "\"nova scotia\"", "\"british columbia\"" ]
console.log('t:', t.toString())
// t: apple,banana,"nova scotia","british columbia"
// ----------------------------------------------------------------------------
// https://stackoverflow.com/questions/2817646/javascript-split-string-on-space-or-on-quotes-to-array
// [#dallon 's answer (this thread)] https://stackoverflow.com/a/18647776/1904943
var myRegexp = /[^\s"]+|"([^"]*)"/gi;
var myArray = [];
var match;
/* Each call to exec returns the next regex match as an array, or null once
 * the matches are exhausted. "q" = my query (string). */
while ((match = myRegexp.exec(q)) !== null) {
  /* Index 1 is the captured group (text inside the quotes) if it exists.
   * Index 0 is the matched text, used when there is no captured group.
   * Compare against undefined rather than testing truthiness, so an empty
   * quoted string "" yields '' instead of keeping its quotes. */
  myArray.push(match[1] !== undefined ? match[1] : match[0]);
}
console.log('myArray:', myArray, '| type:', typeof(myArray))
// myArray: Array(4) [ "apple", "banana", "nova scotia", "british columbia" ] | type: object
console.log(myArray.toString())
// apple,banana,nova scotia,british columbia
Work with a set (rather than an array):
// https://stackoverflow.com/questions/28965112/javascript-array-to-set
var mySet = new Set(myArray);
console.log('mySet:', mySet, '| type:', typeof(mySet))
// mySet: Set(4) [ "apple", "banana", "nova scotia", "british columbia" ] | type: object
Iterating over set elements:
mySet.forEach(x => console.log(x));
/* apple
* banana
* nova scotia
* british columbia
*/
// https://stackoverflow.com/questions/16401216/iterate-over-set-elements
myArrayFromSet = Array.from(mySet);
for (let i=0; i < myArrayFromSet.length; i++) {
console.log(i + ':', myArrayFromSet[i])
}
/*
0: apple
1: banana
2: nova scotia
3: british columbia
*/
Asides
The JavaScript responses above are from the FireFox Developer Tools (F12, from web page). I created a blank HTML file that calls a .js file that I edit with Vim, as my IDE. Simple JavaScript IDE
Based on my tests, the cloned set appears to be a deep copy. Shallow-clone an ES6 Map or Set
I noticed the disappearing characters, too. I think you can include them - for example, to have it include "+" with the word, use something like "[\w\+]" instead of just "\w".

Categories

Resources