JavaScript RegExp: Different results: Built pattern using string & using regexp "literal"? - javascript

Is there any differences between using RegExp literals vs strings?
http://jsfiddle.net/yMMrk/
String.prototype.lastIndexOf = function(pattern) {
pattern = pattern + "(?![\s\S]*" + pattern + ")";
var match = this.match(pattern);
return (match == null) ? -1 : match.index;
}
function indexOfLastNewline(str) {
var match = str.match(/\r?\n(?![\s\S]*(\r?\n))/);
return (match == null) ? -1 : match.index;
}
var str = "Hello 1\nHello 2\nHello 3\nHello4";
alert(str.lastIndexOf("(\r?\n)")); // always returns the 1st newline (7)
alert(indexOfLastNewline(str)); // returns correctly (23)
Update
even if I use a RegExp object, I still get the same result
http://jsfiddle.net/yMMrk/2/

You need to escape your \ in the string version as \\, like this:
String.prototype.lastIndexOf = function(pattern) {
pattern = pattern + "(?![\\s\\S]*" + pattern + ")";
var match = this.match(pattern);
return (match == null) ? -1 : match.index;
}
function indexOfLastNewline(str) {
var match = str.match(/\r?\n(?![\s\S]*(\r?\n))/);
return (match == null) ? -1 : match.index;
}
var str = "Hello 1\nHello 2\nHello 3\nHello4";
alert(str.lastIndexOf("(\\r?\\n)")); // returns correctly (23)
alert(indexOfLastNewline(str)); // returns correctly (23)
You can test it out here.

Generally speaking, if you use a string to construct a regex, you need to escape backslashes; if you use a regex literal, you need to escape slashes if they occur in your regex.
So the regex
\s/
can be written as a JavaScript string like this:
"\\s/"
and as a JavaScript regex literal like this:
/\s\//
Also, there is a difference in the handling of mode modifiers. For example, to make a regex case-insensitive, you can construct a regex object from a JavaScript string like this:
var myre = new RegExp("[a-z]", "i");
With a regex literal, you can do that directly:
var myre = /[a-z]/i;
Also, see this tutorial.

Related

Possible Extra String Quote Via PHP In Javascript [duplicate]

I am designing a regular expression tester in HTML and JavaScript. The user will enter a regex, a string, and choose the function they want to test with (e.g. search, match, replace, etc.) via radio button and the program will display the results when that function is run with the specified arguments. Naturally there will be extra text boxes for the extra arguments to replace and such.
My problem is getting the string from the user and turning it into a regular expression. If I say that they don't need to have //'s around the regex they enter, then they can't set flags, like g and i. So they have to have the //'s around the expression, but how can I convert that string to a regex? It can't be a literal since its a string, and I can't pass it to the RegExp constructor since its not a string without the //'s. Is there any other way to make a user input string into a regex? Will I have to parse the string and flags of the regex with the //'s then construct it another way? Should I have them enter a string, and then enter the flags separately?
Use the RegExp object constructor to create a regular expression from a string:
var re = new RegExp("a|b", "i");
// same as
var re = /a|b/i;
var flags = inputstring.replace(/.*\/([gimy]*)$/, '$1');
var pattern = inputstring.replace(new RegExp('^/(.*?)/'+flags+'$'), '$1');
var regex = new RegExp(pattern, flags);
or
var match = inputstring.match(new RegExp('^/(.*?)/([gimy]*)$'));
// sanity check here
var regex = new RegExp(match[1], match[2]);
Here is a one-liner: str.replace(/[|\\{}()[\]^$+*?.]/g, '\\$&')
I got it from the escape-string-regexp NPM module.
Trying it out:
escapeStringRegExp.matchOperatorsRe = /[|\\{}()[\]^$+*?.]/g;
function escapeStringRegExp(str) {
return str.replace(escapeStringRegExp.matchOperatorsRe, '\\$&');
}
console.log(new RegExp(escapeStringRegExp('example.com')));
// => /example\.com/
Using tagged template literals with flags support:
function str2reg(flags = 'u') {
return (...args) => new RegExp(escapeStringRegExp(evalTemplate(...args))
, flags)
}
function evalTemplate(strings, ...values) {
let i = 0
return strings.reduce((str, string) => `${str}${string}${
i < values.length ? values[i++] : ''}`, '')
}
console.log(str2reg()`example.com`)
// => /example\.com/u
Use the JavaScript RegExp object constructor.
var re = new RegExp("\\w+");
re.test("hello");
You can pass flags as a second string argument to the constructor. See the documentation for details.
In my case the user input somethimes was sorrounded by delimiters and sometimes not. therefore I added another case..
var regParts = inputstring.match(/^\/(.*?)\/([gim]*)$/);
if (regParts) {
// the parsed pattern had delimiters and modifiers. handle them.
var regexp = new RegExp(regParts[1], regParts[2]);
} else {
// we got pattern string without delimiters
var regexp = new RegExp(inputstring);
}
Try using the following function:
const stringToRegex = str => {
// Main regex
const main = str.match(/\/(.+)\/.*/)[1]
// Regex options
const options = str.match(/\/.+\/(.*)/)[1]
// Compiled regex
return new RegExp(main, options)
}
You can use it like so:
"abc".match(stringToRegex("/a/g"))
//=> ["a"]
Here is my one liner function that handles custom delimiters and invalid flags
// One liner
var stringToRegex = (s, m) => (m = s.match(/^([\/~#;%#'])(.*?)\1([gimsuy]*)$/)) ? new RegExp(m[2], m[3].split('').filter((i, p, s) => s.indexOf(i) === p).join('')) : new RegExp(s);
// Readable version
function stringToRegex(str) {
const match = str.match(/^([\/~#;%#'])(.*?)\1([gimsuy]*)$/);
return match ?
new RegExp(
match[2],
match[3]
// Filter redundant flags, to avoid exceptions
.split('')
.filter((char, pos, flagArr) => flagArr.indexOf(char) === pos)
.join('')
)
: new RegExp(str);
}
console.log(stringToRegex('/(foo)?\/bar/i'));
console.log(stringToRegex('#(foo)?\/bar##gi')); //Custom delimiters
console.log(stringToRegex('#(foo)?\/bar##gig')); //Duplicate flags are filtered out
console.log(stringToRegex('/(foo)?\/bar')); // Treated as string
console.log(stringToRegex('gig')); // Treated as string
I suggest you also add separate checkboxes or a textfield for the special flags. That way it is clear that the user does not need to add any //'s. In the case of a replace, provide two textfields. This will make your life a lot easier.
Why? Because otherwise some users will add //'s while other will not. And some will make a syntax error. Then, after you stripped the //'s, you may end up with a syntactically valid regex that is nothing like what the user intended, leading to strange behaviour (from the user's perspective).
This will work also when the string is invalid or does not contain flags etc:
function regExpFromString(q) {
let flags = q.replace(/.*\/([gimuy]*)$/, '$1');
if (flags === q) flags = '';
let pattern = (flags ? q.replace(new RegExp('^/(.*?)/' + flags + '$'), '$1') : q);
try { return new RegExp(pattern, flags); } catch (e) { return null; }
}
console.log(regExpFromString('\\bword\\b'));
console.log(regExpFromString('\/\\bword\\b\/gi'));
Thanks to earlier answers, this blocks serves well as a general purpose solution for applying a configurable string into a RegEx .. for filtering text:
var permittedChars = '^a-z0-9 _,.?!#+<>';
permittedChars = '[' + permittedChars + ']';
var flags = 'gi';
var strFilterRegEx = new RegExp(permittedChars, flags);
log.debug ('strFilterRegEx: ' + strFilterRegEx);
strVal = strVal.replace(strFilterRegEx, '');
// this replaces hard code solt:
// strVal = strVal.replace(/[^a-z0-9 _,.?!#+]/ig, '');
You can ask for flags using checkboxes then do something like this:
var userInput = formInput;
var flags = '';
if(formGlobalCheckboxChecked) flags += 'g';
if(formCaseICheckboxChecked) flags += 'i';
var reg = new RegExp(userInput, flags);
Safer, but not safe. (A version of Function that didn't have access to any other context would be good.)
const regexp = Function('return ' + string)()
I found #Richie Bendall solution very clean. I added few small modifications because it falls appart and throws error (maybe that's what you want) when passing non regex strings.
const stringToRegex = (str) => {
const re = /\/(.+)\/([gim]?)/
const match = str.match(re);
if (match) {
return new RegExp(match[1], match[2])
}
}
Using [gim]? in the pattern will ignore any match[2] value if it's invalid. You can omit the [gim]? pattern if you want an error to be thrown if the regex options is invalid.
I use eval to solve this problem.
For example:
function regex_exec() {
// Important! Like #Samuel Faure mentioned, Eval on user input is a crazy security risk, so before use this method, please take care of the security risk.
var regex = $("#regex").val();
// eval()
var patt = eval(userInput);
$("#result").val(patt.exec($("#textContent").val()));
}

How to put a variable in my JS regular expression? [duplicate]

I want to add a (variable) tag to values with regex, the pattern works fine with PHP but I have troubles implementing it into JavaScript.
The pattern is (value is the variable):
/(?!(?:[^<]+>|[^>]+<\/a>))\b(value)\b/is
I escaped the backslashes:
var str = $("#div").html();
var regex = "/(?!(?:[^<]+>|[^>]+<\\/a>))\\b(" + value + ")\\b/is";
$("#div").html(str.replace(regex, "" + value + ""));
But this seem not to be right, I logged the pattern and its exactly what it should be.
Any ideas?
To create the regex from a string, you have to use JavaScript's RegExp object.
If you also want to match/replace more than one time, then you must add the g (global match) flag. Here's an example:
var stringToGoIntoTheRegex = "abc";
var regex = new RegExp("#" + stringToGoIntoTheRegex + "#", "g");
// at this point, the line above is the same as: var regex = /#abc#/g;
var input = "Hello this is #abc# some #abc# stuff.";
var output = input.replace(regex, "!!");
alert(output); // Hello this is !! some !! stuff.
JSFiddle demo here.
In the general case, escape the string before using as regex:
Not every string is a valid regex, though: there are some speciall characters, like ( or [. To work around this issue, simply escape the string before turning it into a regex. A utility function for that goes in the sample below:
function escapeRegExp(stringToGoIntoTheRegex) {
return stringToGoIntoTheRegex.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
}
var stringToGoIntoTheRegex = escapeRegExp("abc"); // this is the only change from above
var regex = new RegExp("#" + stringToGoIntoTheRegex + "#", "g");
// at this point, the line above is the same as: var regex = /#abc#/g;
var input = "Hello this is #abc# some #abc# stuff.";
var output = input.replace(regex, "!!");
alert(output); // Hello this is !! some !! stuff.
JSFiddle demo here.
Note: the regex in the question uses the s modifier, which didn't exist at the time of the question, but does exist -- a s (dotall) flag/modifier in JavaScript -- today.
If you are trying to use a variable value in the expression, you must use the RegExp "constructor".
var regex = "(?!(?:[^<]+>|[^>]+<\/a>))\b(" + value + ")\b";
new RegExp(regex, "is")
I found I had to double slash the \b to get it working. For example to remove "1x" words from a string using a variable, I needed to use:
str = "1x";
var regex = new RegExp("\\b"+str+"\\b","g"); // same as inv.replace(/\b1x\b/g, "")
inv=inv.replace(regex, "");
You don't need the " to define a regular expression so just:
var regex = /(?!(?:[^<]+>|[^>]+<\/a>))\b(value)\b/is; // this is valid syntax
If value is a variable and you want a dynamic regular expression then you can't use this notation; use the alternative notation.
String.replace also accepts strings as input, so you can do "fox".replace("fox", "bear");
Alternative:
var regex = new RegExp("/(?!(?:[^<]+>|[^>]+<\/a>))\b(value)\b/", "is");
var regex = new RegExp("/(?!(?:[^<]+>|[^>]+<\/a>))\b(" + value + ")\b/", "is");
var regex = new RegExp("/(?!(?:[^<]+>|[^>]+<\/a>))\b(.*?)\b/", "is");
Keep in mind that if value contains regular expressions characters like (, [ and ? you will need to escape them.
I found this thread useful - so I thought I would add the answer to my own problem.
I wanted to edit a database configuration file (datastax cassandra) from a node application in javascript and for one of the settings in the file I needed to match on a string and then replace the line following it.
This was my solution.
dse_cassandra_yaml='/etc/dse/cassandra/cassandra.yaml'
// a) find the searchString and grab all text on the following line to it
// b) replace all next line text with a newString supplied to function
// note - leaves searchString text untouched
function replaceStringNextLine(file, searchString, newString) {
fs.readFile(file, 'utf-8', function(err, data){
if (err) throw err;
// need to use double escape '\\' when putting regex in strings !
var re = "\\s+(\\-\\s(.*)?)(?:\\s|$)";
var myRegExp = new RegExp(searchString + re, "g");
var match = myRegExp.exec(data);
var replaceThis = match[1];
var writeString = data.replace(replaceThis, newString);
fs.writeFile(file, writeString, 'utf-8', function (err) {
if (err) throw err;
console.log(file + ' updated');
});
});
}
searchString = "data_file_directories:"
newString = "- /mnt/cassandra/data"
replaceStringNextLine(dse_cassandra_yaml, searchString, newString );
After running, it will change the existing data directory setting to the new one:
config file before:
data_file_directories:
- /var/lib/cassandra/data
config file after:
data_file_directories:
- /mnt/cassandra/data
Much easier way: use template literals.
var variable = 'foo'
var expression = `.*${variable}.*`
var re = new RegExp(expression, 'g')
re.test('fdjklsffoodjkslfd') // true
re.test('fdjklsfdjkslfd') // false
Using string variable(s) content as part of a more complex composed regex expression (es6|ts)
This example will replace all urls using my-domain.com to my-other-domain (both are variables).
You can do dynamic regexs by combining string values and other regex expressions within a raw string template. Using String.raw will prevent javascript from escaping any character within your string values.
// Strings with some data
const domainStr = 'my-domain.com'
const newDomain = 'my-other-domain.com'
// Make sure your string is regex friendly
// This will replace dots for '\'.
const regexUrl = /\./gm;
const substr = `\\\.`;
const domain = domainStr.replace(regexUrl, substr);
// domain is a regex friendly string: 'my-domain\.com'
console.log('Regex expresion for domain', domain)
// HERE!!! You can 'assemble a complex regex using string pieces.
const re = new RegExp( String.raw `([\'|\"]https:\/\/)(${domain})(\S+[\'|\"])`, 'gm');
// now I'll use the regex expression groups to replace the domain
const domainSubst = `$1${newDomain}$3`;
// const page contains all the html text
const result = page.replace(re, domainSubst);
note: Don't forget to use regex101.com to create, test and export REGEX code.
var string = "Hi welcome to stack overflow"
var toSearch = "stack"
//case insensitive search
var result = string.search(new RegExp(toSearch, "i")) > 0 ? 'Matched' : 'notMatched'
https://jsfiddle.net/9f0mb6Lz/
Hope this helps

set regex with input variables [duplicate]

I am designing a regular expression tester in HTML and JavaScript. The user will enter a regex, a string, and choose the function they want to test with (e.g. search, match, replace, etc.) via radio button and the program will display the results when that function is run with the specified arguments. Naturally there will be extra text boxes for the extra arguments to replace and such.
My problem is getting the string from the user and turning it into a regular expression. If I say that they don't need to have //'s around the regex they enter, then they can't set flags, like g and i. So they have to have the //'s around the expression, but how can I convert that string to a regex? It can't be a literal since its a string, and I can't pass it to the RegExp constructor since its not a string without the //'s. Is there any other way to make a user input string into a regex? Will I have to parse the string and flags of the regex with the //'s then construct it another way? Should I have them enter a string, and then enter the flags separately?
Use the RegExp object constructor to create a regular expression from a string:
var re = new RegExp("a|b", "i");
// same as
var re = /a|b/i;
var flags = inputstring.replace(/.*\/([gimy]*)$/, '$1');
var pattern = inputstring.replace(new RegExp('^/(.*?)/'+flags+'$'), '$1');
var regex = new RegExp(pattern, flags);
or
var match = inputstring.match(new RegExp('^/(.*?)/([gimy]*)$'));
// sanity check here
var regex = new RegExp(match[1], match[2]);
Here is a one-liner: str.replace(/[|\\{}()[\]^$+*?.]/g, '\\$&')
I got it from the escape-string-regexp NPM module.
Trying it out:
escapeStringRegExp.matchOperatorsRe = /[|\\{}()[\]^$+*?.]/g;
function escapeStringRegExp(str) {
return str.replace(escapeStringRegExp.matchOperatorsRe, '\\$&');
}
console.log(new RegExp(escapeStringRegExp('example.com')));
// => /example\.com/
Using tagged template literals with flags support:
function str2reg(flags = 'u') {
return (...args) => new RegExp(escapeStringRegExp(evalTemplate(...args))
, flags)
}
function evalTemplate(strings, ...values) {
let i = 0
return strings.reduce((str, string) => `${str}${string}${
i < values.length ? values[i++] : ''}`, '')
}
console.log(str2reg()`example.com`)
// => /example\.com/u
Use the JavaScript RegExp object constructor.
var re = new RegExp("\\w+");
re.test("hello");
You can pass flags as a second string argument to the constructor. See the documentation for details.
In my case the user input somethimes was sorrounded by delimiters and sometimes not. therefore I added another case..
var regParts = inputstring.match(/^\/(.*?)\/([gim]*)$/);
if (regParts) {
// the parsed pattern had delimiters and modifiers. handle them.
var regexp = new RegExp(regParts[1], regParts[2]);
} else {
// we got pattern string without delimiters
var regexp = new RegExp(inputstring);
}
Try using the following function:
const stringToRegex = str => {
// Main regex
const main = str.match(/\/(.+)\/.*/)[1]
// Regex options
const options = str.match(/\/.+\/(.*)/)[1]
// Compiled regex
return new RegExp(main, options)
}
You can use it like so:
"abc".match(stringToRegex("/a/g"))
//=> ["a"]
Here is my one liner function that handles custom delimiters and invalid flags
// One liner
var stringToRegex = (s, m) => (m = s.match(/^([\/~#;%#'])(.*?)\1([gimsuy]*)$/)) ? new RegExp(m[2], m[3].split('').filter((i, p, s) => s.indexOf(i) === p).join('')) : new RegExp(s);
// Readable version
function stringToRegex(str) {
const match = str.match(/^([\/~#;%#'])(.*?)\1([gimsuy]*)$/);
return match ?
new RegExp(
match[2],
match[3]
// Filter redundant flags, to avoid exceptions
.split('')
.filter((char, pos, flagArr) => flagArr.indexOf(char) === pos)
.join('')
)
: new RegExp(str);
}
console.log(stringToRegex('/(foo)?\/bar/i'));
console.log(stringToRegex('#(foo)?\/bar##gi')); //Custom delimiters
console.log(stringToRegex('#(foo)?\/bar##gig')); //Duplicate flags are filtered out
console.log(stringToRegex('/(foo)?\/bar')); // Treated as string
console.log(stringToRegex('gig')); // Treated as string
I suggest you also add separate checkboxes or a textfield for the special flags. That way it is clear that the user does not need to add any //'s. In the case of a replace, provide two textfields. This will make your life a lot easier.
Why? Because otherwise some users will add //'s while other will not. And some will make a syntax error. Then, after you stripped the //'s, you may end up with a syntactically valid regex that is nothing like what the user intended, leading to strange behaviour (from the user's perspective).
This will work also when the string is invalid or does not contain flags etc:
function regExpFromString(q) {
let flags = q.replace(/.*\/([gimuy]*)$/, '$1');
if (flags === q) flags = '';
let pattern = (flags ? q.replace(new RegExp('^/(.*?)/' + flags + '$'), '$1') : q);
try { return new RegExp(pattern, flags); } catch (e) { return null; }
}
console.log(regExpFromString('\\bword\\b'));
console.log(regExpFromString('\/\\bword\\b\/gi'));
Thanks to earlier answers, this blocks serves well as a general purpose solution for applying a configurable string into a RegEx .. for filtering text:
var permittedChars = '^a-z0-9 _,.?!#+<>';
permittedChars = '[' + permittedChars + ']';
var flags = 'gi';
var strFilterRegEx = new RegExp(permittedChars, flags);
log.debug ('strFilterRegEx: ' + strFilterRegEx);
strVal = strVal.replace(strFilterRegEx, '');
// this replaces hard code solt:
// strVal = strVal.replace(/[^a-z0-9 _,.?!#+]/ig, '');
You can ask for flags using checkboxes then do something like this:
var userInput = formInput;
var flags = '';
if(formGlobalCheckboxChecked) flags += 'g';
if(formCaseICheckboxChecked) flags += 'i';
var reg = new RegExp(userInput, flags);
Safer, but not safe. (A version of Function that didn't have access to any other context would be good.)
const regexp = Function('return ' + string)()
I found #Richie Bendall solution very clean. I added few small modifications because it falls appart and throws error (maybe that's what you want) when passing non regex strings.
const stringToRegex = (str) => {
const re = /\/(.+)\/([gim]?)/
const match = str.match(re);
if (match) {
return new RegExp(match[1], match[2])
}
}
Using [gim]? in the pattern will ignore any match[2] value if it's invalid. You can omit the [gim]? pattern if you want an error to be thrown if the regex options is invalid.
I use eval to solve this problem.
For example:
function regex_exec() {
// Important! Like #Samuel Faure mentioned, Eval on user input is a crazy security risk, so before use this method, please take care of the security risk.
var regex = $("#regex").val();
// eval()
var patt = eval(userInput);
$("#result").val(patt.exec($("#textContent").val()));
}

how to check if a string matches a pattern with asterisk

I have a list of files:
var files = [
"user_parse_info",
"user_parse_age",
"site_render_info",
"site_parse_name"
]
Now I have string a pattern:
var pattern = "site_*_name";
This should match only the last file.
How can I check this? Maybe an RegExp?
Yes, regular expression would be a better choice for this.
var _ = require('nimble');
var files = [
"user_parse_info",
"user_parse_age",
"site_render_info",
"site_parse_name"
];
var re = /^site_.*_name$/;
var result = _.filter(files, function (val) {
return re.test(val);
});
Using a regular expression means that you would need to escape some characters like . and $, otherwise they can give false positives or keep the pattern from matching anything.
You can just split the string on the asterisk and check the parts against the string:
var parts = pattern.split('*');
if (
str.length >= parts[0].length + parts[1].length &&
str.substr(0, parts[0].length) == parts[0] &&
str.substr(str.length - parts[1].length) == parts[1])
{
// matches
}
Demo: http://jsfiddle.net/Guffa/u8XEE/

JavaScript return only regex from string

This might be pretty simple, but I am having a hard time with it. Consider the following code:
var string = 'testingT#$^%#$ESTING__--232'
string = string.match(/^\w*$/)
if (string != null)
{
string = string.join('')
string = string.toUpperCase()
}
$('#my-input').val(string)
What I want to do, is to strip all characters that aren't alphanumeric or underscore from string, and then transform that string to uppercase.
So far I did that, it works perfectly if I don't add any special characters, but when I add - or ^ to it, for example, it deletes everything from #my-input
You can do this in one step:
string = string.replace(/[^\w]/g, '').toUpperCase();
console.log(string); //=> "TESTINGTESTING__232"
var string = string.replace(/[^a-zA-Z_0-9]/g,'').toUpperCase()
Also, do you need unicode? My regex will only match a-z, and not åÉø for example.
You need use 'global' flag in regex and remove match restriction.
var str = 'testingT#$^%#$ESTING__--232';
str = str.match(/\w+/g);
if (str !== null)
{
str = str.join('');
str = str.toUpperCase();
}
$('#my-input').val(str);

Categories

Resources