Replace string with a function of replaced string - javascript

I would like to find a performant and elegant solution to this problem :
I need to replace a substring of a string, case insensitively, by a function of what have been replaced.
Example :
var highlight = function(inputString, filterString) {
var regex = new RegExp(filterString, 'gi');
return inputString.replace(regex, '<b>' + filterString + '</b>');
};
highlight('The input', 't');
If your run this in your browser console, you'll get :
"<b>t</b>he inpu<b>t</b>"
My problem is that I'd like to keep the original case of the replaced string. Therefore, the expected result would be :
"<b>T</b>he inpu<b>t</b>"
Any idea ?
Edit: Answer by #georg:
return inputString.replace(regex, '<b>$&</b>');

Just use $& in the replacement, which means "what has been found":
var highlight = function(inputString, filterString) {
var regex = new RegExp(filterString, 'gi');
return inputString.replace(regex, '<b>$&</b>');
};
x = highlight('The input', 't');
document.write("<pre>" + JSON.stringify(x,0,3));
Other $... magic constructs, for the reference:
MDN

$& contains the string matched by the last pattern match.
Please refer
https://msdn.microsoft.com/en-us/library/ie/3k9c4a32%28v=vs.94%29.aspx
for more details.

Related

How to put a variable in my JS regular expression? [duplicate]

I want to add a (variable) tag to values with regex, the pattern works fine with PHP but I have troubles implementing it into JavaScript.
The pattern is (value is the variable):
/(?!(?:[^<]+>|[^>]+<\/a>))\b(value)\b/is
I escaped the backslashes:
var str = $("#div").html();
var regex = "/(?!(?:[^<]+>|[^>]+<\\/a>))\\b(" + value + ")\\b/is";
$("#div").html(str.replace(regex, "" + value + ""));
But this seem not to be right, I logged the pattern and its exactly what it should be.
Any ideas?
To create the regex from a string, you have to use JavaScript's RegExp object.
If you also want to match/replace more than one time, then you must add the g (global match) flag. Here's an example:
var stringToGoIntoTheRegex = "abc";
var regex = new RegExp("#" + stringToGoIntoTheRegex + "#", "g");
// at this point, the line above is the same as: var regex = /#abc#/g;
var input = "Hello this is #abc# some #abc# stuff.";
var output = input.replace(regex, "!!");
alert(output); // Hello this is !! some !! stuff.
JSFiddle demo here.
In the general case, escape the string before using as regex:
Not every string is a valid regex, though: there are some speciall characters, like ( or [. To work around this issue, simply escape the string before turning it into a regex. A utility function for that goes in the sample below:
function escapeRegExp(stringToGoIntoTheRegex) {
return stringToGoIntoTheRegex.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
}
var stringToGoIntoTheRegex = escapeRegExp("abc"); // this is the only change from above
var regex = new RegExp("#" + stringToGoIntoTheRegex + "#", "g");
// at this point, the line above is the same as: var regex = /#abc#/g;
var input = "Hello this is #abc# some #abc# stuff.";
var output = input.replace(regex, "!!");
alert(output); // Hello this is !! some !! stuff.
JSFiddle demo here.
Note: the regex in the question uses the s modifier, which didn't exist at the time of the question, but does exist -- a s (dotall) flag/modifier in JavaScript -- today.
If you are trying to use a variable value in the expression, you must use the RegExp "constructor".
var regex = "(?!(?:[^<]+>|[^>]+<\/a>))\b(" + value + ")\b";
new RegExp(regex, "is")
I found I had to double slash the \b to get it working. For example to remove "1x" words from a string using a variable, I needed to use:
str = "1x";
var regex = new RegExp("\\b"+str+"\\b","g"); // same as inv.replace(/\b1x\b/g, "")
inv=inv.replace(regex, "");
You don't need the " to define a regular expression so just:
var regex = /(?!(?:[^<]+>|[^>]+<\/a>))\b(value)\b/is; // this is valid syntax
If value is a variable and you want a dynamic regular expression then you can't use this notation; use the alternative notation.
String.replace also accepts strings as input, so you can do "fox".replace("fox", "bear");
Alternative:
var regex = new RegExp("/(?!(?:[^<]+>|[^>]+<\/a>))\b(value)\b/", "is");
var regex = new RegExp("/(?!(?:[^<]+>|[^>]+<\/a>))\b(" + value + ")\b/", "is");
var regex = new RegExp("/(?!(?:[^<]+>|[^>]+<\/a>))\b(.*?)\b/", "is");
Keep in mind that if value contains regular expressions characters like (, [ and ? you will need to escape them.
I found this thread useful - so I thought I would add the answer to my own problem.
I wanted to edit a database configuration file (datastax cassandra) from a node application in javascript and for one of the settings in the file I needed to match on a string and then replace the line following it.
This was my solution.
dse_cassandra_yaml='/etc/dse/cassandra/cassandra.yaml'
// a) find the searchString and grab all text on the following line to it
// b) replace all next line text with a newString supplied to function
// note - leaves searchString text untouched
function replaceStringNextLine(file, searchString, newString) {
fs.readFile(file, 'utf-8', function(err, data){
if (err) throw err;
// need to use double escape '\\' when putting regex in strings !
var re = "\\s+(\\-\\s(.*)?)(?:\\s|$)";
var myRegExp = new RegExp(searchString + re, "g");
var match = myRegExp.exec(data);
var replaceThis = match[1];
var writeString = data.replace(replaceThis, newString);
fs.writeFile(file, writeString, 'utf-8', function (err) {
if (err) throw err;
console.log(file + ' updated');
});
});
}
searchString = "data_file_directories:"
newString = "- /mnt/cassandra/data"
replaceStringNextLine(dse_cassandra_yaml, searchString, newString );
After running, it will change the existing data directory setting to the new one:
config file before:
data_file_directories:
- /var/lib/cassandra/data
config file after:
data_file_directories:
- /mnt/cassandra/data
Much easier way: use template literals.
var variable = 'foo'
var expression = `.*${variable}.*`
var re = new RegExp(expression, 'g')
re.test('fdjklsffoodjkslfd') // true
re.test('fdjklsfdjkslfd') // false
Using string variable(s) content as part of a more complex composed regex expression (es6|ts)
This example will replace all urls using my-domain.com to my-other-domain (both are variables).
You can do dynamic regexs by combining string values and other regex expressions within a raw string template. Using String.raw will prevent javascript from escaping any character within your string values.
// Strings with some data
const domainStr = 'my-domain.com'
const newDomain = 'my-other-domain.com'
// Make sure your string is regex friendly
// This will replace dots for '\'.
const regexUrl = /\./gm;
const substr = `\\\.`;
const domain = domainStr.replace(regexUrl, substr);
// domain is a regex friendly string: 'my-domain\.com'
console.log('Regex expresion for domain', domain)
// HERE!!! You can 'assemble a complex regex using string pieces.
const re = new RegExp( String.raw `([\'|\"]https:\/\/)(${domain})(\S+[\'|\"])`, 'gm');
// now I'll use the regex expression groups to replace the domain
const domainSubst = `$1${newDomain}$3`;
// const page contains all the html text
const result = page.replace(re, domainSubst);
note: Don't forget to use regex101.com to create, test and export REGEX code.
var string = "Hi welcome to stack overflow"
var toSearch = "stack"
//case insensitive search
var result = string.search(new RegExp(toSearch, "i")) > 0 ? 'Matched' : 'notMatched'
https://jsfiddle.net/9f0mb6Lz/
Hope this helps

How to get value in $1 in regex to a variable for further manipulation [duplicate]

You can backreference like this in JavaScript:
var str = "123 $test 123";
str = str.replace(/(\$)([a-z]+)/gi, "$2");
This would (quite silly) replace "$test" with "test". But imagine I'd like to pass the resulting string of $2 into a function, which returns another value. I tried doing this, but instead of getting the string "test", I get "$2". Is there a way to achieve this?
// Instead of getting "$2" passed into somefunc, I want "test"
// (i.e. the result of the regex)
str = str.replace(/(\$)([a-z]+)/gi, somefunc("$2"));
Like this:
str.replace(regex, function(match, $1, $2, offset, original) { return someFunc($2); })
Pass a function as the second argument to replace:
str = str.replace(/(\$)([a-z]+)/gi, myReplace);
function myReplace(str, group1, group2) {
return "+" + group2 + "+";
}
This capability has been around since Javascript 1.3, according to mozilla.org.
Using ESNext, quite a dummy links replacer but just to show-case how it works :
let text = 'Visit http://lovecats.com/new-posts/ and https://lovedogs.com/best-dogs NOW !';
text = text.replace(/(https?:\/\/[^ ]+)/g, (match, link) => {
// remove ending slash if there is one
link = link.replace(/\/?$/, '');
return `${link.substr(link.lastIndexOf('/') +1)}`;
});
document.body.innerHTML = text;
Note: Previous answer was missing some code. It's now fixed + example.
I needed something a bit more flexible for a regex replace to decode the unicode in my incoming JSON data:
var text = "some string with an encoded 's' in it";
text.replace(/&#(\d+);/g, function() {
return String.fromCharCode(arguments[1]);
});
// "some string with an encoded 's' in it"
If you would have a variable amount of backreferences then the argument count (and places) are also variable. The MDN Web Docs describe the follwing syntax for sepcifing a function as replacement argument:
function replacer(match[, p1[, p2[, p...]]], offset, string)
For instance, take these regular expressions:
var searches = [
'test([1-3]){1,3}', // 1 backreference
'([Ss]ome) ([A-z]+) chars', // 2 backreferences
'([Mm][a#]ny) ([Mm][0o]r[3e]) ([Ww][0o]rd[5s])' // 3 backreferences
];
for (var i in searches) {
"Some string chars and many m0re w0rds in this test123".replace(
new RegExp(
searches[i]
function(...args) {
var match = args[0];
var backrefs = args.slice(1, args.length - 2);
// will be: ['Some', 'string'], ['many', 'm0re', 'w0rds'], ['123']
var offset = args[args.length - 2];
var string = args[args.length - 1];
}
)
);
}
You can't use 'arguments' variable here because it's of type Arguments and no of type Array so it doesn't have a slice() method.

get particular strings from a text that separated by underscore

I am trying to get the particular strings from the text below :
var str = "001AN_LAST_BRANCH_HYB_1hhhhh5_PBTsd_JENKIN.bin";
From this i have to get the following strings: "LAST", "BRANCH" and "JENKIN".
I used the code below to get "JENKIN";
var result = str.substr(str.lastIndexOf("_") +1);
It will get the result "JENKIN.bin". I need only "JENKIN".
Also the input string str sometimes contains this ".bin" string.
with substring() function you can extract text you need with defining start and end position. You have already found the start position with str.lastIndexOf("_") +1 and adding end position with str.indexOf(".") to substring() function will give you the result you need.
var result = str.substring(str.lastIndexOf("_") +1,str.indexOf("."));
It depends on how predictable the pattern is. How about:
var parts = str.replace(/\..+/, '').split('_');
And then parts[0] is 001AN, parts[1] is LAST, etc
You can use String.prototype.split to split a string into an array by a given separator:
var str = '001AN_LAST_BRANCH_HYB_1hhhhh5_PBTsd_JENKIN.bin';
var parts = str.split('_');
// parts is ['001AN', 'LAST', 'BRANCH', 'HYB', '1hhhhh5', 'PBTsd', 'JENKIN.bin'];
document.body.innerText = parts[1] + ", " + parts[2] + " and " + parts[6].split('.')[0];
You could do that way:
var re = /^[^_]*_([^_]*)_([^_]*)_.*_([^.]*)\..*$/;
var str = "001AN_LAST_BRANCH_HYB_1hhhhh5_PBTsd_JENKIN.bin";
var matches = re.exec(str);
console.log(matches[1]); // LAST
console.log(matches[2]); // BRANCH
console.log(matches[3]); // JENKIN
This way you can reuse your RegExp anytime you want, and it can be used in other languages too.
Try using String.prototype.match() with RegExp /([A-Z])+(?=_B|_H|\.)/g to match any number of uppercase letters followed by "_B" , "_H" or "."
var str = "001AN_LAST_BRANCH_HYB_1hhhhh5_PBTsd_JENKIN.bin";
var res = str.match(/([A-Z])+(?=_B|_H|\.)/g);
console.log(res)
I don't know why you want to that, but this example would be helpful.
It will be better write what exactly you want.
str = '001AN_LAST_BRANCH_HYB_1hhhhh5_PBTsd_JENKIN.bin'
find = ['LAST', 'BRANCH', 'JENKINS']
found = []
for item in find:
if item in str:
found.append(item)
print found # ['LAST', 'BRANCH']

Javascript: highlight substring keeping original case but searching in case insensitive mode

I'm trying to write a "suggestion search box" and I cannot find a solution that allows to highlight a substring with javascript keeping the original case.
For example if I search for "ca" I search server side in a case insensitive mode and I have the following results:
Calculator
calendar
ESCAPE
I would like to view the search string in all the previous words, so the result should be:
Calculator
calendar
ESCAPE
I tried with the following code:
var reg = new RegExp(querystr, 'gi');
var final_str = 'foo ' + result.replace(reg, '<b>'+querystr+'</b>');
$('#'+id).html(final_str);
But obviously in this way I loose the original case!
Is there a way to solve this problem?
Use a function for the second argument for .replace() that returns the actual matched string with the concatenated tags.
Try it out: http://jsfiddle.net/4sGLL/
reg = new RegExp(querystr, 'gi');
// The str parameter references the matched string
// --------------------------------------v
final_str = 'foo ' + result.replace(reg, function(str) {return '<b>'+str+'</b>'});
$('#' + id).html(final_str);​
JSFiddle Example with Input: https://jsfiddle.net/pawmbude/
ES6 version
const highlight = (needle, haystack) =>
haystack.replace(
new RegExp(needle, 'gi'),
(str) => `<strong>${str}</strong>`
);
nice results with
function str_highlight_text(string, str_to_highlight){
var reg = new RegExp(str_to_highlight, 'gi');
return string.replace(reg, function(str) {return '<span style="background-color:#ffbf00;color:#fff;"><b>'+str+'</b></span>'});
}
and easier to remember...
thx to user113716: https://stackoverflow.com/a/3294644/2065594
While the other answers so far seem simple, they can't be really used in many real world cases as they don't handle proper text HTML escaping and RegExp escaping. If you want to highlight every possible snippet, while escaping the text properly, a function like that would return all elements you should add to your suggestions box:
function highlightLabel(label, term) {
if (!term) return [ document.createTextNode(label) ]
const regex = new RegExp(term.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&'), 'gi')
const result = []
let left, match, right = label
while (match = right.match(regex)) {
const m = match[0], hl = document.createElement('b'), i = match.index
hl.innerText = m
left = right.slice(0, i)
right = right.slice(i + m.length)
result.push(document.createTextNode(left), hl)
if (!right.length) return result
}
result.push(document.createTextNode(right))
return result
}
string.replace fails in the general case. If you use .innerHTML, replace can replace matches in tags (like a tags). If you use .innerText or .textContent, it will remove any tags there were previously in the html. More than that, in both cases it damages your html if you want to remove the highlighting.
The true answer is mark.js (https://markjs.io/). I just found this - it is what I have been searching for for such a long time. It does just what you want it to.
I do the exact same thing.
You need to make a copy.
I store in the db a copy of the real string, in all lower case.
Then I search using a lower case version of the query string or do a case insensitive regexp.
Then use the resulting found start index in the main string, plus the length of the query string, to highlight the query string within the result.
You can not use the query string in the result since its case is not determinate. You need to highlight a portion of the original string.
.match() performs case insensitive matching and returns an array of the matches with case intact.
var matches = str.match(queryString),
startHere = 0,
nextMatch,
resultStr ='',
qLength = queryString.length;
for (var match in matches) {
nextMatch = str.substr(startHere).indexOf(match);
resultStr = resultStr + str.substr(startHere, nextMatch) + '<b>' + match + '</b>';
startHere = nextMatch + qLength;
}
I have found a easiest way to achieve it. JavaScript regular expression remembers the string it matched. This feature can be used here.
I have modified the code a bit.
reg = new RegExp("("+querystr.trim()+")", 'gi');
final_str = 'foo ' + result.replace(reg, "<b>&1</b>");
$('#'+id).html(final_str);
Highlight search term and anchoring to first occurence - Start
function highlightSearchText(searchText) {
var innerHTML = document.documentElement.innerHTML;
var replaceString = '<mark>'+searchText+'</mark>';
var newInnerHtml = this.replaceAll(innerHTML, searchText, replaceString);
document.documentElement.innerHTML = newInnerHtml;
var elmnt = document.documentElement.getElementsByTagName('mark')[0]
elmnt.scrollIntoView();
}
function replaceAll(str, querystr, replace) {
var reg = new RegExp(querystr, 'gi');
var final_str = str.replace(reg, function(str) {return '<mark>'+str+'</mark>'});
return final_str
}
Highlight search term and anchoring to first occurence - End

JavaScript - string regex backreferences

You can backreference like this in JavaScript:
var str = "123 $test 123";
str = str.replace(/(\$)([a-z]+)/gi, "$2");
This would (quite silly) replace "$test" with "test". But imagine I'd like to pass the resulting string of $2 into a function, which returns another value. I tried doing this, but instead of getting the string "test", I get "$2". Is there a way to achieve this?
// Instead of getting "$2" passed into somefunc, I want "test"
// (i.e. the result of the regex)
str = str.replace(/(\$)([a-z]+)/gi, somefunc("$2"));
Like this:
str.replace(regex, function(match, $1, $2, offset, original) { return someFunc($2); })
Pass a function as the second argument to replace:
str = str.replace(/(\$)([a-z]+)/gi, myReplace);
function myReplace(str, group1, group2) {
return "+" + group2 + "+";
}
This capability has been around since Javascript 1.3, according to mozilla.org.
Using ESNext, quite a dummy links replacer but just to show-case how it works :
let text = 'Visit http://lovecats.com/new-posts/ and https://lovedogs.com/best-dogs NOW !';
text = text.replace(/(https?:\/\/[^ ]+)/g, (match, link) => {
// remove ending slash if there is one
link = link.replace(/\/?$/, '');
return `${link.substr(link.lastIndexOf('/') +1)}`;
});
document.body.innerHTML = text;
Note: Previous answer was missing some code. It's now fixed + example.
I needed something a bit more flexible for a regex replace to decode the unicode in my incoming JSON data:
var text = "some string with an encoded 's' in it";
text.replace(/&#(\d+);/g, function() {
return String.fromCharCode(arguments[1]);
});
// "some string with an encoded 's' in it"
If you would have a variable amount of backreferences then the argument count (and places) are also variable. The MDN Web Docs describe the follwing syntax for sepcifing a function as replacement argument:
function replacer(match[, p1[, p2[, p...]]], offset, string)
For instance, take these regular expressions:
var searches = [
'test([1-3]){1,3}', // 1 backreference
'([Ss]ome) ([A-z]+) chars', // 2 backreferences
'([Mm][a#]ny) ([Mm][0o]r[3e]) ([Ww][0o]rd[5s])' // 3 backreferences
];
for (var i in searches) {
"Some string chars and many m0re w0rds in this test123".replace(
new RegExp(
searches[i]
function(...args) {
var match = args[0];
var backrefs = args.slice(1, args.length - 2);
// will be: ['Some', 'string'], ['many', 'm0re', 'w0rds'], ['123']
var offset = args[args.length - 2];
var string = args[args.length - 1];
}
)
);
}
You can't use 'arguments' variable here because it's of type Arguments and no of type Array so it doesn't have a slice() method.

Categories

Resources