javascript match returning word boundry - javascript

RegEx is not my strong suit. I hope one of you can help me.
I am trying to use javacript.match() to search for all hashed(# at the beginning) words.
and I get a returned white space.
string = "#foo #bar"
result = string.match(/(^|\W)(#[a-z\d][\w-]*)/ig);
console.log(result)
Results in:
["#foo", " #bar"]
Notice the whitespace in #bar.
What should I change to my RegEx to exclude the boundary in the return to .match?
Thanks for the help!!

You don't need (^|\W) before as # is considered non word character anyway. You are getting space because \W will match space also.
This regex would work better:
var re = /(?:^|\s)(#[a-z\d][\w-]*)/g,
matches = [],
input = "#foo #bar abc#baz";
while (match = re.exec(input)) matches.push(match[1].trim());
console.log(matches);
//=> ["#foo", "#bar"]
EDIT: To avoid loops:
var m = [];
var str = "#foo #bar abc#baz";
str.replace(/(^|\s)(#[a-z\d][\w-]*)/g, function($1) { m.push($1.trim()); return $1; } );
console.log(m);
//=> ["#foo", "#bar"]

You need to use this syntax to extract capture groups:
var str = '#foo #bar';
var myRegexp = new RegExp('(?:^|\\W)(#[^\\W_][\\w-]*)', 'g');
var matchResult = myRegexp.exec(str);
var result = Array();
while (matchResult != null) {
result.push(matchResult[1]);
matchResult = myRegexp.exec(str);
}
console.log(result);
If you don't want to loop through the match results, you can use this trick:
var str = '#foo #bar';
var result = Array();
str.replace(/(?:^|\W)(#[^\W_][\w-]*)/g, function (m, g1) { result.push(g1); } );
console.log(result);

Related

How to match overlapping keywords with regex

This example finds only sam. How to make it find both sam and samwise?
var regex = /sam|samwise|merry|pippin/g;
var string = 'samwise gamgee';
var match = string.match(regex);
console.log(match);
Note: this is simple example, but my real regexes are created by joining 500 keywords at time, so it's too cumbersome to search all overlapping and make a special case for them with something like /sam(wise)/. The other obvious solution I can think of, is to just iterate though all keywords individually, but I think it must be a fast and elegant, single-regex solution.
You can use lookahead regex with capturing group for this overlapping match:
var regex = /(?=(sam))(?=(samwise))/;
var string = 'samwise';
var match = string.match( regex ).filter(Boolean);
//=> ["sam", "samwise"]
It is important to not to use g (global) flag in the regex.
filter(Boolean) is used to remove first empty result from matched array.
Why not just map indexOf() on array substr:
var string = 'samwise gamgee';
var substr = ['sam', 'samwise', 'merry', 'pippin'];
var matches = substr.map(function(m) {
return (string.indexOf(m) < 0 ? false : m);
}).filter(Boolean);
See fiddle console.log(matches);
Array [ "sam", "samwise" ]
Probably of better performance than using regex. But if you need the regex functionality e.g. for caseless matching, word boundaries, returned matches... use with exec method:
var matches = substr.map(function(v) {
var re = new RegExp("\\b" + v, "i"); var m = re.exec(string);
return (m !== null ? m[0] : false);
}).filter(Boolean);
This one with i-flag (ignore case) returns each first match with initial \b word boundary.
I can't think of a simple and elegant solution, but I've got something that uses a single regex:
function quotemeta(s) {
return s.replace(/\W/g, '\\$&');
}
let keywords = ['samwise', 'sam'];
let subsumed_by = {};
keywords.sort();
for (let i = keywords.length; i--; ) {
let k = keywords[i];
for (let j = i - 1; j >= 0 && k.startsWith(keywords[j]); j--) {
(subsumed_by[k] = subsumed_by[k] || []).push(keywords[j]);
}
}
keywords.sort(function (a, b) b.length - a.length);
let re = new RegExp('(?=(' + keywords.map(quotemeta).join('|') + '))[\\s\\S]', 'g');
let string = 'samwise samgee';
let result = [];
let m;
while (m = re.exec(string)) {
result.push(m[1]);
result.push.apply(result, subsumed_by[m[1]] || []);
}
console.log(result);
How about:
var re = /((sam)(?:wise)?)/;
var m = 'samwise'.match(re); // gives ["samwise", "samwise", "sam"]
var m = 'sam'.match(re); // gives ["sam", "sam", "sam"]
You can use Unique values in an array to remove dupplicates.
If you don't want to create special cases, and if order doesn't matter, why not first match only full names with:
\b(sam|samwise|merry|pippin)\b
and then, filter if some of these doesn't contain shorter one? for example with:
(sam|samwise|merry|pippin)(?=\w+\b)
It is not one elegant regex, but I suppose it is simpler than iterating through all matches.

Replace capture group content in JavaScript

In my JavaScript code I have a regular expression with capture groups (that is configured by library user) and a source string which matches this regular expression. The regular expression matches whole string (i.e. it has ^ and $ characters at its start and end).
A silly example:
var regex = /^([a-zA-Z]{2})-([0-9]{3})_.*$/;
var sourceStr = "ab-123_foo";
I want to reassemble the source string, replacing values in the capture groups and leaving the rest of the string intact. Note that, while this example has most of the "rest of the string" at its end, it actually may be anywhere else.
For example:
var replacements = [ "ZX", "321" ];
var expectedString = "ZX-321_foo";
Is there a way to do this in JavaScript?
NB: The regular expression is configured by the library user via the legacy API. I can not ask user to provide a second regular expression to solve this problem.
Without changing the regex the best I can think of is a callback that replaces the matches
sourceStr = sourceStr.replace(regex, function(match, $1, $2, offset, str) {
return str.replace($1, replacements[0]).replace($2, replacements[1]);
});
That's not a very good solution, as it would fail on something like
var sourceStr = "ab_ab-123_foo";
as it would replace the first ab instead of the matched one etc. but works for the given example and any string that doesn't repeat the matched characters
var regex = /^([a-zA-Z]{2})-([0-9]{3})_.*$/;
var sourceStr = "ab-123_foo";
var replacements = [ "ZX", "321" ];
sourceStr = sourceStr.replace(regex, function(match, $1, $2, offset, str) {
return str.replace($1, replacements[0]).replace($2, replacements[1]);
});
document.body.innerHTML = sourceStr;
I think this is close. It satisfies the two test cases but I'm unsure about leading and trailing groupings.
function replacer (regex, sourceStr, replacements) {
// Make a new regex that adds groups to ungrouped items.
var groupAll = "";
var lastIndex = 0;
var src = regex.source;
var reGroup=/\(.*?\)/g;
var match;
while(match = reGroup.exec(src)){
groupAll += "(" + src.substring(lastIndex, match.index) + ")";
groupAll += match[0];
lastIndex = match.index + match[0].length;
}
var reGroupAll = new RegExp(groupAll);
// Replace the original groupings with the replacements
// and append what was previously ungrouped.
var rep = sourceStr.replace(reGroupAll, function(){
// (match, $1, $2, ..., index, source)
var len = arguments.length - 2;
var ret = "";
for (var i = 1,j=0; i < len; i+=2,j++) {
ret += arguments[i];
ret += replacements[j];
}
return ret;
});
return rep;
}
var regex = /^([a-zA-Z]{2})-([0-9]{3})_.*$/;
var sourceStr = "ab-123_foo";
var replacements = [ "ZX", "321" ];
var expectedString = "ZX-321_foo";
var replaced = replacer(regex, sourceStr, replacements);
console.log(replaced);
console.log(replaced === expectedString);
regex = /^.*_([a-zA-Z]{2})-([0-9]{3})$/;
sourceStr = "ab_ab-123";
expectedString = "ab_ZX-321";
var replaced = replacer(regex, sourceStr, replacements);
console.log(replaced);
console.log(replaced === expectedString);
Output:
ZX-321_foo
true
ab_ZX-321
true

Javascript regex to extract variables

I have a string in Javascript that contains variables with the format ${var-name}. For example:
'This is a string with ${var1} and ${var2}'
I need to get these variables in an array: ['var1','var2'].
Is this possible with regex?
Have a try with:
/\$\{(\w+?)\}/
Running example, many thanks to #RGraham :
var regex = new RegExp(/\$\{(\w+?)\}/g),
text = "This is a string with ${var1} and ${var2} and {var3}",
result,
out = [];
while(result = regex.exec(text)) {
out.push(result[1]);
}
console.log(out);
This regex - \${([^\s}]+)(?=}) - should work.
Explanation:
\${ - Match literal ${
([^\s}]+) - A capture group that will match 1 or more character that are not whitespace or literal }.
(?=}) - A positive look-ahead that will check if we finally matched a literal }.
Here is sample code:
var re = /\${([^\s}]+)(?=})/g;
var str = 'This is a string with ${var1} and ${var2} and {var3}';
var arr = [];
while ((m = re.exec(str)) !== null) {
arr.push(m[1]);
}
alert(arr);
var str = 'This is a string with ${var1} and ${var2}';
var re = /\$\{(\w+?)\}/g;
var arr = [];
var match;
while (match = re.exec(str)) {
arr.push(match[1]);
}
console.log(arr);

Remove all occurrences except last?

I want to remove all occurrences of substring = . in a string except the last one.
E.G:
1.2.3.4
should become:
123.4
You can use regex with positive look ahead,
"1.2.3.4".replace(/[.](?=.*[.])/g, "");
2-liner:
function removeAllButLast(string, token) {
/* Requires STRING not contain TOKEN */
var parts = string.split(token);
return parts.slice(0,-1).join('') + token + parts.slice(-1)
}
Alternative version without the requirement on the string argument:
function removeAllButLast(string, token) {
var parts = string.split(token);
if (parts[1]===undefined)
return string;
else
return parts.slice(0,-1).join('') + token + parts.slice(-1)
}
Demo:
> removeAllButLast('a.b.c.d', '.')
"abc.d"
The following one-liner is a regular expression that takes advantage of the fact that the * character is greedy, and that replace will leave the string alone if no match is found. It works by matching [longest string including dots][dot] and leaving [rest of string], and if a match is found it strips all '.'s from it:
'a.b.c.d'.replace(/(.*)\./, x => x.replace(/\./g,'')+'.')
(If your string contains newlines, you will have to use [.\n] rather than naked .s)
You can do something like this:
var str = '1.2.3.4';
var last = str.lastIndexOf('.');
var butLast = str.substring(0, last).replace(/\./g, '');
var res = butLast + str.substring(last);
Live example:
http://jsfiddle.net/qwjaW/
You could take a positive lookahead (for keeping the last dot, if any) and replace the first coming dots.
var string = '1.2.3.4';
console.log(string.replace(/\.(?=.*\.)/g, ''));
A replaceAllButLast function is more useful than a removeAllButLast function. When you want to remove just replace with an empty string:
function replaceAllButLast(str, pOld, pNew) {
var parts = str.split(pOld)
if (parts.length === 1) return str
return parts.slice(0, -1).join(pNew) + pOld + parts.slice(-1)
}
var test = 'hello there hello there hello there'
test = replaceAllButLast(test, ' there', '')
console.log(test) // hello hello hello there
Found a much better way of doing this. Here is replaceAllButLast and appendAllButLast as they should be done. The latter does a replace whilst preserving the original match. To remove, just replace with an empty string.
var str = "hello there hello there hello there"
function replaceAllButLast(str, regex, replace) {
var reg = new RegExp(regex, 'g')
return str.replace(reg, function(match, offset, str) {
var follow = str.slice(offset);
var isLast = follow.match(reg).length == 1;
return (isLast) ? match : replace
})
}
function appendAllButLast(str, regex, append) {
var reg = new RegExp(regex, 'g')
return str.replace(reg, function(match, offset, str) {
var follow = str.slice(offset);
var isLast = follow.match(reg).length == 1;
return (isLast) ? match : match + append
})
}
var replaced = replaceAllButLast(str, / there/, ' world')
console.log(replaced)
var appended = appendAllButLast(str, / there/, ' fred')
console.log(appended)
Thanks to #leaf for these masterpieces which he gave here.
You could reverse the string, remove all occurrences of substring except the first, and reverse it again to get what you want.
function formatString() {
var arr = ('1.2.3.4').split('.');
var arrLen = arr.length-1;
var outputString = '.' + arr[arrLen];
for (var i=arr.length-2; i >= 0; i--) {
outputString = arr[i]+outputString;
}
alert(outputString);
}
See it in action here: http://jsbin.com/izebay
var s='1.2.3.4';
s=s.split('.');
s.splice(s.length-1,0,'.');
s.join('');
123.4

Replace last occurrence of character in string

Is there an easy way in javascript to replace the last occurrence of an '_' (underscore) in a given string?
You don't need jQuery, just a regular expression.
This will remove the last underscore:
var str = 'a_b_c';
console.log( str.replace(/_([^_]*)$/, '$1') ) //a_bc
This will replace it with the contents of the variable replacement:
var str = 'a_b_c',
replacement = '!';
console.log( str.replace(/_([^_]*)$/, replacement + '$1') ) //a_b!c
No need for jQuery nor regex assuming the character you want to replace exists in the string
Replace last char in a string
str = str.substring(0,str.length-2)+otherchar
Replace last underscore in a string
var pos = str.lastIndexOf('_');
str = str.substring(0,pos) + otherchar + str.substring(pos+1)
or use one of the regular expressions from the other answers
var str1 = "Replace the full stop with a questionmark."
var str2 = "Replace last _ with another char other than the underscore _ near the end"
// Replace last char in a string
console.log(
str1.substring(0,str1.length-2)+"?"
)
// alternative syntax
console.log(
str1.slice(0,-1)+"?"
)
// Replace last underscore in a string
var pos = str2.lastIndexOf('_'), otherchar = "|";
console.log(
str2.substring(0,pos) + otherchar + str2.substring(pos+1)
)
// alternative syntax
console.log(
str2.slice(0,pos) + otherchar + str2.slice(pos+1)
)
What about this?
function replaceLast(x, y, z){
var a = x.split("");
a[x.lastIndexOf(y)] = z;
return a.join("");
}
replaceLast("Hello world!", "l", "x"); // Hello worxd!
Another super clear way of doing this could be as follows:
let modifiedString = originalString
.split('').reverse().join('')
.replace('_', '')
.split('').reverse().join('')
Keep it simple
var someString = "a_b_c";
var newCharacter = "+";
var newString = someString.substring(0, someString.lastIndexOf('_')) + newCharacter + someString.substring(someString.lastIndexOf('_')+1);
var someString = "(/n{})+++(/n{})---(/n{})$$$";
var toRemove = "(/n{})"; // should find & remove last occurrence
function removeLast(s, r){
s = s.split(r)
return s.slice(0,-1).join(r) + s.pop()
}
console.log(
removeLast(someString, toRemove)
)
Breakdown:
s = s.split(toRemove) // ["", "+++", "---", "$$$"]
s.slice(0,-1) // ["", "+++", "---"]
s.slice(0,-1).join(toRemove) // "})()+++})()---"
s.pop() // "$$$"
Reverse the string, replace the char, reverse the string.
Here is a post for reversing a string in javascript: How do you reverse a string in place in JavaScript?
// Define variables
let haystack = 'I do not want to replace this, but this'
let needle = 'this'
let replacement = 'hey it works :)'
// Reverse it
haystack = Array.from(haystack).reverse().join('')
needle = Array.from(needle).reverse().join('')
replacement = Array.from(replacement).reverse().join('')
// Make the replacement
haystack = haystack.replace(needle, replacement)
// Reverse it back
let results = Array.from(haystack).reverse().join('')
console.log(results)
// 'I do not want to replace this, but hey it works :)'
This is very similar to mplungjan's answer, but can be a bit easier (especially if you need to do other string manipulation right after and want to keep it as an array)
Anyway, I just thought I'd put it out there in case someone prefers it.
var str = 'a_b_c';
str = str.split(''); //['a','_','b','_','c']
str.splice(str.lastIndexOf('_'),1,'-'); //['a','_','b','-','c']
str = str.join(''); //'a_b-c'
The '_' can be swapped out with the char you want to replace
And the '-' can be replaced with the char or string you want to replace it with
You can use this code
var str="test_String_ABC";
var strReplacedWith=" and ";
var currentIndex = str.lastIndexOf("_");
str = str.substring(0, currentIndex) + strReplacedWith + str.substring(currentIndex + 1, str.length);
alert(str);
This is a recursive way that removes multiple occurrences of "endchar":
function TrimEnd(str, endchar) {
while (str.endsWith(endchar) && str !== "" && endchar !== "") {
str = str.slice(0, -1);
}
return str;
}
var res = TrimEnd("Look at me. I'm a string without dots at the end...", ".");
console.log(res)

Categories

Resources