From 'one behind' to 'one before' (between) - javascript

In the string below, I want to find, for example, everything between '[' and 'A'.
Here that is 'match1', 'match2' and 'match3'. And then replace every match with, for example, 'check'.
var str = "+dkdele*[match1Ayesses ss [match2Aevey[match3A";
var pattern = /(?=\[)(.*?)([]?A)/g; // includes '[' and 'A'
var res = str.replace(pattern, "check"); // could be '[checkA'
console.log(res);
The pattern includes '[' and 'A', which I don't want. What would a pattern look like that matches only the text between any two chosen delimiter characters?

You can use this regex:
/\[(\w+)(?=A)/g
and grab captured group #1
RegEx Demo
If there can be more than word characters then use:
/\[([^A]+)(?=A)/g
Code:
var re = /\[([^A]+)(?=A)/g;
var str = '+dkdele*[match1Ayesses ss [match2Aevey[match3A';
var m;
while ((m = re.exec(str)) !== null) {
if (m.index === re.lastIndex)
re.lastIndex++;
console.log(m[1]);
}
Output:
match1
match2
match3
EDIT: Based on edited question and comment below:
For replacement you can use:
// Replace the text between every '[' and the following 'A', keeping both delimiters.
// The g flag is required so that all "[...A" spans are rewritten, not just the first one.
var repl = str.replace(/(\[)[^A]+(?=A)/g, '$1check');
//=> +dkdele*[checkAyesses ss [checkAevey[checkA
PS: If you want A also to be replaced then use:
// Replace the text between every '[' and the following 'A', consuming the 'A' as well.
// The g flag is required so that all "[...A" spans are rewritten, not just the first one.
var repl = str.replace(/(\[)[^A]+A/g, '$1check');
//=> +dkdele*[checkyesses ss [checkevey[check

You can use:
var pattern = /(\[).*?(A)/g;
var replacement = "check";
var res = str.replace( pattern, '$1' + replacement + '$2' );
The regular expression is:
(\[) - match an open square bracket and capture the match in the first group.
.*? - match the minimum possible of zero-or-more of any characters
(A) - then match an A and capture it in the second group.
The replacement will put in the first and second capture groups instead of $1 and $2 (respectively).
With any start and end matches you can do:
var start = "\\[";
var end = "A";
var pattern = new RegExp( "(" + start + ").*?(" + end + ")", "g" );
var replacement = "check";
var res = str.replace( pattern, '$1' + replacement + '$2' );
If you don't want to include the start and end match characters then don't include the capture groups in the replacement:
var res = str.replace( pattern, replacement );

Related

Trouble with Regular Expression and special characters

I am using a regular expression for a glossary function on a website, but it cannot handle special characters such as æ, ø and å. The regex is as follows:
var re = new RegExp("\\b" + pat + "\\b", "g");
How can I modify the regex above to handle special characters?
You may use the solution from here:
/**
 * Collects every whole-word occurrence of `word` in `input`.
 *
 * A "word character" here is any Unicode letter, an ASCII digit or an
 * underscore. An occurrence counts only when it is not directly preceded or
 * followed by such a character. Matching is case-insensitive.
 *
 * @param {string} input - Text to search.
 * @param {string} word - Word to look for. It is inserted into the pattern
 *   as regex source without escaping, so regex metacharacters keep their
 *   special meaning.
 * @returns {string[]} The matched occurrences, as they appear in `input`.
 */
function getWholeWords(input, word) {
  // \p{L} (any Unicode letter) stands in for a hand-maintained table of
  // letter ranges; property escapes require the "u" flag.
  const wordChars = "_0-9\\p{L}";
  const rx = new RegExp(
    "(?:^|[^" + wordChars + "])(" + word + ")(?![" + wordChars + "])",
    "igu"
  );
  const words = [];
  // Declared locally: assigning to an undeclared `m` throws in strict mode / ES modules.
  let m;
  while ((m = rx.exec(input)) !== null) {
    if (m[0] === "") {
      // An empty match does not advance lastIndex; step past one whole code
      // point so the loop always terminates.
      rx.lastIndex += input.codePointAt(rx.lastIndex) > 0xffff ? 2 : 1;
    }
    words.push(m[1]); // Add an occurrence
  }
  return words;
}
var word = "æøå";
var input = "æøå, AæøåZ, BæøåY, and æøå!";
document.body.innerHTML = "<pre>" + JSON.stringify(getWholeWords(input, word), 0, 4) + "</pre>";
Or a regex that will look for a word only if it is enclosed with whitespace/start/end of the string:
var re = new RegExp("(?:^|\\s)(" + pat.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&') + ")(?!\\S)", "g");
and grab Group 1 value.

How can I make a regex that tests if a whitespace exists after each word?

After each word, the presence of a space is tested. If there is exactly one space, then it is correct.
So far I only have /^[a-zA-Z]+$/. I am using this in JS.
This regex matches every word in a string (in regex terms a word may also contain the underscore '_') that is followed by 2 or more whitespace characters: /\w+(?=\s{2,})/g
A snippet of code in javascript to extract them:
// Matches each run of word characters that is followed by two or more whitespace characters.
var regex = /\w+(?=\s{2,})/g;
var input = 'Word_1 Wrong_Word test3 wrongWord_2 and so On !';
var m;
// With the g flag, each exec() call resumes at regex.lastIndex and returns the next match, or null when none remain.
while ((m = regex.exec(input)) !== null) {
// Guard: after an empty match lastIndex equals the match index, so step past it to keep the loop moving.
if (m.index === regex.lastIndex) {
regex.lastIndex++;
}
// The offending word for this iteration is m[0]; the pattern has no capture groups, so there is no m[1].
}
Match 1 word, and repeat 1 space + 1 word.
Regex:
/^\w+(?: \w+)*$/
regex101 Demo
Code:
var re = /^\w+(?: \w+)*$/;
var str = 'word1 word2 word3';
if (re.test(str)) {
document.write('"' + str + '" matches ' + re);
} else {
document.write('No match.');
}

Backward capture group concatenated with forward capture group

I think the title says it all. I'm trying to get groups and concatenate them together.
I have this text:
GPX 10.802.123/3843­ 1 -­ IDENTIFIER 48
And I want this output:
IDENTIFIER 10.802.123/3843-48
So I want to explicitly say, I want to capture one group before this word and after, then concatenate both, only using regex. Is this possible?
I can already extract the 48 like this:
var text = GPX 10.802.123/3843­ 1 -­ IDENTIFIER 48
var reg = new RegExp('IDENTIFIER' + '.*?(\\d\\S*)', 'i');
var match = reg.exec(text);
Output:
48
Can it be done?
I'm offering 200 points.
You must precisely define the groups that you want to extract before and after the word. If you define the group before the word as four or more non-whitespace characters, and the group after the word as one or more non-whitespace characters, you can use the following regular expression.
var re = new RegExp('(\\S{4,})\\s+(?:\\S{1,3}\\s+)*?' + word + '.*?(\\S+)', 'i');
var groups = re.exec(text);
if (groups !== null) {
var result = groups[1] + groups[2];
}
Let me break down the regular expression. Note that we have to escape the backslashes because we're writing a regular expression inside a string.
(\\S{4,}) captures a group of four or more non-whitespace characters
\\s+ matches one or more whitespace characters
(?: indicates the start of a non-capturing group
\\S{1,3} matches one to three non-whitespace characters
\\s+ matches one or more whitespace characters
)*? makes the non-capturing group match zero or more times, as few times as possible
word matches whatever was in the variable word when the regular expression was compiled
.*? matches any character zero or more times, as few times as possible
(\\S+) captures one or more non-whitespace characters
the 'i' flag makes this a case-insensitive regular expression
Observe that our use of the ? modifier allows us to capture the nearest groups before and after the word.
You can match the regular expression globally in the text by adding the g flag. The snippet below demonstrates how to extract all matches.
/**
 * Finds every occurrence of `word` in `text` and, for each one, joins the
 * nearest preceding token of four or more non-whitespace characters with the
 * first run of non-whitespace characters that follows the word.
 *
 * @param {string} word - Regex source for the anchor word (inserted as-is).
 * @param {string} text - Text to scan.
 * @returns {string[]} One concatenated "before + after" string per match.
 */
function forward_and_backward(word, text) {
  const pattern = new RegExp('(\\S{4,})\\s+(?:\\S{1,3}\\s+)*?' + word + '.*?(\\S+)', 'ig');
  const results = [];
  // The global flag makes each exec() call resume after the previous match.
  for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
    const before = hit[1];
    const after = hit[2];
    results.push(before + after);
  }
  return results;
}
var sampleText = " GPX 10.802.123/3843- 1 -- IDENTIFIER 48 A BC 444.2345.1.1/99x 28 - - Identifier 580 X Y Z 9.22.16.1043/73+ 0 *** identifier 6800";
results = forward_and_backward('IDENTIFIER', sampleText);
for (var i = 0; i < results.length; ++i) {
document.write('result ' + i + ': "' + results[i] + '"<br><br>');
}
body {
font-family: monospace;
}
You can do:
var text = 'GPX 10.802.123/3843­ 1 -­ IDENTIFIER 48';
var match = /GPX\s+(.+?) \d .*?(IDENTIFIER).*?(\d\S*)/i.exec(text);
var output = match[2] + ' ' + match[1] + '-' + match[3];
//=> "IDENTIFIER 10.802.123/3843­-48"
This would be possible through replace function.
var s = 'GPX 10.802.123/3843­ 1 -­ IDENTIFIER 48'
s.replace(/.*?(\S+)\s+\d+\s*-\s*(IDENTIFIER)\s*(\d+).*/, "$2 $1-$3")
^\s*\S+\s*\b(\d+(?:[./]\d+)+)\b.*?-.*?\b(\S+)\b\s*(\d+)\s*$
You can try this. Replace by $2 $1-$3. See the demo.
https://regex101.com/r/sS2dM8/38
var re = /^\s*\S+\s*\b(\d+(?:[.\/]\d+)+)\b.*?-.*?\b(\S+)\b\s*(\d+)\s*$/gm;
var str = 'GPX 10.802.123/3843­ 1 -­ IDENTIFIER 48';
var subst = '$2 $1-$3';
var result = str.replace(re, subst);
You can use split too:
var text = 'GPX 10.802.123/3843­ 1 -­ IDENTIFIER 48';
var parts = text.split(/\s+/);
if (parts[4] == 'IDENTIFIER') {
var result = parts[4] + ' ' + parts[1] + '-' + parts[5];
console.log(result);
}

javascript match returning word boundary

RegEx is not my strong suit. I hope one of you can help me.
I am trying to use JavaScript's match() to search for all hashed words (those with # at the beginning),
but one of the returned results includes leading whitespace.
string = "#foo #bar"
result = string.match(/(^|\W)(#[a-z\d][\w-]*)/ig);
console.log(result)
Results in:
["#foo", " #bar"]
Notice the whitespace in #bar.
What should I change to my RegEx to exclude the boundary in the return to .match?
Thanks for the help!!
You don't need (^|\W) before as # is considered non word character anyway. You are getting space because \W will match space also.
This regex would work better:
var re = /(?:^|\s)(#[a-z\d][\w-]*)/g,
matches = [],
input = "#foo #bar abc#baz";
while (match = re.exec(input)) matches.push(match[1].trim());
console.log(matches);
//=> ["#foo", "#bar"]
EDIT: To avoid loops:
var m = [];
var str = "#foo #bar abc#baz";
str.replace(/(^|\s)(#[a-z\d][\w-]*)/g, function($1) { m.push($1.trim()); return $1; } );
console.log(m);
//=> ["#foo", "#bar"]
You need to use this syntax to extract capture groups:
var str = '#foo #bar';
var myRegexp = new RegExp('(?:^|\\W)(#[^\\W_][\\w-]*)', 'g');
var matchResult = myRegexp.exec(str);
var result = Array();
while (matchResult != null) {
result.push(matchResult[1]);
matchResult = myRegexp.exec(str);
}
console.log(result);
If you don't want to loop through the match results, you can use this trick:
var str = '#foo #bar';
var result = Array();
str.replace(/(?:^|\W)(#[^\W_][\w-]*)/g, function (m, g1) { result.push(g1); } );
console.log(result);

Replace last occurrence of character in string

Is there an easy way in javascript to replace the last occurrence of an '_' (underscore) in a given string?
You don't need jQuery, just a regular expression.
This will remove the last underscore:
var str = 'a_b_c';
console.log( str.replace(/_([^_]*)$/, '$1') ) //a_bc
This will replace it with the contents of the variable replacement:
var str = 'a_b_c',
replacement = '!';
console.log( str.replace(/_([^_]*)$/, replacement + '$1') ) //a_b!c
No need for jQuery nor regex assuming the character you want to replace exists in the string
Replace last char in a string
// substring's end index is exclusive, so length - 1 drops exactly the last character.
str = str.substring(0, str.length - 1) + otherchar
Replace last underscore in a string
var pos = str.lastIndexOf('_');
str = str.substring(0,pos) + otherchar + str.substring(pos+1)
or use one of the regular expressions from the other answers
var str1 = "Replace the full stop with a questionmark.";
var str2 = "Replace last _ with another char other than the underscore _ near the end";

// Replace last char in a string.
// substring's end index is exclusive, so length - 1 drops exactly one character.
console.log(
  str1.substring(0, str1.length - 1) + "?"
);
// alternative syntax: a negative slice index counts from the end
console.log(
  str1.slice(0, -1) + "?"
);

// Replace last underscore in a string: cut around the index of its last occurrence.
var pos = str2.lastIndexOf('_'), otherchar = "|";
console.log(
  str2.substring(0, pos) + otherchar + str2.substring(pos + 1)
);
// alternative syntax
console.log(
  str2.slice(0, pos) + otherchar + str2.slice(pos + 1)
);
What about this?
/**
 * Replaces the last occurrence of `y` in `x` with `z`.
 *
 * @param {string} x - String to search.
 * @param {string} y - Substring to replace; may be longer than one character.
 * @param {*} z - Replacement; null/undefined is treated as an empty string.
 * @returns {string} `x` with its last `y` replaced, or `x` unchanged when
 *   `y` does not occur.
 */
function replaceLast(x, y, z) {
  const at = x.lastIndexOf(y);
  if (at === -1) {
    return x;
  }
  const replacement = z == null ? "" : z;
  // Skip the whole needle (y.length), not just one character, so that
  // multi-character needles are replaced completely.
  return x.slice(0, at) + replacement + x.slice(at + y.length);
}
replaceLast("Hello world!", "l", "x"); // Hello worxd!
Another super clear way of doing this could be as follows:
let modifiedString = originalString
.split('').reverse().join('')
.replace('_', '')
.split('').reverse().join('')
Keep it simple
var someString = "a_b_c";
var newCharacter = "+";
var newString = someString.substring(0, someString.lastIndexOf('_')) + newCharacter + someString.substring(someString.lastIndexOf('_')+1);
var someString = "(/n{})+++(/n{})---(/n{})$$$";
var toRemove = "(/n{})"; // should find & remove last occurrence
/**
 * Removes the last occurrence of `r` from `s`.
 *
 * The string is split on `r`; the final piece is then glued back onto the
 * rest without a separator, which drops exactly the last `r`.
 *
 * @param {string} s - String to search.
 * @param {string} r - Substring whose last occurrence is removed.
 * @returns {string} `s` without its last `r`.
 */
function removeLast(s, r) {
  const pieces = s.split(r);
  const tail = pieces.pop();
  return pieces.join(r) + tail;
}
console.log(
removeLast(someString, toRemove)
)
Breakdown:
s = s.split(toRemove) // ["", "+++", "---", "$$$"]
s.slice(0,-1) // ["", "+++", "---"]
s.slice(0,-1).join(toRemove) // "(/n{})+++(/n{})---"
s.pop() // "$$$"
Reverse the string, replace the char, reverse the string.
Here is a post for reversing a string in javascript: How do you reverse a string in place in JavaScript?
// Define variables
let haystack = 'I do not want to replace this, but this'
let needle = 'this'
let replacement = 'hey it works :)'
// Reverse it
haystack = Array.from(haystack).reverse().join('')
needle = Array.from(needle).reverse().join('')
replacement = Array.from(replacement).reverse().join('')
// Make the replacement
haystack = haystack.replace(needle, replacement)
// Reverse it back
let results = Array.from(haystack).reverse().join('')
console.log(results)
// 'I do not want to replace this, but hey it works :)'
This is very similar to mplungjan's answer, but can be a bit easier (especially if you need to do other string manipulation right after and want to keep it as an array)
Anyway, I just thought I'd put it out there in case someone prefers it.
var str = 'a_b_c';
str = str.split(''); //['a','_','b','_','c']
str.splice(str.lastIndexOf('_'),1,'-'); //['a','_','b','-','c']
str = str.join(''); //'a_b-c'
The '_' can be swapped out with the char you want to replace
And the '-' can be replaced with the char or string you want to replace it with
You can use this code
var str="test_String_ABC";
var strReplacedWith=" and ";
var currentIndex = str.lastIndexOf("_");
str = str.substring(0, currentIndex) + strReplacedWith + str.substring(currentIndex + 1, str.length);
alert(str);
This is an iterative way that removes every trailing occurrence of "endchar":
/**
 * Strips every trailing occurrence of `endchar` from `str`.
 *
 * @param {string} str - String to trim.
 * @param {string} endchar - Suffix to remove; may be longer than one
 *   character. An empty suffix leaves `str` unchanged.
 * @returns {string} `str` with all trailing repetitions of `endchar` removed.
 */
function TrimEnd(str, endchar) {
  const suffix = String(endchar);
  if (suffix === "") {
    return str;
  }
  let trimmed = str;
  while (trimmed !== "" && trimmed.endsWith(suffix)) {
    // Drop the whole suffix each round so multi-character suffixes are
    // removed completely rather than one character at a time.
    trimmed = trimmed.slice(0, -suffix.length);
  }
  return trimmed;
}
var res = TrimEnd("Look at me. I'm a string without dots at the end...", ".");
console.log(res)

Categories

Resources