regex restriction alphabet - javascript

I have a string as :
var str = "str is str, 12str345 and ABCstrDEF";
I want to capture every str except the one inside ABCstrDEF (that is, a str surrounded by alphabetical characters).
Is it possible to exclude matches that are surrounded by letters using a regex?

Go with
/**
 * Escapes every regular-expression metacharacter in a value so the result
 * can be embedded in a pattern and matched literally.
 *
 * @param {*} str - Value to escape; it is coerced to a string first.
 * @returns {string} The text with a backslash in front of each metacharacter.
 */
RegExp.quote = function(str) {
  var text = '' + str;
  // "$&" re-inserts the matched metacharacter right after the backslash.
  return text.replace(/[.?*+^$[\]\\(){}|-]/g, "\\$&");
};
// Build the pattern from strings: backslashes must be doubled ("\\b"), because
// "\b" inside a string literal is a backspace character, and the flags go in the
// second argument -- "/.../g" delimiters belong to regex literals only.
// The pattern matches the quoted text together with any adjacent non-letter
// characters, as long as the whole run sits between word boundaries.
var re = new RegExp("\\b[^a-zA-Z]*?" + RegExp.quote(str) + "[^a-zA-Z]*?\\b", "g");
alert(input.match(re));

Related

Trouble with Regular Expression and special charters

I am using a regular expression for a glossary function on a website, but it cannot "handle" special characters such as æ, ø and å. The regex is as follows:
var re = new RegExp("\\b" + pat + "\\b", "g");
How can I modify the regex above to handle special characters?
You may use the solution from here:
/**
 * Collects every whole-word, case-insensitive occurrence of `word` in `input`.
 *
 * An occurrence counts as a whole word when it is neither preceded nor followed
 * by a letter, an ASCII digit or an underscore. Letters are recognised through
 * the Unicode property escape \p{L}, so characters such as æ, ø and å are
 * treated as letters without a hand-maintained range table.
 *
 * @param {string} input - Text to search.
 * @param {string} word - Word to look for; it is matched literally.
 * @returns {string[]} The occurrences as they appear in `input`, in order.
 */
function getWholeWords(input, word) {
  // Escape only syntax characters: under the "u" flag an escaped "-" outside a
  // character class would be a syntax error.
  var literal = String(word).replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
  if (literal === "") {
    // An empty word would match with zero length and make exec() loop forever.
    return [];
  }
  var wordChars = "_0-9\\p{L}";
  var rx = new RegExp("(?:^|[^" + wordChars + "])(" + literal + ")(?![" + wordChars + "])", "giu");
  var words = [];
  var m;
  while ((m = rx.exec(input)) !== null) {
    words.push(m[1]); // Group 1 holds the word without the preceding character
  }
  return words;
}
var word = "æøå";
var input = "æøå, AæøåZ, BæøåY, and æøå!";
document.body.innerHTML = "<pre>" + JSON.stringify(getWholeWords(input, word), 0, 4) + "</pre>";
Or a regex that will look for a word only if it is enclosed with whitespace/start/end of the string:
var re = new RegExp("(?:^|\\s)(" + pat.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&') + ")(?!\\S)", "g");
and grab Group 1 value.

Javascript RegExp ignores Metacharacters

I've been trying hours to figure this out without any luck. Would be grateful if someone could point me to the right direction.
The problem is that the RegExp Object is totally ignoring the \b and \s Metacharacters.
var orOperator = 'or';
var term = 'ipsum';
var str = 'lorem or ipsum or ipsumal';
var regex = new RegExp('(\\s)\\b' + orOperator + '\\s' + term + '\\b', 'gi');
console.log(regex);
str = str.replace(regex, '');
While the resulting regex should be
/(\s)\bor\sipsum\b/gi
I keep getting this one instead (with Metacharacters stripped) and I don't know why
/(s)orsipsum/gi
You can also use String.raw tag function (more on MDN) — it'll ignore (not process) any escape sequences aka metacharacters and allow substitutions:
var orOperator = 'or';
var term = 'ipsum';
var str = 'lorem or ipsum or ipsumal';
var regexStr = String.raw`(\s)\b${orOperator}\s${term}\b`;
var regexFlags = 'gi';
var regex = new RegExp(regexStr, regexFlags);
console.log(regex);
str = str.replace(regex, '');
and result is
/(\s)\bor\sipsum\b/gi
'lorem or ipsumal'
As #YizhengShen and #CasimiretHippolyte suggested in the comments, 4 backslashes for each Metacharacter did the trick.
var regex = new RegExp('(\\\\s)\\\\b' + orOperator + '\\\\s' + term + '\\\\b', 'gi');

Split strings but preserve delimiters in the array of splits

Given a string like the following in JavaScript
var a = 'hello world\n\nbye world\n\nfoo\nbar\n\nfoo\nbaz\n\n';
I want to split it into an array like this
['hello world', '\n\n', 'bye world', '\n\n', 'foo\nbar', '\n\n', 'foo\nbaz', '\n\n'].
If the input is var a = 'hello world\n\nbye world', the result should be ['hello world', '\n\n', 'bye world'].
In other words, I want to split the string around '\n\n' into an array such that the array contains the '\n\n' as well. Is there any neat way to do this in JavaScript?
Here’s a one liner:
str.match(/\n\n|(?:[^\n]|\n(?!\n))+/g)
Here’s how it works:
\n\n matches the two consecutive newline characters
(?:[^\n]|\n(?!\n))+ matches any sequence of one or more character of either
[^\n] not a newline character, or
\n(?!\n) a newline character but only if not followed by another newline character
This recursive pattern can be applied on any length:
// useful function to quote strings for literal match in regular expressions
// Install the helper only when no implementation is present yet.
if (!RegExp.quote) {
  /**
   * Prefixes every regular-expression metacharacter in a value with a
   * backslash so the text can be matched literally inside a pattern.
   *
   * @param {*} str - Value to escape; it is coerced to a string first.
   * @returns {string} The escaped text.
   */
  RegExp.quote = function(str) {
    var text = "" + str;
    return text.replace(/[.?*+^$[\]\\(){}|-]/g, "\\$&");
  };
}
// helper function to build the above pattern recursively
/**
 * Builds the "anything that does not start the delimiter" part of the split
 * pattern, beginning at position `i` of the delimiter's characters.
 *
 * For the last character the result is a negative lookahead "(?!c)". For any
 * earlier character it is "(?:[^c]|c(?!rest))": either a different character,
 * or `c` itself provided the remainder of the delimiter does not follow.
 * Checking the remainder with a lookahead (instead of consuming the next
 * character) keeps delimiters of three or more characters from being skipped,
 * e.g. "abc" inside "xaabcy". For two-character delimiters the output is the
 * same as the nested form it replaces.
 *
 * @param {string[]} chars - The delimiter split into single characters.
 * @param {number} i - Index of the character to start from.
 * @returns {string} Regular-expression source text.
 */
function buildRecursivePattern(chars, i) {
  var c = RegExp.quote(chars[i]);
  var rest = "";
  for (var j = i + 1; j < chars.length; j++) {
    rest += RegExp.quote(chars[j]);
  }
  if (rest === "") {
    return "(?!" + c + ")";
  }
  return "(?:[^" + c + "]|" + c + "(?!" + rest + "))";
}
/**
 * Builds a global regex that matches either the delimiter itself or the
 * longest run of text that does not contain it, so that String.prototype.match
 * yields the pieces and the delimiters in order.
 *
 * @param {string} str - The delimiter to split around; must not be empty.
 * @returns {RegExp} A regex with the "g" flag set.
 * @throws {TypeError} When the delimiter is empty.
 */
function buildPattern(str) {
  // Use the argument rather than a same-named global, so the function works
  // for whatever delimiter the caller passes in.
  var delim = String(str);
  if (delim === "") {
    throw new TypeError("buildPattern: delimiter must be a non-empty string");
  }
  var chars = delim.match(/[^]/g); // one entry per UTF-16 code unit
  return new RegExp(RegExp.quote(delim) + "|" + buildRecursivePattern(chars, 0) + "+", "g");
}
var str = 'hello world\n\nbye world\n\nfoo\nbar\n\nfoo\nbaz\n\n',
delimiter = "\n\n",
parts;
parts = str.match(buildPattern(delimiter))
Update    Here’s a modification for String.prototype.split that should add the feature of containing a matched separator as well:
/**
 * Splits `input` around every match of `separator` and, like the standard
 * split(), places the text captured by the separator's groups between the
 * surrounding pieces.
 *
 * @param {*} input - Value to split; it is coerced to a string.
 * @param {RegExp} separator - Pattern to split on. Only its source and its
 *     ignoreCase / multiline flags are used; a fresh global copy does the work.
 * @param {number} [limit] - Stop after this many splits; the unsplit remainder
 *     is still appended as the final element.
 * @returns {Array} Pieces and captured separators in order of appearance.
 */
function splitKeepingSeparators(input, separator, limit) {
  var flags = "g" + (separator.ignoreCase ? "i" : "") + (separator.multiline ? "m" : "");
  var re = new RegExp(separator.source, flags);
  var text = String(input);
  var result = [];
  var counter = 0;
  var lastIndex = 0;
  var match;
  while ((match = re.exec(text)) !== null) {
    if (match[0].length === 0) {
      // A zero-length match does not advance lastIndex by itself; step over it
      // so the loop terminates. It only splits when it lies strictly between
      // the previous split point and the end of the text.
      re.lastIndex += 1;
      if (match.index === lastIndex || match.index >= text.length) continue;
    }
    result.push(text.substring(lastIndex, match.index));
    for (var g = 1; g < match.length; g++) {
      result.push(match[g]);
    }
    lastIndex = match.index + match[0].length;
    if (++counter === limit) break;
  }
  result.push(text.substring(lastIndex));
  return result;
}

// Patch split() only in engines whose native version drops captured separators.
if ("a".split(/(a)/).length !== 3) {
  (function() {
    var nativeSplit = String.prototype.split;
    String.prototype.split = function(separator, limit) {
      if (separator instanceof RegExp) {
        return splitKeepingSeparators(this, separator, limit);
      }
      // Pass the receiver along; apply() takes it as the first argument.
      return nativeSplit.apply(this, arguments);
    };
  })();
}

Remove all occurrences except last?

I want to remove all occurrences of substring = . in a string except the last one.
E.G:
1.2.3.4
should become:
123.4
You can use regex with positive look ahead,
"1.2.3.4".replace(/[.](?=.*[.])/g, "");
2-liner:
function removeAllButLast(string, token) {
  /* Requires STRING to contain TOKEN: when TOKEN is absent, the result is
     TOKEN followed by the unchanged STRING. */
  var pieces = string.split(token);
  var lastAt = pieces.length - 1;
  // Everything before the final piece is glued together without TOKEN ...
  var head = pieces.slice(0, lastAt).join('');
  // ... and the final piece is re-attached behind a single TOKEN.
  var tail = pieces.slice(lastAt);
  return head + token + tail;
}
Alternative version without the requirement on the string argument:
/**
 * Removes every occurrence of `token` from `string` except the last one.
 *
 * @param {string} string - Text to process.
 * @param {string} token - Separator to remove.
 * @returns {string} The text with only the final token kept; `string` itself
 *     when splitting produced no second piece.
 */
function removeAllButLast(string, token) {
  var pieces = string.split(token);
  if (pieces[1] !== undefined) {
    var lastAt = pieces.length - 1;
    return pieces.slice(0, lastAt).join('') + token + pieces.slice(lastAt);
  }
  return string;
}
Demo:
> removeAllButLast('a.b.c.d', '.')
"abc.d"
The following one-liner is a regular expression that takes advantage of the fact that the * character is greedy, and that replace will leave the string alone if no match is found. It works by matching [longest string including dots][dot] and leaving [rest of string], and if a match is found it strips all '.'s from it:
'a.b.c.d'.replace(/(.*)\./, x => x.replace(/\./g,'')+'.')
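(If your string contains newlines, note that a bare . does not match them; use [^] or [\s\S] in its place. A class such as [.\n] would not help, because inside brackets . is just a literal dot.)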
You can do something like this:
var str = '1.2.3.4';
var last = str.lastIndexOf('.');
var butLast = str.substring(0, last).replace(/\./g, '');
var res = butLast + str.substring(last);
Live example:
http://jsfiddle.net/qwjaW/
You could take a positive lookahead (for keeping the last dot, if any) and replace the first coming dots.
var string = '1.2.3.4';
console.log(string.replace(/\.(?=.*\.)/g, ''));
A replaceAllButLast function is more useful than a removeAllButLast function. When you want to remove just replace with an empty string:
/**
 * Replaces every occurrence of `pOld` in `str` with `pNew`, except the last
 * occurrence, which is kept as it is.
 *
 * @param {string} str - Text to process.
 * @param {string} pOld - Substring to look for.
 * @param {string} pNew - Replacement for all but the last occurrence.
 * @returns {string} The rewritten text, or `str` when `pOld` does not occur.
 */
function replaceAllButLast(str, pOld, pNew) {
  var pieces = str.split(pOld);
  if (pieces.length !== 1) {
    var lastAt = pieces.length - 1;
    var head = pieces.slice(0, lastAt).join(pNew);
    return head + pOld + pieces.slice(lastAt);
  }
  // A single piece means nothing was split off, so there is nothing to replace.
  return str;
}
var test = 'hello there hello there hello there'
test = replaceAllButLast(test, ' there', '')
console.log(test) // hello hello hello there
Found a much better way of doing this. Here is replaceAllButLast and appendAllButLast as they should be done. The latter does a replace whilst preserving the original match. To remove, just replace with an empty string.
var str = "hello there hello there hello there"
/**
 * Replaces every match of `regex` in `str` with `replace`, leaving only the
 * final match untouched.
 *
 * @param {string} str - Text to process.
 * @param {RegExp|string} regex - Pattern to search for; a global copy is made.
 * @param {string} replace - Text substituted for all matches but the last.
 * @returns {string} The rewritten text.
 */
function replaceAllButLast(str, regex, replace) {
  var globalRe = new RegExp(regex, 'g');
  function substitute(found, position, whole) {
    // Count the matches from this position onward; exactly one means the
    // current match is the last one in the text.
    var remaining = whole.slice(position);
    var matchesLeft = remaining.match(globalRe).length;
    if (matchesLeft == 1) {
      return found;
    }
    return replace;
  }
  return str.replace(globalRe, substitute);
}
/**
 * Appends `append` after every match of `regex` in `str`, except after the
 * final match. The matched text itself is preserved.
 *
 * @param {string} str - Text to process.
 * @param {RegExp|string} regex - Pattern to search for; a global copy is made.
 * @param {string} append - Text inserted after all matches but the last.
 * @returns {string} The rewritten text.
 */
function appendAllButLast(str, regex, append) {
  var globalRe = new RegExp(regex, 'g');
  function extend(found, position, whole) {
    // Exactly one match from here to the end means this is the last one.
    var remaining = whole.slice(position);
    var matchesLeft = remaining.match(globalRe).length;
    if (matchesLeft == 1) {
      return found;
    }
    return found + append;
  }
  return str.replace(globalRe, extend);
}
var replaced = replaceAllButLast(str, / there/, ' world')
console.log(replaced)
var appended = appendAllButLast(str, / there/, ' fred')
console.log(appended)
Thanks to #leaf for these masterpieces which he gave here.
You could reverse the string, remove all occurrences of substring except the first, and reverse it again to get what you want.
/**
 * Demo: strips every dot but the last from the fixed sample '1.2.3.4' and
 * shows the result in an alert box.
 */
function formatString() {
  var pieces = ('1.2.3.4').split('.');
  // The final piece keeps its leading dot; the earlier ones are simply joined.
  var tail = '.' + pieces[pieces.length - 1];
  var head = pieces.slice(0, -1).join('');
  alert(head + tail);
}
See it in action here: http://jsbin.com/izebay
var s='1.2.3.4';
s=s.split('.');
s.splice(s.length-1,0,'.');
s.join('');
123.4

Replace all spaces in a string with '+' [duplicate]

This question already has answers here:
How do I replace all occurrences of a string in JavaScript?
(78 answers)
Closed 7 years ago.
I have a string that contains multiple spaces. I want to replace these with a plus symbol. I thought I could use
var str = 'a b c';
var replaced = str.replace(' ', '+');
but it only replaces the first occurrence. How can I get it to replace all occurrences?
You need the /g (global) option, like this:
var replaced = str.replace(/ /g, '+');
You can give it a try here. Unlike most other languages, JavaScript, by default, only replaces the first occurrence.
Here's an alternative that doesn't require regex:
var str = 'a b c';
var replaced = str.split(' ').join('+');
var str = 'a b c';
var replaced = str.replace(/\s/g, '+');
You can also do it like:
str = str.replace(/\s/g, "+");
Have a look at this fiddle.
Use global search in the string. g flag
str.replace(/\s+/g, '+');
source: replaceAll function
Use a regular expression with the g modifier:
var replaced = str.replace(/ /g, '+');
From Using Regular Expressions with JavaScript and ActionScript:
/g enables "global" matching. When using the replace() method, specify this modifier to replace all matches, rather than only the first one.
You need to look for some replaceAll option
str = str.replace(/ /g, "+");
this is a regular expression way of doing a replaceAll.
/**
 * Replaces every occurrence of a literal substring in a single pass.
 *
 * The text is split on the search string and re-joined with the replacement,
 * so the call always terminates -- even when the replacement contains the
 * search string -- and "$" sequences in the replacement are inserted verbatim.
 * Text produced by a replacement is not scanned again.
 *
 * @param {string} Source - Text to process.
 * @param {string} stringToFind - Literal substring to look for.
 * @param {string} stringToReplace - Text to put in its place.
 * @returns {string} The rewritten text; unchanged when `stringToFind` is empty
 *     or does not occur.
 */
function ReplaceAll(Source, stringToFind, stringToReplace) {
  var text = String(Source);
  var needle = String(stringToFind);
  if (needle === "") {
    // An empty search string occurs everywhere; treat it as "nothing to do".
    return text;
  }
  return text.split(needle).join(String(stringToReplace));
}
/**
 * Replaces every occurrence of a literal substring in this string, in a
 * single pass.
 *
 * Splitting and re-joining guarantees termination even when the replacement
 * contains the search string, inserts "$" sequences verbatim, and always
 * yields a primitive string.
 *
 * @param {string} stringToFind - Literal substring to look for.
 * @param {string} stringToReplace - Text to put in its place.
 * @returns {string} The rewritten text; unchanged when `stringToFind` is empty
 *     or does not occur.
 */
String.prototype.ReplaceAll = function (stringToFind, stringToReplace) {
  var text = String(this);
  var needle = String(stringToFind);
  if (needle === "") {
    // An empty search string occurs everywhere; treat it as "nothing to do".
    return text;
  }
  return text.split(needle).join(String(stringToReplace));
};
NON BREAKING SPACE ISSUE
In some browsers
(MSIE, "as usual" ;-))
replacing space in string ignores the non-breaking space (the 160 char code).
One should always replace like this:
// The class covers both the ordinary space and the non-breaking space (U+00A0);
// the g flag is needed so that every occurrence is replaced, not just the first.
myString.replace(/[ \u00A0]/g, myReplaceString)
Very nice detailed explanation:
http://www.adamkoch.com/2009/07/25/white-space-and-character-160/
Do this recursively:
/**
 * Returns a copy of {@code s} in which every space character is replaced
 * by '+'.
 *
 * @param s the text to convert; must not be null
 * @return the text with each ' ' turned into '+'
 */
public String replaceSpace(String s){
    // String.replace(char, char) substitutes every occurrence in one pass,
    // avoiding one stack frame and one substring copy per character.
    return s.replace(' ', '+');
}

Categories

Resources