Javascript regex to exclude if inside brackets - javascript

I'm trying to split an expression string on the characters +*() but I need to only split there if they are not inside brackets. I am struggling to create the correct regex for this task.
Example:
.N.TESTARRAY[INT1+2] + ONLINE * STACKOVERFLOW
I would like this to produce an array such as:
[ ".N.TESTARRAY[INT1+2]" , "ONLINE" , "STACKOVERFLOW" ]
Currently my regex splits on the characters +*()[]
split(/(\+|\*|\[|\]|\(|\))/g)
Because I split on the square brackets as well, I then loop through the array and merge anything inside the brackets. I think this is clunky though and would like to move it all into the regex.
/**
 * Re-joins tokens that were split apart inside square brackets.
 *
 * Meant for the output of a split on the characters + * ( ) [ ] that keeps
 * the delimiters as tokens. Everything from an opening "[" up to and
 * including its matching "]" is appended to the token that precedes the "[",
 * so operators inside brackets no longer appear as separate tokens. Tokens
 * outside brackets are copied through unchanged.
 *
 * @param {string[]} tokens - Tokens in their original order.
 * @returns {string[]} A new array with bracketed groups merged.
 */
function mergeInnerBrackets(tokens) {
  var merged = [];
  var depth = 0;

  // Appends to the most recent entry, or starts the first entry when the
  // token list begins with a bracket and there is nothing to append to yet.
  function appendToLast(text) {
    if (merged.length === 0) {
      merged.push(text);
    } else {
      merged[merged.length - 1] += text;
    }
  }

  for (var i = 0; i < tokens.length; i++) {
    var token = tokens[i];
    if (token === "[") {
      depth++;
      appendToLast(token);
    } else if (token === "]") {
      // A stray "]" must not push depth below zero, otherwise the next
      // well-formed bracket group would no longer be merged.
      if (depth > 0) {
        depth--;
      }
      appendToLast(token);
    } else if (depth > 0) {
      appendToLast(token);
    } else {
      merged.push(token);
    }
  }
  return merged;
}
I am using JavaScript and need to be able to support back to IE8. I also read something about being able to do this with negative lookbehinds, but I saw that JavaScript doesn't support them.

Can string also be like .N.TEST[INT1+2] + ON[INT1] * ON[INT2] ? I'd rather use regex like this.
\s*[+*)(](?![^[]*])\s*
If the string can contain more than one pair of [...]
var str = '.N.TEST[INT1+2] + ON[INT1] * ON[INT2]';
// An operator counts as a separator only when no "]" can be reached from it
// without first crossing a "[", i.e. when it is not inside a bracket pair.
var operatorOutsideBrackets = /\s*[+*)(](?![^[]*])\s*/;
console.log(str.split(operatorOutsideBrackets));
JSFiddle demo

Related

Replace content present in the nested brackets

Input = ABCDEF ((3) abcdef),GHIJKLMN ((4)(5) Value),OPQRSTUVW((4(5)) Value (3))
Expected Output = ABCDEF,GHIJKLMN,OPQRSTUVW
Tried so far
Output = Input.replace(/ *\([^)]*\)*/g, "");
Using a regex here probably won't work, or scale, because you expect nested parentheses in your input string. Regex works well when there is a known and fixed structure to the input. Instead, I would recommend that you approach this using a parser. In the code below, I iterate over the input string, one character at a time, and I use a counter to keep track of how many open parentheses there are. If we are inside a parenthesized term, then we don't record those characters. I also have one simple replacement at the end to remove whitespace, which is an additional step that your output implies but that you never explicitly mentioned.
// Walk Input one character at a time, keeping only the characters seen while
// no parenthesis is open; the parentheses themselves are never copied.
var Input = "ABCDEF ((3) abcdef),GHIJKLMN ((4)(5) Value),OPQRSTUVW((4(5)) Value (3))";
var Output = "";
var openParens = 0;
for (var pos = 0; pos < Input.length; pos++) {
  var ch = Input[pos];
  switch (ch) {
    case '(':
      openParens++;
      break;
    case ')':
      openParens--;
      break;
    default:
      if (openParens === 0) {
        Output += ch;
      }
  }
}
// Drop the spaces that were left in front of each removed group.
Output = Output.replace(/ /g,'');
console.log(Output);
If you need to remove nested parentheses, you may use a trick from Remove Nested Patterns with One Line of JavaScript.
var Input = "ABCDEF ((3) abcdef),GHIJKLMN ((4)(5) Value),OPQRSTUVW((4(5)) Value (3))";
var Output = Input;
// Each pass removes the innermost "(...)" groups (plus the whitespace in front
// of them); repeat until a pass leaves the string unchanged.
var previous;
do {
  previous = Output;
  Output = previous.replace(/\s*\([^()]*\)/g, "");
} while (previous != Output);
console.log(Output);
Or, you could use a recursive function:
/**
 * Removes every parenthesised group from s, nested groups included.
 *
 * One pass strips the innermost groups together with any whitespace directly
 * in front of them; the function then recurses on the result until a pass
 * changes nothing.
 *
 * @param {string} s - Text that may contain (possibly nested) parentheses.
 * @returns {string} The text with all balanced groups removed.
 */
function remove_nested_parens(s) {
  const stripped = s.replace(/\s*\([^()]*\)/g, "");
  if (stripped == s) {
    return s;
  }
  return remove_nested_parens(stripped);
}
console.log(remove_nested_parens("ABCDEF ((3) abcdef),GHIJKLMN ((4)(5) Value),OPQRSTUVW((4(5)) Value (3))"));
Here, \s*\([^()]*\) matches 0+ whitespaces, (, 0+ chars other than ( and ) and then a ), and the replace operation is repeated until the string does not change.

how to replace a char in string with many chars

I want to change a char in string with many values
I have string like this :
date_format = "%m/%d/%Y";
And I want to replace every % with the character that follows it, so the date variable should look like this:
date_format="mm/dd/YY";
Here is what I tried so far, but i can't get it to work, so I need some help here:
/**
 * Returns a copy of str in which the character at position index is replaced
 * by chr. Strings are immutable, so the caller must use the return value.
 *
 * @param {string} str - Source string.
 * @param {number} index - Zero-based position of the character to replace.
 * @param {string} chr - Replacement text inserted in place of that character.
 * @returns {string} The new string, or str unchanged when index is outside
 *   the range 0 .. str.length - 1.
 */
function replaceon(str, index, chr) {
  // Reject negative positions as well as positions past the end; a negative
  // index would otherwise insert chr in front of part of the string instead
  // of replacing anything.
  if (index < 0 || index > str.length - 1) return str;
  return str.substring(0, index) + chr + str.substring(index + 1);
}
/**
 * Collects the start index of every occurrence of substring in string.
 * Overlapping occurrences are reported, because each search resumes one
 * position after the previous hit.
 *
 * @param {string} substring - Text to look for.
 * @param {string} string - Text to search in.
 * @returns {number[]} Ascending list of match positions; empty when there is
 *   no match or when substring is empty.
 */
function locations(substring, string) {
  var found = [];
  // indexOf("") succeeds at every position, including the end of the string,
  // so without this guard the loop below would never terminate.
  if (substring === "") return found;
  var i = string.indexOf(substring);
  while (i >= 0) {
    found.push(i);
    i = string.indexOf(substring, i + 1);
  }
  return found;
}
// Attempt at turning a format such as "%m/%d/%Y" into "mm/dd/YY" by replacing
// each "%" with the character that follows it.
// NOTE(review): the string returned by replaceon() is discarded inside the
// loop, so date_format is returned exactly as it was passed in.
function corrent_format(date_format) {
// Positions of every "%" in the format string.
var my_locations = locations('%', date_format);
console.log(my_locations.length);
for (var i = 0; i < my_locations.length; i++) {
replaceon(date_format, my_locations[i], date_format[my_locations[i] + 1]);
}
return date_format;
}
console.log(corrent_format(date_format));
You can try this:
"%m/%d/%Y".replace(/%([^%])/g,"$1$1")
Hope this helps.
You can use a regular expression for that:
var date_format = "%m/%d/%Y";
// Capture the character that follows each "%" and emit it twice.
var percentAndNextChar = /%(.)/g;
var res = date_format.replace(percentAndNextChar, "$1$1");
console.log(res);
/**
 * Replaces every "%" in str with the character that follows it, so that
 * "%m/%d/%Y" becomes "mm/dd/YY". All other characters are copied unchanged.
 *
 * @param {string} str - Format string to convert.
 * @returns {string} The converted string; "" for empty input.
 */
function act(str) {
  var res = "";
  for (var i = 0; i < str.length; i++) {
    // The final character has no successor, so it is always copied as-is,
    // even when it is a "%".
    if (str[i] === "%" && i < str.length - 1) {
      res += str[i + 1];
    } else {
      res += str[i];
    }
  }
  return res;
}
var date_format = "%m/%d/%Y";
console.log(act(date_format));
Your code is not working because the date_format variable is not being modified by the corrent_format function. The replaceon function returns a new string. If you assign the result to date_format, you should get the expected result:
// Store the string returned by replaceon() back into date_format on every
// pass; strings are immutable, so calling replaceon() alone changes nothing.
for (var i = 0; i < my_locations.length; i++) {
date_format = replaceon(date_format, my_locations[i], date_format[my_locations[i]+1])
}
Alternatively, you could perform the replacement using String.replace and a regular expression:
date_format.replace(/%(.)/g, '$1$1');
For the regex-challenged among us, here's a translation of /%(.)/g, '$1$1':
/ means that the next part is going to be regex.
% find a %.
. any single character, so %. would match %m, %d, and/or %Y.
(.) putting it in parens means to capture the value to use later on.
/g get all the matches in the source string (instead of just the first one).
$1 references the value we captured before in (.).
$1$1 repeats the captured value twice.
So, replace every %. with whatever's in the ., times two.
Now, this regex expression is the most concise and quickest way to do the job at hand. But maybe you can't use regular expressions. Maybe you have a dyslexic boss who has outlawed their use. (Dyslexia and regex are uneasy companions at best.) Maybe you haven't put in the 47 hours screaming at regular expressions that aren't doing what you want, that you're required to put in before you're allowed to use them. Or maybe you just hate regular expressions.
If any of these apply, you can also do this:
// replace() with a string pattern swaps only the first "%" it finds, so the
// three chained calls consume the three "%" signs from left to right.
var x = '%m/%d/%y'
  .replace('%', 'm')
  .replace('%', 'd')
  .replace('%', 'y');
alert(x);
This takes advantage of the fact that the replace function only replaces the first match found.
But seriously, don't use this. Use regex. It's always better to invest that 20 hours working out a regex expression that condenses the 20 lines of code you wrote in 15 minutes down to one. Unless you have to get it done sometime tonight, and whatever you're trying just doesn't work, and it's getting close to midnight, and you're getting tired...well, no. Use regex. Really. Resist the temptation to avoid finding a regex solution. You'll be glad you did when you wake up at your desk in the morning, having missed your deadline, and get to go home and spend more time with your family, courtesy of your generous severance package.

How to remove string between two characters every time they occur [duplicate]

This question already has answers here:
Strip HTML from Text JavaScript
(44 answers)
removing html tags from string
(3 answers)
Closed 7 years ago.
I need to get rid of any text inside < and >, including the two delimiters themselves.
So for example, from string
<brev-y>th</brev-y><sw-ex>a</sw-ex><sl>t</sl>​
I would like to get this one
that
This is what i've tried so far:
// NOTE(review): assuming annotation is a string, split(' ') returns an array,
// and arrays have no substring() method, so the second line would throw -
// confirm against the caller.
var str = annotation.split(' ');
str.substring(str.lastIndexOf("<") + 1, str.lastIndexOf(">"))
But it doesn't work for every < and >.
I'd rather not use RegEx if possible, but I'm happy to hear if it's the only option.
You can simply use the replace method with /<[^>]*>/g. It matches < followed by [^>]* (any number of characters other than >) up to the next >, globally.
var str = '<brev-y>th</brev-y><sw-ex>a</sw-ex><sl>t</sl>';
// A tag is "<", any run of characters other than ">", then ">".
var anyTag = /<[^>]*>/g;
str = str.replace(anyTag, "");
alert(str);
For string removal you can use RegExp, it is ok.
"<brev-y>th</brev-y><sw-ex>a</sw-ex><sl>t</sl>​".replace(/<\/?[^>]+>/g, "")
Since the text you want is always after a > character, you could split it at that point, and then the first character in each String of the array would be the character you need. For example:
// Split after every '>' so that each piece begins with the text that follows a
// tag, then keep only the part of the piece that precedes the next '<'.
String[] strings = stringName.split(">");
String word = "";
for (int i = 0; i < strings.length; i++) {
    int tagStart = strings[i].indexOf('<');
    // A piece without '<' is trailing plain text and is kept whole. Taking a
    // substring instead of charAt(0) keeps multi-character text such as "th"
    // and cannot fail on an empty piece.
    word += (tagStart >= 0) ? strings[i].substring(0, tagStart) : strings[i];
}
This is probably glitchy right now, but I think this would work. You don't need to actually remove the text between the "<>"- just get the character right after a '>'
Using a regular expression is not the only option, but it's a pretty good option.
You can easily parse the string to remove the tags, for example by using a state machine where the < and > characters turns on and off a state of ignoring characters. There are other methods of course, some shorter, some more efficient, but they will all be a few lines of code, while a regular expression solution is just a single replace.
Example:
/**
 * Strips every "<...>" tag from str with a single regular-expression replace.
 *
 * @param {string} str - Markup to clean.
 * @returns {string} The text with all tags removed.
 */
function removeHtml1(str) {
  var tagPattern = /<[^>]*>/g;
  return str.replace(tagPattern, '');
}
/**
 * Strips tags from str with a two-state scan: "<" starts ignoring characters,
 * ">" stops ignoring them, and neither delimiter is ever copied.
 *
 * @param {string} str - Markup to clean.
 * @returns {string} The characters found outside "<" ... ">" spans.
 */
function removeHtml2(str) {
  var insideTag = false;
  var kept = [];
  for (var pos = 0; pos < str.length; pos++) {
    var ch = str.charAt(pos);
    if (ch === '<') {
      insideTag = true;
    } else if (ch === '>') {
      insideTag = false;
    } else if (!insideTag) {
      kept.push(ch);
    }
  }
  return kept.join('');
}
// Run both implementations on the sample markup from the question and log
// what each one returns.
var s = "<brev-y>th</brev-y><sw-ex>a</sw-ex><sl>t</sl>";
console.log(removeHtml1(s));
console.log(removeHtml2(s));
There are several ways to do this. Some are better than others. I haven't done one lately for these two specific characters, so I took a minute and wrote some code that may work. I will describe how it works. Create a function with a loop that copies an incoming string, character by character, to an outgoing string. Make the function a string type so it will return your modified string. Create the loop to scan from incoming from string[0] and while less than string.length(). Within the loop, add an if statement. When the if statement sees a "<" character in the incoming string it stops copying, but continues to look at every character in the incoming string until it sees the ">" character. When the ">" is found, it starts copying again. It's that simple.
The following code may need some refinement, but it should get you started on the method described above. It's not the fastest and not the most elegant but the basic idea is there. This did compile, and it ran correctly, here, with no errors. In my test program it produced the correct output. However, you may need to test it further in the context of your program.
// Returns a copy of str1 without the '<' ... '>' spans. Copying stops at each
// '<' and resumes with the character after the next '>'; the two delimiters
// themselves are never copied.
string filter_on_brackets(string str1)
{
    string kept = "";
    bool insideTag = false;
    for (size_t pos = 0; pos < str1.length(); ++pos)
    {
        const char ch = str1[pos];
        if (ch == '<')
        {
            insideTag = true;
        }
        else if (ch == '>')
        {
            insideTag = false;
        }
        else if (!insideTag)
        {
            kept += ch;
        }
    }
    return kept;
}

Splitting a string by ],[

I have a string like the following:
"[a,b,c],[d,e,f],[g,h,i]"
I was wondering how can I separate the string by ],[ in JavaScript. .split("],[") will remove the brackets. I want to preserve them.
Expected output:
["[a,b,c]","[d,e,f]","[g,h,i]"]
Edit:
Here is a more complicated case that I highlighted in a comment on @Leo's answer (wherein a ],[-delimited string contains ],):
"[dfs[dfs],dfs],[dfs,df,sdfs]]"
Expected output:
["[dfs[dfs],dfs]","[dfs,df,sdfs]]"]
Try this:
"[a,b,c],[d,e,f],[g,h,i]".match(/(\[[^\]]+\])/g)
// ["[a,b,c]", "[d,e,f]", "[g,h,i]"]
EDIT For OP's new case, here's the trick:
"[dfs[dfs],dfs],[dfs,df,sdfs]]".match(/(?!,\[).+?\](?=,\[|$)/g)
// ["[dfs[dfs],dfs]", "[dfs,df,sdfs]]"]
It works for even more complicated cases:
"[dfs[aa,[a],dfs],[dfs[dfs],dfs],[dfs,df,sdfs]]".match(/(?!,\[).+?\](?=,\[|$)/g)
// ["[dfs[aa,[a],dfs]", "[dfs[dfs],dfs]", "[dfs,df,sdfs]]"]
"[dfs[aa,[a],dfs],[dfs[dfs],dfs],[dfs,df,sdfs]],[dfs,df,sdfs]]".match(/(?!,\[).+?\](?=,\[|$)/g)
// ["[dfs[aa,[a],dfs]", "[dfs[dfs],dfs]", "[dfs,df,sdfs]]", "[dfs,df,sdfs]]"]
Below is my personal opinion
However, since JavaScript's RegExp doesn't support lookbehind ((?<...), which is super handy for such requirements), using RegExp may become a maintainability nightmare. In this situation, I'd suggest an approach like @alienchow's delimiter replacement - not so neat, but more maintainable.
Personally I'd do
"[dfs[dfs],dfs],[dfs,df,sdfs]]".split("],[");
then loop through it to:
Append the first string with a "]".
Prepend the last string with a "[".
Prepend a "[" and append a "]" to all strings in between.
However, if you know what kind of strings and characters you will be receiving and you reaaaaally want a one-liner approach, you could try the hack below.
Replace all instances of "],[" with "]unlikely_string_or_special_unicode[", then split by "unlikely_string_or_special_unicode" - for example:
"[dfs[dfs],dfs],[dfs,df,sdfs]]".replace(/\],\[/g,"]~I_have_a_dream~[").split("~I_have_a_dream~");
Warning: Not 100% foolproof. If your input string contains the unlikely string you used as a delimiter, then it implodes and the universe comes to an end.
TMTOWDI
I prefer doing this with a regex as @Leo explained, but another way to do it, in the spirit of TMTOWTDI ("there's more than one way to do it") and completeness, is with the map function following the split:
var test = "[a,b,c],[d,e,f],[g,h,i]";
// Splitting on "],[" strips the brackets next to each cut, so put back
// whichever bracket a piece is missing at its start or end.
var splitTest = test.split("],[").map(function (piece) {
  var prefix = piece[0] !== '[' ? '[' : '';
  var suffix = piece[piece.length - 1] !== ']' ? ']' : '';
  return prefix + piece + suffix;
});
// FORNOW: to see the results
for (var shown = 0; shown < splitTest.length; shown++) {
  alert(splitTest[shown]);
}
Afterthought:
If you perchance have an empty pair of square brackets in your ],[-delimited string (i.e. "[a,b,c],[d,e,f],[],[g,h,i]" for example), this approach will preserve it too (as would changing #Leo's regex from /(\[[^\]]+\])/g to /(\[[^\]]*\])/g).
TMTOWDI Redeux
With the curveball that ] and [ may be within the ],[-delimited strings (per your comment on #Leo's answer), here is a rehash of my initial approach that is more robust:
var test = "[dfs[dfs],dfs],[dfs,df,sdfs]]";
// Decide by position rather than by content: every piece except the first
// lost its leading "[", and every piece except the last lost its trailing "]".
var splitTest = test.split("],[").map(function (piece, position, pieces) {
  var isFirst = position === 0;
  var isLast = position === pieces.length - 1;
  return (isFirst ? '' : '[') + piece + (isLast ? '' : ']');
});
// FORNOW: to see the results
for (var shown = 0; shown < splitTest.length; shown++) {
  alert(splitTest[shown]);
}

encode parenthesis underscore tilt star with encodeURIComponent

encodeURIComponent escapes all characters except the following: - _ . ! ~ * ' ( )
But is it possible to extend the functionality to encode the above special characters as well?
I know i can do something like this:
encodeURIComponent(str).replace(/\(/g, "%28").replace(/\)/g, "%29");
but I want functionality like this, without using additional functions on the encodeURIComponent
encodeURIComponent(str);
You should create your own function.
You should create your own function, really.
If you really know what you're doing, go to step 1.
Don't say I didn't warn you; there be dragons here:
(function () {
  // Keep a handle on the built-in before the global name is replaced.
  var nativeEncode = encodeURIComponent;
  // The replacement additionally escapes "(" and ")", which the built-in
  // leaves untouched.
  window.encodeURIComponent = function (str) {
    var encoded = nativeEncode(str);
    return encoded.replace(/\(/g, "%28").replace(/\)/g, "%29");
  };
}());
you can write your custom encoding function
/**
 * Percent-encodes only the characters listed in iChars and copies every other
 * character of str through unchanged. The encoded string and its decoded form
 * are written to the console for inspection.
 *
 * @param str - Text to encode.
 * @returns The text with each listed character replaced by "%" plus its
 *   two-digit hexadecimal character code.
 */
customEncodeURI(str : string){
    var iChars = ':",_{}/\\'; // provide all the set of chars that you want
    var encodedStr = '';
    for (var i = 0; i < str.length; i++) {
        var ch = str.charAt(i);
        if (iChars.indexOf(ch) != -1) {
            // A percent escape is exactly two hex digits, so pad character
            // codes below 0x10 (e.g. a tab added to iChars) with a leading zero.
            var hex = str.charCodeAt(i).toString(16);
            encodedStr += '%' + (hex.length < 2 ? '0' + hex : hex);
        } else {
            encodedStr += ch;
        }
    }
    console.log("Final encoded string is "+encodedStr);
    try {
        console.log("Final decoded string is "+decodeURIComponent(encodedStr));
    } catch (err) {
        // decodeURIComponent throws URIError when the input already held a
        // stray "%"; this diagnostic output must not make the encoding fail.
        console.log("Final encoded string could not be decoded: " + err);
    }
    return encodedStr;
}

Categories

Resources