JavaScript regex using a character twice - javascript

So I'm using regex to grab information from a string, the issue is I need to both start up and stop at a / in the string.
Here's an example
var regexp = /\/(.*?)=(.*?)\//g;
var url_hash = "/s=lorem+ipsum/p=2/";
var match;
var result = {};
while ((match = regexp.exec(url_hash)) != null) {
result[match[1]] = match[2];
}
I can grab result['s'] without issue, but grabbing result['p'] becomes problematic, because the ending / for result['s'] is the same as the starting / for result['p']. If I changed the string to /s=lorem+ipsum//p=2/ it works perfectly, but of course that's hideous. So how can I fix this so that it both ends and starts up at the /? I'm stuck, any help is appreciated.

Use this regex:
/\/([^/=]+)=([^/]+)/
Code:
var regexp = /\/([^/=]+)=([^/]+)/g;
var url_hash = "/#!/s=lorem+ipsum/p=2/";
var match;
var result = {};
while ((match = regexp.exec(url_hash)) != null) {
result[match[1]] = match[2];
document.writeln(match[1] + ' = ' + match[2] + '<br>');
}
OUTPUT:
s = lorem+ipsum
p = 2
Online demo of the code

Why can't you just split it?
var result = {};
var url = "/#!/s=lorem+ipsum/p=2/".slice(4, -1).split('/');
for (i in url) {
var value = url[i].split('=');
result[value[0]] = value[1];
}
console.log(result);

You can determine the look-ahead set for part after the = yourself instead of adding it to the regular expression. The look-ahead set is "everything but a forward slash".
var regexp = /\/(\w+)=([^/]+)/g;
Btw, I'm assuming that the part before the = is word-like (i.e. alphanumeric)

Related

I have to make a split function in JavaScript

Edit
sorry if the question wasn't clear
here is the question..
create your version of javascript split function,
you may use indexOf and substring to help.
so if i give you a string "heellloolllloolllo" and i want to remove "llll" the function should return "heellloooolllo"
This what I did so far:
function split() {
var entered_string = document.forms["form1"]["str"].value;
var deleted_char = document.forms["form1"]["char"].value;
var index = entered_string.indexOf(deleted_char);
var i = deleted_char.length;
var result;
var x ;
for (x = 0; x< entered_string.length; x++ )
{
if (index < 0) {
result = entered_string;
} else {
result = entered_string.substring(0, index) +entered_string.substring(index+i);
}
}
alert(result)
}
Use the replace() function with the g at the end of your regular expression. It's called a "global modifier".
var string = 'heellloolllloolllo';
var res = string.replace(/llll/g, '');
console.log(res)
If your substring is a variable then you need to construct a new Regex object and set the g as the second parameter.
var string = 'heellloolllloolllo';
var find = 'llll';
var regex = new RegExp(find,'g');
var res = string.replace(regex, '');
console.log(res)
There are other useful modifiers you can use:
g - Global replace. Replace all instances of the matched string in the provided text.
i - Case insensitive replace. Replace all instances of the matched string, ignoring differences in case.
m - Multi-line replace. The regular expression should be tested for matches over multiple lines.
See this post for more information, credit to #codejoe.
Using String#replace and RegExp (the clean way)
var str = 'llllheellloolllloolllollll';
var matchStr = 'llll';
function removeSubString(str, matchStr) {
var re = new RegExp(matchStr, 'g');
return str.replace(re,"");
}
console.log(removeSubString(str, matchStr));
Using String#indexOf and String#substring
var str = 'llllheellloolllloolllollll';
var matchStr = 'llll';
function removeSubString(str, matchStr) {
var index = str.indexOf(matchStr);
while(index != -1) {
var firstSubStr = str.substring(0, index);
var lastSubStr = str.substring(index + matchStr.length);
str = firstSubStr + lastSubStr;
index = str.indexOf(matchStr);
}
return str;
}
console.log(removeSubString(str,matchStr))

Regex to get words between ":" and "," in javascript

I'm learning regex. I'm trying to get the most correct regex for the following :
Input is:
class:first,class:second,subject:math,subject:bio,room:nine
Expected output:
first,second,math,bio,nine
Want to store the above output in a string . var s = "";
Here's what I tried:
(:)(.*)(,)
However I want the last word too.
Using RegExp.prototype.exec:
var re = /:(.*?)(?:,|$)/g; // `,|$` : match `,` or end of the string.
var str = 'class:first,class:second,subject:math,subject:bio,room:nine';
var result = [];
var match;
while ((match = re.exec(str)) !== null)
result.push(match[1]);
result.join(',') // => 'first,second,math,bio,nine'
Using String.prototype.match, Array.prototype.map:
var re = /:(.*?)(,|$)/g;
var str = 'class:first,class:second,subject:math,subject:bio,room:nine';
str.match(re).map(function(m) { return m.replace(/[:,]/g, ''); }).join(',')
// => 'first,second,math,bio,nine'
Here is another method (based on the request so far):
var str = 'class:first,class:second,subject:math,subject:bio,room:nine';
// global match doesn't have sub-patterns
// there isn't a look behind in JavaScript
var s = str.match(/:([^,]+)(?=,|$)/g);
// result: [":first", ":second", ":math", ":bio", ":nine"]
// convert to string and remove the :
s = s.join(',').replace(/:/g, '');
// result: first,second,math,bio,nine"
Here is the fiddle

Javascript: String search for regex, starting at the end of the string

Is there a javascript string function that search a regex and it will start the search at the end?
If not, what is the fastest and/or cleanest way to search the index of a regex starting from the end?
example of regex:
/<\/?([a-z][a-z0-9]*)\b[^>]*>?/gi
Maybe this can be useful and easier:
str.lastIndexOf(str.match(<your_regex_here>).pop());
Perhaps something like this is suitable for you?
Javascript
function lastIndexOfRx(string, regex) {
var match = string.match(regex);
return match ? string.lastIndexOf(match.slice(-1)) : -1;
}
var rx = /<\/?([a-z][a-z0-9]*)\b[^>]*>?/gi;
console.log(lastIndexOfRx("", rx));
console.log(lastIndexOfRx("<i>it</i><b>bo</b>", rx));
jsFiddle
And just for interest, this function vs the function that you choose to go with. jsperf
This requires that you format your regex correctly for matching exactly the pattern you want and globally (like given in your question), for example /.*(<\/?([a-z][a-z0-9]*)\b[^>]*>?)/i will not work with this function. But what you do get is a function that is clean and fast.
You may create a reverse function like:
function reverse (s) {
var o = '';
for (var i = s.length - 1; i >= 0; i--)
o += s[i];
return o;
}
and then use
var yourString = reverse("Your string goes here");
var regex = new Regex(your_expression);
var result = yourString.match(regex);
Another idea: if you want to search by word in reverse order then
function reverseWord(s) {
var o = '';
var split = s.split(' ');
for (var i = split.length - 1; i >= 0; i--)
o += split[i] + ' ';
return o;
}
var yourString = reverseWord("Your string goes here");
var regex = new Regex(your_expression);
var result = yourString.match(regex);
Andreas gave this from the comment:
https://stackoverflow.com/a/274094/402037
String.prototype.regexLastIndexOf = function(regex, startpos) {
regex = (regex.global) ? regex : new RegExp(regex.source, "g" + (regex.ignoreCase ? "i" : "") + (regex.multiLine ? "m" : ""));
if(typeof (startpos) == "undefined") {
startpos = this.length;
} else if(startpos < 0) {
startpos = 0;
}
var stringToWorkWith = this.substring(0, startpos + 1);
var lastIndexOf = -1;
var nextStop = 0;
while((result = regex.exec(stringToWorkWith)) != null) {
lastIndexOf = result.index;
regex.lastIndex = ++nextStop;
}
return lastIndexOf;
}
Which gives the functionality that I need, I tested my regex, and it is successful. So I'll use this
It depends what you exactly want to search for. You can use string.lastIndexOf or inside the regexp to use $ (end of the string).
Update:
try the regexp
/<\/?([a-z][a-z0-9]*)\b[^>]*>?[\w\W]*$/gi
var m = text.match(/.*(<\/?([a-z][a-z0-9]*)\b[^>]*>?)/i);
if (m) {
textFound = m[1];
position = text.lastIndexOf(textFound);
}
Use .* to skip as much text as posible, capture the text found and search it with lastIndexOf
EDIT:
Well, if text is found, no need to search with lastIndexOf. m[0] contains the full coincidence (including all the initial padding), and m[1] the searched text. So position of found text is m[0].length - m[1].length
var m = text.match(/.*(<\/?([a-z][a-z0-9]*)\b[^>]*>?)/i);
if (m) {
textFound = m[1];
position = m[0].length - m[1].length;
}
Assuming you're looking for a string 'token', then you need the position of 'token' that has no other 'token' following until the end of the string.
So you should compose your regex something like that:
$token = 'token';
$re = "/(?:$token)[^(?:$token)]*$/";
This will find your 'token' where no further 'token' can be found until string end. The "(?:" grouping simply makes the group non-storing, slightly speeding up performance and saving memory.

Get current matching regex-rule

I try to check for a given RegExp-rule in a string and need to get the current matching rule.
Here's what I've tried so far:
var prefixes = /-webkit-|-khtml-|-moz-|-ms-|-o-/g;
var match;
var str = '';
while ( !(match = prefixes.exec(str)) ) {
str += '-webkit-';
console.log(match); // => null
}
The match is null, but how can I get the current matching-rule (in this case -webkit-)?
var prefixes = /(-webkit-|-khtml-|-moz-|-ms-|-o-)/g;
var str = "-webkit-adsf-moz-adsf"
var m;
while(m = prefixes.exec(str))
console.log(m[0]);
You aren't asking for any groups in your regex, try surrounding your regex in parenthesis to define a group, e.g. /(-webkit-|-khtml-|-moz-|-ms-|-o-)/g.
Various other issues, try:
var prefixes = /(-webkit-|-khtml-|-moz-|-ms-|-o-)/g;
var match;
var str = 'prefix-ms-something';
match = prefixes.exec(str);
console.log(match);

Remove all occurrences except last?

I want to remove all occurrences of substring = . in a string except the last one.
E.G:
1.2.3.4
should become:
123.4
You can use regex with positive look ahead,
"1.2.3.4".replace(/[.](?=.*[.])/g, "");
2-liner:
function removeAllButLast(string, token) {
/* Requires STRING not contain TOKEN */
var parts = string.split(token);
return parts.slice(0,-1).join('') + token + parts.slice(-1)
}
Alternative version without the requirement on the string argument:
function removeAllButLast(string, token) {
var parts = string.split(token);
if (parts[1]===undefined)
return string;
else
return parts.slice(0,-1).join('') + token + parts.slice(-1)
}
Demo:
> removeAllButLast('a.b.c.d', '.')
"abc.d"
The following one-liner is a regular expression that takes advantage of the fact that the * character is greedy, and that replace will leave the string alone if no match is found. It works by matching [longest string including dots][dot] and leaving [rest of string], and if a match is found it strips all '.'s from it:
'a.b.c.d'.replace(/(.*)\./, x => x.replace(/\./g,'')+'.')
(If your string contains newlines, you will have to use [.\n] rather than naked .s)
You can do something like this:
var str = '1.2.3.4';
var last = str.lastIndexOf('.');
var butLast = str.substring(0, last).replace(/\./g, '');
var res = butLast + str.substring(last);
Live example:
http://jsfiddle.net/qwjaW/
You could take a positive lookahead (for keeping the last dot, if any) and replace the first coming dots.
var string = '1.2.3.4';
console.log(string.replace(/\.(?=.*\.)/g, ''));
A replaceAllButLast function is more useful than a removeAllButLast function. When you want to remove just replace with an empty string:
function replaceAllButLast(str, pOld, pNew) {
var parts = str.split(pOld)
if (parts.length === 1) return str
return parts.slice(0, -1).join(pNew) + pOld + parts.slice(-1)
}
var test = 'hello there hello there hello there'
test = replaceAllButLast(test, ' there', '')
console.log(test) // hello hello hello there
Found a much better way of doing this. Here is replaceAllButLast and appendAllButLast as they should be done. The latter does a replace whilst preserving the original match. To remove, just replace with an empty string.
var str = "hello there hello there hello there"
function replaceAllButLast(str, regex, replace) {
var reg = new RegExp(regex, 'g')
return str.replace(reg, function(match, offset, str) {
var follow = str.slice(offset);
var isLast = follow.match(reg).length == 1;
return (isLast) ? match : replace
})
}
function appendAllButLast(str, regex, append) {
var reg = new RegExp(regex, 'g')
return str.replace(reg, function(match, offset, str) {
var follow = str.slice(offset);
var isLast = follow.match(reg).length == 1;
return (isLast) ? match : match + append
})
}
var replaced = replaceAllButLast(str, / there/, ' world')
console.log(replaced)
var appended = appendAllButLast(str, / there/, ' fred')
console.log(appended)
Thanks to #leaf for these masterpieces which he gave here.
You could reverse the string, remove all occurrences of substring except the first, and reverse it again to get what you want.
function formatString() {
var arr = ('1.2.3.4').split('.');
var arrLen = arr.length-1;
var outputString = '.' + arr[arrLen];
for (var i=arr.length-2; i >= 0; i--) {
outputString = arr[i]+outputString;
}
alert(outputString);
}
See it in action here: http://jsbin.com/izebay
var s='1.2.3.4';
s=s.split('.');
s.splice(s.length-1,0,'.');
s.join('');
123.4

Categories

Resources