String replace a url with part of the same url in javascript - javascript

I have string that contains a random url:
http://google.com/vocab/prefix#Billy
That needs to be transformed so that everything up to, and including the first # is replaced with the value between the last / and the first # followed by a :.
The result would be:
prefix:Billy
More examples:
http://some.url/a/path#elephant --> path:elephant
http://random.com/cool/black/beans/thing#Bob --> thing:bob
I understand how to capture the prefix part /([^\/]+(?=#))/, but I'm struggling to do a string replace because I can't figure out how to capture the part I need to replace.
myString.replace(/([^\/]+(?=#))/, '$1:')
I would prefer to use string.replace with regex if at all possible

When using replace method you need to match all the patterns you want to replace instead of just the part you need to keep; Here are two options:
let s = 'http://google.com/vocab/prefix#Billy'
// using greedy regex
console.log(s.replace(/.*\/([^#]+)#/, '$1:'))
// adapted from OP's attempt
console.log(s.replace(/.*?([^\/]+?)#/, '$1:'))
Note .* part to match the substring you want to discard, () to capture the pattern you want to keep, then reformat the output.

Try this code:
var myString = "http://google.com/vocab/prefix#Billy";
var hashIndex = myString.indexOf("#"); // find where the "#" is
for(var i = hashIndex; i > 0; i--) { // loop from "#" index *back* to closest "/" symbol
if(myString[i] == "/") break; // break loop when "/" is found
}
myString = myString.replace("#", ":"); // replace "#" with ":" as in your example
console.log(myString.substring(i, hashIndex); // output
Shortened:
var myString = "http://google.com/vocab/prefix#Billy".replace("#",":");
for(var i = myString.indexOf(":"); i > 0; i--) { if(myString[i] == "/") break; }
console.log(myString.substring(i, myString.indexOf(":");

Related

Get All possible matches between forward slashes

I would like to get all possible matches of a string with forward slashes '/' using regex.
I would like to regex that matches all the possibilities of a string between slashes but excludes a part which has no ending '/'
For example a string /greatgrandparent/grandparent/parent/child
it should return something like this:
/greatgrandparent/
/greatgrandparent/grandparent/
/greatgrandparent/grandparent/parent/
The following regex that will get each word that begins with a / and a positive lookahead for the / character is this /\/\w+(?=\/)/g
You can use the match() function that will place each word it finds in an array. You can then loop through the array to combine the different results. Check out the snippet below.
var str = `/greatgrandparent/grandparent/parent/child`;
var strArr = str.match(/\/\w+(?=\/)/g);
console.log(strArr);
var strLoop = ``;
for (i = 0; i < strArr.length; i++) {
strLoop += strArr[i];
document.write(`${strLoop}<br>`);
}

Regex to validate a texarea input which must be URLs separated by new lines

I am trying to create a regex which will ultimately be used with Google Forms to validate a texarea input.
The rule is,
Input area can have one or more URLs (http or https)
Each URL must be separated either by one or more new lines
Each line which has text, must be a single valid URL
Last URL may have or may not have new line character/s after it
Till now, I have written this regex ^(https?://.+[\r\n]+)*(https?://.+[\r\n]+?)$ but the problem is that if a line has more than 1 url, it validates that too.
Here is my testing playground: http://goo.gl/YPdvBH.
Here is what you are looking for
Demo , Demo with your URLS
function validate(ele) {
str = ele.value;
str = str.replace(/\r/g, "");
while (/\s\n/.test(str)) {
str = str.replace(/\s\n/g, "\n");
}
while (/\n\n/.test(str)) {
str = str.replace(/\n\n/g, "\n");
}
ele.value = str;
str = str.replace(/\n/g, "_!_&_!_").split("_!_&_!_")
var result = [], counter = 0;
for (var i = 0; i < str.length; i++) {
str[i] = str[i].replace(/(?:(?:^|\n)\s+|\s+(?:$|\n))/g, '').replace(/\s+/g, ' ');
if(str[i].length !== 0){
if (isValidAddress(str[i])) {
result.push(str[i]);
}
counter += 1;
}
}
function isValidAddress(s) {
return /^(https?|ftp):\/\/(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:)*#)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&'\(\)\*\+,;=]|:|#)|\/|\?)*)?$/i.test(s)
}
return (result.length === str.length);
}
var ele = document.getElementById('urls');
validate(ele);
This is closer to the regex you are looking for:
^(https?://[\S]+[\r\n]+)*(https?://[\S]+[\r\n]+?)$
The difference between your regex and this one is that you use .+ which will match all characters except newline whereas I use [\S]+ (note it is a capital S) which will match all non-whitespace characters. So, this doesn't match more than one token on one line. Hence, on each line you can match at max one token and that must be of the form that you have defined.
For a regex to match a single URL, look at this question on StackOverflow:
What is the best regular expression to check if a string is a valid URL?
I don't know whether google-forms have a length limit. But if they have, it is sure to almost bounce into it.
If i understand right - in your regexp missing m flag for multiline, so you need something like this
/^(https?://.+this your reg exp for one url)$/m
sample with regexp from Javascript URL validation regex
/^(ht|f)tps?:\/\/[a-z0-9-\.]+\.[a-z]{2,4}\/?([^\s<>\#%"\,\{\}\\|\\\^\[\]`]+)?$/m

Extract string when preceding number or combo of preceding characters is unknown

Here's an example string:
++++#foo+bar+baz++#yikes
I need to extract foo and only foo from there or a similar scenario.
The + and the # are the only characters I need to worry about.
However, regardless of what precedes foo, it needs to be stripped or ignored. Everything else after it needs to as well.
try this:
/\++#(\w+)/
and catch the capturing group one.
You can simply use the match() method.
var str = "++++#foo+bar+baz++#yikes";
var res = str.match(/\w+/g);
console.log(res[0]); // foo
console.log(res); // foo,bar,baz,yikes
Or use exec
var str = "++++#foo+bar+baz++#yikes";
var match = /(\w+)/.exec(str);
alert(match[1]); // foo
Using exec with a g modifier (global) is meant to be used in a loop getting all sub matches.
var str = "++++#foo+bar+baz++#yikes";
var re = /\w+/g;
var match;
while (match = re.exec(str)) {
// In array form, match is now your next match..
}
How exactly do + and # play a role in identifying foo? If you just want any string that follows # and is terminated by + that's as simple as:
var foostring = '++++#foo+bar+baz++#yikes';
var matches = (/\#([^+]+)\+/g).exec(foostring);
if (matches.length > 1) {
// all the matches are found in elements 1 .. length - 1 of the matches array
alert('found ' + matches[1] + '!'); // alerts 'found foo!'
}
To help you more specifically, please provide information about the possible variations of your data and how you would go about identifying the token you want to extract even in cases of differing lengths and characters.
If you are just looking for the first segment of text preceded and followed by any combination of + and #, then use:
var foostring = '++++#foo+bar+baz++#yikes';
var result = foostring.match(/[^+#]+/);
// will be the single-element array, ['foo'], or null.
Depending on your data, using \w may be too restrictive as it is equivalent to [a-zA-z0-9_]. Does your data have anything else such as punctuation, dashes, parentheses, or any other characters that you do want to include in the match? Using the negated character class I suggest will catch every token that does not contain a + or a #.

Javascript Remove strings in beginning and end

base on the following string
...here..
..there...
.their.here.
How can i remove the . on the beginning and end of string like the trim that removes all spaces, using javascript
the output should be
here
there
their.here
These are the reasons why the RegEx for this task is /(^\.+|\.+$)/mg:
Inside /()/ is where you write the pattern of the substring you want to find in the string:
/(ol)/ This will find the substring ol in the string.
var x = "colt".replace(/(ol)/, 'a'); will give you x == "cat";
The ^\.+|\.+$ in /()/ is separated into 2 parts by the symbol | [means or]
^\.+ and \.+$
^\.+ means to find as many . as possible at the start.
^ means at the start; \ is to escape the character; adding + behind a character means to match any string containing one or more that character
\.+$ means to find as many . as possible at the end.
$ means at the end.
The m behind /()/ is used to specify that if the string has newline or carriage return characters, the ^ and $ operators will now match against a newline boundary, instead of a string boundary.
The g behind /()/ is used to perform a global match: so it find all matches rather than stopping after the first match.
To learn more about RegEx you can check out this guide.
Try to use the following regex
var text = '...here..\n..there...\n.their.here.';
var replaced = text.replace(/(^\.+|\.+$)/mg, '');
Here is working Demo
Use Regex /(^\.+|\.+$)/mg
^ represent at start
\.+ one or many full stops
$ represents at end
so:
var text = '...here..\n..there...\n.their.here.';
alert(text.replace(/(^\.+|\.+$)/mg, ''));
Here is an non regular expression answer which utilizes String.prototype
String.prototype.strim = function(needle){
var first_pos = 0;
var last_pos = this.length-1;
//find first non needle char position
for(var i = 0; i<this.length;i++){
if(this.charAt(i) !== needle){
first_pos = (i == 0? 0:i);
break;
}
}
//find last non needle char position
for(var i = this.length-1; i>0;i--){
if(this.charAt(i) !== needle){
last_pos = (i == this.length? this.length:i+1);
break;
}
}
return this.substring(first_pos,last_pos);
}
alert("...here..".strim('.'));
alert("..there...".strim('.'))
alert(".their.here.".strim('.'))
alert("hereagain..".strim('.'))
and see it working here : http://jsfiddle.net/cettox/VQPbp/
Slightly more code-golfy, if not readable, non-regexp prototype extension:
String.prototype.strim = function(needle) {
var out = this;
while (0 === out.indexOf(needle))
out = out.substr(needle.length);
while (out.length === out.lastIndexOf(needle) + needle.length)
out = out.slice(0,out.length-needle.length);
return out;
}
var spam = "this is a string that ends with thisthis";
alert("#" + spam.strim("this") + "#");
Fiddle-ige
Use RegEx with javaScript Replace
var res = s.replace(/(^\.+|\.+$)/mg, '');
We can use replace() method to remove the unwanted string in a string
Example:
var str = '<pre>I'm big fan of Stackoverflow</pre>'
str.replace(/<pre>/g, '').replace(/<\/pre>/g, '')
console.log(str)
output:
Check rules on RULES blotter

How can I remove all characters up to and including the 3rd slash in a string?

I'm having trouble with removing all characters up to and including the 3 third slash in JavaScript. This is my string:
http://blablab/test
The result should be:
test
Does anybody know the correct solution?
To get the last item in a path, you can split the string on / and then pop():
var url = "http://blablab/test";
alert(url.split("/").pop());
//-> "test"
To specify an individual part of a path, split on / and use bracket notation to access the item:
var url = "http://blablab/test/page.php";
alert(url.split("/")[3]);
//-> "test"
Or, if you want everything after the third slash, split(), slice() and join():
var url = "http://blablab/test/page.php";
alert(url.split("/").slice(3).join("/"));
//-> "test/page.php"
var string = 'http://blablab/test'
string = string.replace(/[\s\S]*\//,'').replace(/[\s\S]*\//,'').replace(/[\s\S]*\//,'')
alert(string)
This is a regular expression. I will explain below
The regex is /[\s\S]*\//
/ is the start of the regex
Where [\s\S] means whitespace or non whitespace (anything), not to be confused with . which does not match line breaks (. is the same as [^\r\n]).
* means that we match anywhere from zero to unlimited number of [\s\S]
\/ Means match a slash character
The last / is the end of the regex
var str = "http://blablab/test";
var index = 0;
for(var i = 0; i < 3; i++){
index = str.indexOf("/",index)+1;
}
str = str.substr(index);
To make it a one liner you could make the following:
str = str.substr(str.indexOf("/",str.indexOf("/",str.indexOf("/")+1)+1)+1);
You can use split to split the string in parts and use slice to return all parts after the third slice.
var str = "http://blablab/test",
arr = str.split("/");
arr = arr.slice(3);
console.log(arr.join("/")); // "test"
// A longer string:
var str = "http://blablab/test/test"; // "test/test";
You could use a regular expression like this one:
'http://blablab/test'.match(/^(?:[^/]*\/){3}(.*)$/);
// -> ['http://blablab/test', 'test]
A string’s match method gives you either an array (of the whole match, in this case the whole input, and of any capture groups (and we want the first capture group)), or null. So, for general use you need to pull out the 1th element of the array, or null if a match wasn’t found:
var input = 'http://blablab/test',
re = /^(?:[^/]*\/){3}(.*)$/,
match = input.match(re),
result = match && match[1]; // With this input, result contains "test"
let str = "http://blablab/test";
let data = new URL(str).pathname.split("/").pop();
console.log(data);

Categories

Resources